xref: /openbmc/qemu/target/riscv/vector_helper.c (revision 022b4ecf)
1 /*
2  * RISC-V Vector Extension Helpers for QEMU.
3  *
4  * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2 or later, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License along with
16  * this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include "qemu/osdep.h"
20 #include "cpu.h"
21 #include "exec/memop.h"
22 #include "exec/exec-all.h"
23 #include "exec/helper-proto.h"
24 #include "tcg/tcg-gvec-desc.h"
25 #include "internals.h"
26 #include <math.h>
27 
28 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
29                             target_ulong s2)
30 {
31     int vlmax, vl;
32     RISCVCPU *cpu = env_archcpu(env);
33     uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
34     uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
35     bool vill = FIELD_EX64(s2, VTYPE, VILL);
36     target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED);
37 
38     if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
39         /* only set vill bit. */
40         env->vtype = FIELD_DP64(0, VTYPE, VILL, 1);
41         env->vl = 0;
42         env->vstart = 0;
43         return 0;
44     }
45 
46     vlmax = vext_get_vlmax(cpu, s2);
47     if (s1 <= vlmax) {
48         vl = s1;
49     } else {
50         vl = vlmax;
51     }
52     env->vl = vl;
53     env->vtype = s2;
54     env->vstart = 0;
55     return vl;
56 }
57 
/*
 * Vector data is stored in host-endian 64-bit chunks, so addressing units
 * smaller than 64 bits need a host-endian fixup of the element index.
 * Little-endian hosts use the index unchanged; big-endian hosts flip the
 * low index bits so the element is found inside its 64-bit chunk.
 */
#ifndef HOST_WORDS_BIGENDIAN
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#else
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#endif
77 
78 static inline uint32_t vext_nf(uint32_t desc)
79 {
80     return FIELD_EX32(simd_data(desc), VDATA, NF);
81 }
82 
83 static inline uint32_t vext_mlen(uint32_t desc)
84 {
85     return FIELD_EX32(simd_data(desc), VDATA, MLEN);
86 }
87 
88 static inline uint32_t vext_vm(uint32_t desc)
89 {
90     return FIELD_EX32(simd_data(desc), VDATA, VM);
91 }
92 
93 static inline uint32_t vext_lmul(uint32_t desc)
94 {
95     return FIELD_EX32(simd_data(desc), VDATA, LMUL);
96 }
97 
98 /*
99  * Get vector group length in bytes. Its range is [64, 2048].
100  *
101  * As simd_desc support at most 256, the max vlen is 512 bits.
102  * So vlen in bytes is encoded as maxsz.
103  */
104 static inline uint32_t vext_maxsz(uint32_t desc)
105 {
106     return simd_maxsz(desc) << vext_lmul(desc);
107 }
108 
109 /*
110  * This function checks watchpoint before real load operation.
111  *
112  * In softmmu mode, the TLB API probe_access is enough for watchpoint check.
113  * In user mode, there is no watchpoint support now.
114  *
115  * It will trigger an exception if there is no mapping in TLB
116  * and page table walk can't fill the TLB entry. Then the guest
117  * software can return here after process the exception or never return.
118  */
119 static void probe_pages(CPURISCVState *env, target_ulong addr,
120                         target_ulong len, uintptr_t ra,
121                         MMUAccessType access_type)
122 {
123     target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
124     target_ulong curlen = MIN(pagelen, len);
125 
126     probe_access(env, addr, curlen, access_type,
127                  cpu_mmu_index(env, false), ra);
128     if (len > curlen) {
129         addr += curlen;
130         curlen = len - curlen;
131         probe_access(env, addr, curlen, access_type,
132                      cpu_mmu_index(env, false), ra);
133     }
134 }
135 
/*
 * Zero the tail of a vector register.
 *
 * tail: address of the first tail element, as computed by the callers
 *       with the host-endian H*() index fixup already applied
 * cnt:  number of bytes in front of the tail (body bytes)
 * tot:  total number of bytes in the register (group)
 *
 * tot - cnt bytes are cleared in total.
 */
#ifdef HOST_WORDS_BIGENDIAN
static void vext_clear(void *tail, uint32_t cnt, uint32_t tot)
{
    /*
     * Vector data is kept in host-endian 64-bit units, so on a big-endian
     * host the H*() fixup makes 'tail' point somewhere inside the unit that
     * holds the first tail element rather than at the lowest tail address.
     * Always work from the start of that 64-bit unit.  This also covers the
     * case where cnt is a multiple of 8: for elements narrower than 64 bits
     * 'tail' is then still offset into the unit, and clearing from the raw
     * pointer would skip the first tail bytes and run past the register.
     */
    uintptr_t unit = (uintptr_t)tail & ~(uintptr_t)7;
    uint32_t part1 = 0;

    if (cnt % 8) {
        /*
         * The unit is shared between body and tail elements.  The tail
         * elements occupy its lowest-addressed bytes.
         */
        part1 = 8 - (cnt % 8);
        memset((void *)unit, 0, part1);
        unit += 8;
    }
    /* Everything from the next whole 64-bit unit onwards is tail. */
    memset((void *)unit, 0, tot - cnt - part1);
}
#else
static void vext_clear(void *tail, uint32_t cnt, uint32_t tot)
{
    /* Little-endian hosts keep the elements in address order. */
    memset(tail, 0, tot - cnt);
}
#endif
160 
/*
 * Clear the tail of a register holding 8-bit elements: locate element idx
 * (with the H1 host-endian fixup) and hand cnt/tot on to vext_clear().
 */
static void clearb(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot)
{
    int8_t *elems = vd;

    vext_clear(&elems[H1(idx)], cnt, tot);
}
166 
/*
 * Clear the tail of a register holding 16-bit elements: locate element idx
 * (with the H2 host-endian fixup) and hand cnt/tot on to vext_clear().
 */
static void clearh(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot)
{
    int16_t *elems = vd;

    vext_clear(&elems[H2(idx)], cnt, tot);
}
172 
/*
 * Clear the tail of a register holding 32-bit elements: locate element idx
 * (with the H4 host-endian fixup) and hand cnt/tot on to vext_clear().
 */
static void clearl(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot)
{
    int32_t *elems = vd;

    vext_clear(&elems[H4(idx)], cnt, tot);
}
178 
/*
 * Clear the tail of a register holding 64-bit elements: locate element idx
 * (no index fixup is applied at this width) and hand cnt/tot on to
 * vext_clear().
 */
static void clearq(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot)
{
    int64_t *elems = vd;

    vext_clear(&elems[idx], cnt, tot);
}
184 
185 
/*
 * Return the mask bit of element 'index'.
 *
 * v0 is read as an array of 64-bit words; the bit examined is bit number
 * index * mlen counted from bit 0 of the first word.
 */
static inline int vext_elem_mask(void *v0, int mlen, int index)
{
    const uint64_t *words = v0;
    int bit = index * mlen;

    return (words[bit / 64] >> (bit % 64)) & 1;
}
192 
193 /* elements operations for load and store */
194 typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
195                                uint32_t idx, void *vd, uintptr_t retaddr);
196 typedef void clear_fn(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot);
197 
198 #define GEN_VEXT_LD_ELEM(NAME, MTYPE, ETYPE, H, LDSUF)     \
199 static void NAME(CPURISCVState *env, abi_ptr addr,         \
200                  uint32_t idx, void *vd, uintptr_t retaddr)\
201 {                                                          \
202     MTYPE data;                                            \
203     ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
204     data = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
205     *cur = data;                                           \
206 }                                                          \
207 
208 GEN_VEXT_LD_ELEM(ldb_b, int8_t,  int8_t,  H1, ldsb)
209 GEN_VEXT_LD_ELEM(ldb_h, int8_t,  int16_t, H2, ldsb)
210 GEN_VEXT_LD_ELEM(ldb_w, int8_t,  int32_t, H4, ldsb)
211 GEN_VEXT_LD_ELEM(ldb_d, int8_t,  int64_t, H8, ldsb)
212 GEN_VEXT_LD_ELEM(ldh_h, int16_t, int16_t, H2, ldsw)
213 GEN_VEXT_LD_ELEM(ldh_w, int16_t, int32_t, H4, ldsw)
214 GEN_VEXT_LD_ELEM(ldh_d, int16_t, int64_t, H8, ldsw)
215 GEN_VEXT_LD_ELEM(ldw_w, int32_t, int32_t, H4, ldl)
216 GEN_VEXT_LD_ELEM(ldw_d, int32_t, int64_t, H8, ldl)
217 GEN_VEXT_LD_ELEM(lde_b, int8_t,  int8_t,  H1, ldsb)
218 GEN_VEXT_LD_ELEM(lde_h, int16_t, int16_t, H2, ldsw)
219 GEN_VEXT_LD_ELEM(lde_w, int32_t, int32_t, H4, ldl)
220 GEN_VEXT_LD_ELEM(lde_d, int64_t, int64_t, H8, ldq)
221 GEN_VEXT_LD_ELEM(ldbu_b, uint8_t,  uint8_t,  H1, ldub)
222 GEN_VEXT_LD_ELEM(ldbu_h, uint8_t,  uint16_t, H2, ldub)
223 GEN_VEXT_LD_ELEM(ldbu_w, uint8_t,  uint32_t, H4, ldub)
224 GEN_VEXT_LD_ELEM(ldbu_d, uint8_t,  uint64_t, H8, ldub)
225 GEN_VEXT_LD_ELEM(ldhu_h, uint16_t, uint16_t, H2, lduw)
226 GEN_VEXT_LD_ELEM(ldhu_w, uint16_t, uint32_t, H4, lduw)
227 GEN_VEXT_LD_ELEM(ldhu_d, uint16_t, uint64_t, H8, lduw)
228 GEN_VEXT_LD_ELEM(ldwu_w, uint32_t, uint32_t, H4, ldl)
229 GEN_VEXT_LD_ELEM(ldwu_d, uint32_t, uint64_t, H8, ldl)
230 
231 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
232 static void NAME(CPURISCVState *env, abi_ptr addr,         \
233                  uint32_t idx, void *vd, uintptr_t retaddr)\
234 {                                                          \
235     ETYPE data = *((ETYPE *)vd + H(idx));                  \
236     cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
237 }
238 
239 GEN_VEXT_ST_ELEM(stb_b, int8_t,  H1, stb)
240 GEN_VEXT_ST_ELEM(stb_h, int16_t, H2, stb)
241 GEN_VEXT_ST_ELEM(stb_w, int32_t, H4, stb)
242 GEN_VEXT_ST_ELEM(stb_d, int64_t, H8, stb)
243 GEN_VEXT_ST_ELEM(sth_h, int16_t, H2, stw)
244 GEN_VEXT_ST_ELEM(sth_w, int32_t, H4, stw)
245 GEN_VEXT_ST_ELEM(sth_d, int64_t, H8, stw)
246 GEN_VEXT_ST_ELEM(stw_w, int32_t, H4, stl)
247 GEN_VEXT_ST_ELEM(stw_d, int64_t, H8, stl)
248 GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
249 GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
250 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
251 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
252 
253 /*
254  *** stride: access vector element from strided memory
255  */
256 static void
257 vext_ldst_stride(void *vd, void *v0, target_ulong base,
258                  target_ulong stride, CPURISCVState *env,
259                  uint32_t desc, uint32_t vm,
260                  vext_ldst_elem_fn *ldst_elem, clear_fn *clear_elem,
261                  uint32_t esz, uint32_t msz, uintptr_t ra,
262                  MMUAccessType access_type)
263 {
264     uint32_t i, k;
265     uint32_t nf = vext_nf(desc);
266     uint32_t mlen = vext_mlen(desc);
267     uint32_t vlmax = vext_maxsz(desc) / esz;
268 
269     /* probe every access*/
270     for (i = 0; i < env->vl; i++) {
271         if (!vm && !vext_elem_mask(v0, mlen, i)) {
272             continue;
273         }
274         probe_pages(env, base + stride * i, nf * msz, ra, access_type);
275     }
276     /* do real access */
277     for (i = 0; i < env->vl; i++) {
278         k = 0;
279         if (!vm && !vext_elem_mask(v0, mlen, i)) {
280             continue;
281         }
282         while (k < nf) {
283             target_ulong addr = base + stride * i + k * msz;
284             ldst_elem(env, addr, i + k * vlmax, vd, ra);
285             k++;
286         }
287     }
288     /* clear tail elements */
289     if (clear_elem) {
290         for (k = 0; k < nf; k++) {
291             clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz);
292         }
293     }
294 }
295 
296 #define GEN_VEXT_LD_STRIDE(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN)       \
297 void HELPER(NAME)(void *vd, void * v0, target_ulong base,               \
298                   target_ulong stride, CPURISCVState *env,              \
299                   uint32_t desc)                                        \
300 {                                                                       \
301     uint32_t vm = vext_vm(desc);                                        \
302     vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
303                      CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE),            \
304                      GETPC(), MMU_DATA_LOAD);                           \
305 }
306 
307 GEN_VEXT_LD_STRIDE(vlsb_v_b,  int8_t,   int8_t,   ldb_b,  clearb)
308 GEN_VEXT_LD_STRIDE(vlsb_v_h,  int8_t,   int16_t,  ldb_h,  clearh)
309 GEN_VEXT_LD_STRIDE(vlsb_v_w,  int8_t,   int32_t,  ldb_w,  clearl)
310 GEN_VEXT_LD_STRIDE(vlsb_v_d,  int8_t,   int64_t,  ldb_d,  clearq)
311 GEN_VEXT_LD_STRIDE(vlsh_v_h,  int16_t,  int16_t,  ldh_h,  clearh)
312 GEN_VEXT_LD_STRIDE(vlsh_v_w,  int16_t,  int32_t,  ldh_w,  clearl)
313 GEN_VEXT_LD_STRIDE(vlsh_v_d,  int16_t,  int64_t,  ldh_d,  clearq)
314 GEN_VEXT_LD_STRIDE(vlsw_v_w,  int32_t,  int32_t,  ldw_w,  clearl)
315 GEN_VEXT_LD_STRIDE(vlsw_v_d,  int32_t,  int64_t,  ldw_d,  clearq)
316 GEN_VEXT_LD_STRIDE(vlse_v_b,  int8_t,   int8_t,   lde_b,  clearb)
317 GEN_VEXT_LD_STRIDE(vlse_v_h,  int16_t,  int16_t,  lde_h,  clearh)
318 GEN_VEXT_LD_STRIDE(vlse_v_w,  int32_t,  int32_t,  lde_w,  clearl)
319 GEN_VEXT_LD_STRIDE(vlse_v_d,  int64_t,  int64_t,  lde_d,  clearq)
320 GEN_VEXT_LD_STRIDE(vlsbu_v_b, uint8_t,  uint8_t,  ldbu_b, clearb)
321 GEN_VEXT_LD_STRIDE(vlsbu_v_h, uint8_t,  uint16_t, ldbu_h, clearh)
322 GEN_VEXT_LD_STRIDE(vlsbu_v_w, uint8_t,  uint32_t, ldbu_w, clearl)
323 GEN_VEXT_LD_STRIDE(vlsbu_v_d, uint8_t,  uint64_t, ldbu_d, clearq)
324 GEN_VEXT_LD_STRIDE(vlshu_v_h, uint16_t, uint16_t, ldhu_h, clearh)
325 GEN_VEXT_LD_STRIDE(vlshu_v_w, uint16_t, uint32_t, ldhu_w, clearl)
326 GEN_VEXT_LD_STRIDE(vlshu_v_d, uint16_t, uint64_t, ldhu_d, clearq)
327 GEN_VEXT_LD_STRIDE(vlswu_v_w, uint32_t, uint32_t, ldwu_w, clearl)
328 GEN_VEXT_LD_STRIDE(vlswu_v_d, uint32_t, uint64_t, ldwu_d, clearq)
329 
330 #define GEN_VEXT_ST_STRIDE(NAME, MTYPE, ETYPE, STORE_FN)                \
331 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
332                   target_ulong stride, CPURISCVState *env,              \
333                   uint32_t desc)                                        \
334 {                                                                       \
335     uint32_t vm = vext_vm(desc);                                        \
336     vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
337                      NULL, sizeof(ETYPE), sizeof(MTYPE),                \
338                      GETPC(), MMU_DATA_STORE);                          \
339 }
340 
341 GEN_VEXT_ST_STRIDE(vssb_v_b, int8_t,  int8_t,  stb_b)
342 GEN_VEXT_ST_STRIDE(vssb_v_h, int8_t,  int16_t, stb_h)
343 GEN_VEXT_ST_STRIDE(vssb_v_w, int8_t,  int32_t, stb_w)
344 GEN_VEXT_ST_STRIDE(vssb_v_d, int8_t,  int64_t, stb_d)
345 GEN_VEXT_ST_STRIDE(vssh_v_h, int16_t, int16_t, sth_h)
346 GEN_VEXT_ST_STRIDE(vssh_v_w, int16_t, int32_t, sth_w)
347 GEN_VEXT_ST_STRIDE(vssh_v_d, int16_t, int64_t, sth_d)
348 GEN_VEXT_ST_STRIDE(vssw_v_w, int32_t, int32_t, stw_w)
349 GEN_VEXT_ST_STRIDE(vssw_v_d, int32_t, int64_t, stw_d)
350 GEN_VEXT_ST_STRIDE(vsse_v_b, int8_t,  int8_t,  ste_b)
351 GEN_VEXT_ST_STRIDE(vsse_v_h, int16_t, int16_t, ste_h)
352 GEN_VEXT_ST_STRIDE(vsse_v_w, int32_t, int32_t, ste_w)
353 GEN_VEXT_ST_STRIDE(vsse_v_d, int64_t, int64_t, ste_d)
354 
355 /*
356  *** unit-stride: access elements stored contiguously in memory
357  */
358 
359 /* unmasked unit-stride load and store operation*/
360 static void
361 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
362              vext_ldst_elem_fn *ldst_elem, clear_fn *clear_elem,
363              uint32_t esz, uint32_t msz, uintptr_t ra,
364              MMUAccessType access_type)
365 {
366     uint32_t i, k;
367     uint32_t nf = vext_nf(desc);
368     uint32_t vlmax = vext_maxsz(desc) / esz;
369 
370     /* probe every access */
371     probe_pages(env, base, env->vl * nf * msz, ra, access_type);
372     /* load bytes from guest memory */
373     for (i = 0; i < env->vl; i++) {
374         k = 0;
375         while (k < nf) {
376             target_ulong addr = base + (i * nf + k) * msz;
377             ldst_elem(env, addr, i + k * vlmax, vd, ra);
378             k++;
379         }
380     }
381     /* clear tail elements */
382     if (clear_elem) {
383         for (k = 0; k < nf; k++) {
384             clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz);
385         }
386     }
387 }
388 
389 /*
390  * masked unit-stride load and store operation will be a special case of stride,
391  * stride = NF * sizeof (MTYPE)
392  */
393 
394 #define GEN_VEXT_LD_US(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN)           \
395 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
396                          CPURISCVState *env, uint32_t desc)             \
397 {                                                                       \
398     uint32_t stride = vext_nf(desc) * sizeof(MTYPE);                    \
399     vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
400                      CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE),            \
401                      GETPC(), MMU_DATA_LOAD);                           \
402 }                                                                       \
403                                                                         \
404 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
405                   CPURISCVState *env, uint32_t desc)                    \
406 {                                                                       \
407     vext_ldst_us(vd, base, env, desc, LOAD_FN, CLEAR_FN,                \
408                  sizeof(ETYPE), sizeof(MTYPE), GETPC(), MMU_DATA_LOAD); \
409 }
410 
411 GEN_VEXT_LD_US(vlb_v_b,  int8_t,   int8_t,   ldb_b,  clearb)
412 GEN_VEXT_LD_US(vlb_v_h,  int8_t,   int16_t,  ldb_h,  clearh)
413 GEN_VEXT_LD_US(vlb_v_w,  int8_t,   int32_t,  ldb_w,  clearl)
414 GEN_VEXT_LD_US(vlb_v_d,  int8_t,   int64_t,  ldb_d,  clearq)
415 GEN_VEXT_LD_US(vlh_v_h,  int16_t,  int16_t,  ldh_h,  clearh)
416 GEN_VEXT_LD_US(vlh_v_w,  int16_t,  int32_t,  ldh_w,  clearl)
417 GEN_VEXT_LD_US(vlh_v_d,  int16_t,  int64_t,  ldh_d,  clearq)
418 GEN_VEXT_LD_US(vlw_v_w,  int32_t,  int32_t,  ldw_w,  clearl)
419 GEN_VEXT_LD_US(vlw_v_d,  int32_t,  int64_t,  ldw_d,  clearq)
420 GEN_VEXT_LD_US(vle_v_b,  int8_t,   int8_t,   lde_b,  clearb)
421 GEN_VEXT_LD_US(vle_v_h,  int16_t,  int16_t,  lde_h,  clearh)
422 GEN_VEXT_LD_US(vle_v_w,  int32_t,  int32_t,  lde_w,  clearl)
423 GEN_VEXT_LD_US(vle_v_d,  int64_t,  int64_t,  lde_d,  clearq)
424 GEN_VEXT_LD_US(vlbu_v_b, uint8_t,  uint8_t,  ldbu_b, clearb)
425 GEN_VEXT_LD_US(vlbu_v_h, uint8_t,  uint16_t, ldbu_h, clearh)
426 GEN_VEXT_LD_US(vlbu_v_w, uint8_t,  uint32_t, ldbu_w, clearl)
427 GEN_VEXT_LD_US(vlbu_v_d, uint8_t,  uint64_t, ldbu_d, clearq)
428 GEN_VEXT_LD_US(vlhu_v_h, uint16_t, uint16_t, ldhu_h, clearh)
429 GEN_VEXT_LD_US(vlhu_v_w, uint16_t, uint32_t, ldhu_w, clearl)
430 GEN_VEXT_LD_US(vlhu_v_d, uint16_t, uint64_t, ldhu_d, clearq)
431 GEN_VEXT_LD_US(vlwu_v_w, uint32_t, uint32_t, ldwu_w, clearl)
432 GEN_VEXT_LD_US(vlwu_v_d, uint32_t, uint64_t, ldwu_d, clearq)
433 
434 #define GEN_VEXT_ST_US(NAME, MTYPE, ETYPE, STORE_FN)                    \
435 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
436                          CPURISCVState *env, uint32_t desc)             \
437 {                                                                       \
438     uint32_t stride = vext_nf(desc) * sizeof(MTYPE);                    \
439     vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,  \
440                      NULL, sizeof(ETYPE), sizeof(MTYPE),                \
441                      GETPC(), MMU_DATA_STORE);                          \
442 }                                                                       \
443                                                                         \
444 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
445                   CPURISCVState *env, uint32_t desc)                    \
446 {                                                                       \
447     vext_ldst_us(vd, base, env, desc, STORE_FN, NULL,                   \
448                  sizeof(ETYPE), sizeof(MTYPE), GETPC(), MMU_DATA_STORE);\
449 }
450 
451 GEN_VEXT_ST_US(vsb_v_b, int8_t,  int8_t , stb_b)
452 GEN_VEXT_ST_US(vsb_v_h, int8_t,  int16_t, stb_h)
453 GEN_VEXT_ST_US(vsb_v_w, int8_t,  int32_t, stb_w)
454 GEN_VEXT_ST_US(vsb_v_d, int8_t,  int64_t, stb_d)
455 GEN_VEXT_ST_US(vsh_v_h, int16_t, int16_t, sth_h)
456 GEN_VEXT_ST_US(vsh_v_w, int16_t, int32_t, sth_w)
457 GEN_VEXT_ST_US(vsh_v_d, int16_t, int64_t, sth_d)
458 GEN_VEXT_ST_US(vsw_v_w, int32_t, int32_t, stw_w)
459 GEN_VEXT_ST_US(vsw_v_d, int32_t, int64_t, stw_d)
460 GEN_VEXT_ST_US(vse_v_b, int8_t,  int8_t , ste_b)
461 GEN_VEXT_ST_US(vse_v_h, int16_t, int16_t, ste_h)
462 GEN_VEXT_ST_US(vse_v_w, int32_t, int32_t, ste_w)
463 GEN_VEXT_ST_US(vse_v_d, int64_t, int64_t, ste_d)
464 
465 /*
466  *** index: access vector element from indexed memory
467  */
468 typedef target_ulong vext_get_index_addr(target_ulong base,
469         uint32_t idx, void *vs2);
470 
471 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
472 static target_ulong NAME(target_ulong base,            \
473                          uint32_t idx, void *vs2)      \
474 {                                                      \
475     return (base + *((ETYPE *)vs2 + H(idx)));          \
476 }
477 
478 GEN_VEXT_GET_INDEX_ADDR(idx_b, int8_t,  H1)
479 GEN_VEXT_GET_INDEX_ADDR(idx_h, int16_t, H2)
480 GEN_VEXT_GET_INDEX_ADDR(idx_w, int32_t, H4)
481 GEN_VEXT_GET_INDEX_ADDR(idx_d, int64_t, H8)
482 
483 static inline void
484 vext_ldst_index(void *vd, void *v0, target_ulong base,
485                 void *vs2, CPURISCVState *env, uint32_t desc,
486                 vext_get_index_addr get_index_addr,
487                 vext_ldst_elem_fn *ldst_elem,
488                 clear_fn *clear_elem,
489                 uint32_t esz, uint32_t msz, uintptr_t ra,
490                 MMUAccessType access_type)
491 {
492     uint32_t i, k;
493     uint32_t nf = vext_nf(desc);
494     uint32_t vm = vext_vm(desc);
495     uint32_t mlen = vext_mlen(desc);
496     uint32_t vlmax = vext_maxsz(desc) / esz;
497 
498     /* probe every access*/
499     for (i = 0; i < env->vl; i++) {
500         if (!vm && !vext_elem_mask(v0, mlen, i)) {
501             continue;
502         }
503         probe_pages(env, get_index_addr(base, i, vs2), nf * msz, ra,
504                     access_type);
505     }
506     /* load bytes from guest memory */
507     for (i = 0; i < env->vl; i++) {
508         k = 0;
509         if (!vm && !vext_elem_mask(v0, mlen, i)) {
510             continue;
511         }
512         while (k < nf) {
513             abi_ptr addr = get_index_addr(base, i, vs2) + k * msz;
514             ldst_elem(env, addr, i + k * vlmax, vd, ra);
515             k++;
516         }
517     }
518     /* clear tail elements */
519     if (clear_elem) {
520         for (k = 0; k < nf; k++) {
521             clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz);
522         }
523     }
524 }
525 
526 #define GEN_VEXT_LD_INDEX(NAME, MTYPE, ETYPE, INDEX_FN, LOAD_FN, CLEAR_FN) \
527 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                   \
528                   void *vs2, CPURISCVState *env, uint32_t desc)            \
529 {                                                                          \
530     vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,                \
531                     LOAD_FN, CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE),       \
532                     GETPC(), MMU_DATA_LOAD);                               \
533 }
534 
535 GEN_VEXT_LD_INDEX(vlxb_v_b,  int8_t,   int8_t,   idx_b, ldb_b,  clearb)
536 GEN_VEXT_LD_INDEX(vlxb_v_h,  int8_t,   int16_t,  idx_h, ldb_h,  clearh)
537 GEN_VEXT_LD_INDEX(vlxb_v_w,  int8_t,   int32_t,  idx_w, ldb_w,  clearl)
538 GEN_VEXT_LD_INDEX(vlxb_v_d,  int8_t,   int64_t,  idx_d, ldb_d,  clearq)
539 GEN_VEXT_LD_INDEX(vlxh_v_h,  int16_t,  int16_t,  idx_h, ldh_h,  clearh)
540 GEN_VEXT_LD_INDEX(vlxh_v_w,  int16_t,  int32_t,  idx_w, ldh_w,  clearl)
541 GEN_VEXT_LD_INDEX(vlxh_v_d,  int16_t,  int64_t,  idx_d, ldh_d,  clearq)
542 GEN_VEXT_LD_INDEX(vlxw_v_w,  int32_t,  int32_t,  idx_w, ldw_w,  clearl)
543 GEN_VEXT_LD_INDEX(vlxw_v_d,  int32_t,  int64_t,  idx_d, ldw_d,  clearq)
544 GEN_VEXT_LD_INDEX(vlxe_v_b,  int8_t,   int8_t,   idx_b, lde_b,  clearb)
545 GEN_VEXT_LD_INDEX(vlxe_v_h,  int16_t,  int16_t,  idx_h, lde_h,  clearh)
546 GEN_VEXT_LD_INDEX(vlxe_v_w,  int32_t,  int32_t,  idx_w, lde_w,  clearl)
547 GEN_VEXT_LD_INDEX(vlxe_v_d,  int64_t,  int64_t,  idx_d, lde_d,  clearq)
548 GEN_VEXT_LD_INDEX(vlxbu_v_b, uint8_t,  uint8_t,  idx_b, ldbu_b, clearb)
549 GEN_VEXT_LD_INDEX(vlxbu_v_h, uint8_t,  uint16_t, idx_h, ldbu_h, clearh)
550 GEN_VEXT_LD_INDEX(vlxbu_v_w, uint8_t,  uint32_t, idx_w, ldbu_w, clearl)
551 GEN_VEXT_LD_INDEX(vlxbu_v_d, uint8_t,  uint64_t, idx_d, ldbu_d, clearq)
552 GEN_VEXT_LD_INDEX(vlxhu_v_h, uint16_t, uint16_t, idx_h, ldhu_h, clearh)
553 GEN_VEXT_LD_INDEX(vlxhu_v_w, uint16_t, uint32_t, idx_w, ldhu_w, clearl)
554 GEN_VEXT_LD_INDEX(vlxhu_v_d, uint16_t, uint64_t, idx_d, ldhu_d, clearq)
555 GEN_VEXT_LD_INDEX(vlxwu_v_w, uint32_t, uint32_t, idx_w, ldwu_w, clearl)
556 GEN_VEXT_LD_INDEX(vlxwu_v_d, uint32_t, uint64_t, idx_d, ldwu_d, clearq)
557 
558 #define GEN_VEXT_ST_INDEX(NAME, MTYPE, ETYPE, INDEX_FN, STORE_FN)\
559 void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
560                   void *vs2, CPURISCVState *env, uint32_t desc)  \
561 {                                                                \
562     vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
563                     STORE_FN, NULL, sizeof(ETYPE), sizeof(MTYPE),\
564                     GETPC(), MMU_DATA_STORE);                    \
565 }
566 
567 GEN_VEXT_ST_INDEX(vsxb_v_b, int8_t,  int8_t,  idx_b, stb_b)
568 GEN_VEXT_ST_INDEX(vsxb_v_h, int8_t,  int16_t, idx_h, stb_h)
569 GEN_VEXT_ST_INDEX(vsxb_v_w, int8_t,  int32_t, idx_w, stb_w)
570 GEN_VEXT_ST_INDEX(vsxb_v_d, int8_t,  int64_t, idx_d, stb_d)
571 GEN_VEXT_ST_INDEX(vsxh_v_h, int16_t, int16_t, idx_h, sth_h)
572 GEN_VEXT_ST_INDEX(vsxh_v_w, int16_t, int32_t, idx_w, sth_w)
573 GEN_VEXT_ST_INDEX(vsxh_v_d, int16_t, int64_t, idx_d, sth_d)
574 GEN_VEXT_ST_INDEX(vsxw_v_w, int32_t, int32_t, idx_w, stw_w)
575 GEN_VEXT_ST_INDEX(vsxw_v_d, int32_t, int64_t, idx_d, stw_d)
576 GEN_VEXT_ST_INDEX(vsxe_v_b, int8_t,  int8_t,  idx_b, ste_b)
577 GEN_VEXT_ST_INDEX(vsxe_v_h, int16_t, int16_t, idx_h, ste_h)
578 GEN_VEXT_ST_INDEX(vsxe_v_w, int32_t, int32_t, idx_w, ste_w)
579 GEN_VEXT_ST_INDEX(vsxe_v_d, int64_t, int64_t, idx_d, ste_d)
580 
/*
 *** unit-stride fault-only-first load instructions
 */
584 static inline void
585 vext_ldff(void *vd, void *v0, target_ulong base,
586           CPURISCVState *env, uint32_t desc,
587           vext_ldst_elem_fn *ldst_elem,
588           clear_fn *clear_elem,
589           uint32_t esz, uint32_t msz, uintptr_t ra)
590 {
591     void *host;
592     uint32_t i, k, vl = 0;
593     uint32_t mlen = vext_mlen(desc);
594     uint32_t nf = vext_nf(desc);
595     uint32_t vm = vext_vm(desc);
596     uint32_t vlmax = vext_maxsz(desc) / esz;
597     target_ulong addr, offset, remain;
598 
599     /* probe every access*/
600     for (i = 0; i < env->vl; i++) {
601         if (!vm && !vext_elem_mask(v0, mlen, i)) {
602             continue;
603         }
604         addr = base + nf * i * msz;
605         if (i == 0) {
606             probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD);
607         } else {
608             /* if it triggers an exception, no need to check watchpoint */
609             remain = nf * msz;
610             while (remain > 0) {
611                 offset = -(addr | TARGET_PAGE_MASK);
612                 host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
613                                          cpu_mmu_index(env, false));
614                 if (host) {
615 #ifdef CONFIG_USER_ONLY
616                     if (page_check_range(addr, nf * msz, PAGE_READ) < 0) {
617                         vl = i;
618                         goto ProbeSuccess;
619                     }
620 #else
621                     probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD);
622 #endif
623                 } else {
624                     vl = i;
625                     goto ProbeSuccess;
626                 }
627                 if (remain <=  offset) {
628                     break;
629                 }
630                 remain -= offset;
631                 addr += offset;
632             }
633         }
634     }
635 ProbeSuccess:
636     /* load bytes from guest memory */
637     if (vl != 0) {
638         env->vl = vl;
639     }
640     for (i = 0; i < env->vl; i++) {
641         k = 0;
642         if (!vm && !vext_elem_mask(v0, mlen, i)) {
643             continue;
644         }
645         while (k < nf) {
646             target_ulong addr = base + (i * nf + k) * msz;
647             ldst_elem(env, addr, i + k * vlmax, vd, ra);
648             k++;
649         }
650     }
651     /* clear tail elements */
652     if (vl != 0) {
653         return;
654     }
655     for (k = 0; k < nf; k++) {
656         clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz);
657     }
658 }
659 
660 #define GEN_VEXT_LDFF(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN)     \
661 void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
662                   CPURISCVState *env, uint32_t desc)             \
663 {                                                                \
664     vext_ldff(vd, v0, base, env, desc, LOAD_FN, CLEAR_FN,        \
665               sizeof(ETYPE), sizeof(MTYPE), GETPC());            \
666 }
667 
668 GEN_VEXT_LDFF(vlbff_v_b,  int8_t,   int8_t,   ldb_b,  clearb)
669 GEN_VEXT_LDFF(vlbff_v_h,  int8_t,   int16_t,  ldb_h,  clearh)
670 GEN_VEXT_LDFF(vlbff_v_w,  int8_t,   int32_t,  ldb_w,  clearl)
671 GEN_VEXT_LDFF(vlbff_v_d,  int8_t,   int64_t,  ldb_d,  clearq)
672 GEN_VEXT_LDFF(vlhff_v_h,  int16_t,  int16_t,  ldh_h,  clearh)
673 GEN_VEXT_LDFF(vlhff_v_w,  int16_t,  int32_t,  ldh_w,  clearl)
674 GEN_VEXT_LDFF(vlhff_v_d,  int16_t,  int64_t,  ldh_d,  clearq)
675 GEN_VEXT_LDFF(vlwff_v_w,  int32_t,  int32_t,  ldw_w,  clearl)
676 GEN_VEXT_LDFF(vlwff_v_d,  int32_t,  int64_t,  ldw_d,  clearq)
677 GEN_VEXT_LDFF(vleff_v_b,  int8_t,   int8_t,   lde_b,  clearb)
678 GEN_VEXT_LDFF(vleff_v_h,  int16_t,  int16_t,  lde_h,  clearh)
679 GEN_VEXT_LDFF(vleff_v_w,  int32_t,  int32_t,  lde_w,  clearl)
680 GEN_VEXT_LDFF(vleff_v_d,  int64_t,  int64_t,  lde_d,  clearq)
681 GEN_VEXT_LDFF(vlbuff_v_b, uint8_t,  uint8_t,  ldbu_b, clearb)
682 GEN_VEXT_LDFF(vlbuff_v_h, uint8_t,  uint16_t, ldbu_h, clearh)
683 GEN_VEXT_LDFF(vlbuff_v_w, uint8_t,  uint32_t, ldbu_w, clearl)
684 GEN_VEXT_LDFF(vlbuff_v_d, uint8_t,  uint64_t, ldbu_d, clearq)
685 GEN_VEXT_LDFF(vlhuff_v_h, uint16_t, uint16_t, ldhu_h, clearh)
686 GEN_VEXT_LDFF(vlhuff_v_w, uint16_t, uint32_t, ldhu_w, clearl)
687 GEN_VEXT_LDFF(vlhuff_v_d, uint16_t, uint64_t, ldhu_d, clearq)
688 GEN_VEXT_LDFF(vlwuff_v_w, uint32_t, uint32_t, ldwu_w, clearl)
689 GEN_VEXT_LDFF(vlwuff_v_d, uint32_t, uint64_t, ldwu_d, clearq)
690