xref: /openbmc/qemu/target/riscv/vector_helper.c (revision 83fcd573)
1 /*
2  * RISC-V Vector Extension Helpers for QEMU.
3  *
4  * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2 or later, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License along with
16  * this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include "qemu/osdep.h"
20 #include "cpu.h"
21 #include "exec/memop.h"
22 #include "exec/exec-all.h"
23 #include "exec/helper-proto.h"
24 #include "fpu/softfloat.h"
25 #include "tcg/tcg-gvec-desc.h"
26 #include "internals.h"
27 #include <math.h>
28 
29 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
30                             target_ulong s2)
31 {
32     int vlmax, vl;
33     RISCVCPU *cpu = env_archcpu(env);
34     uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
35     uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
36     uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
37     bool vill = FIELD_EX64(s2, VTYPE, VILL);
38     target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED);
39 
40     if (lmul & 4) {
41         /* Fractional LMUL. */
42         if (lmul == 4 ||
43             cpu->cfg.elen >> (8 - lmul) < sew) {
44             vill = true;
45         }
46     }
47 
48     if ((sew > cpu->cfg.elen)
49         || vill
50         || (ediv != 0)
51         || (reserved != 0)) {
52         /* only set vill bit. */
53         env->vtype = FIELD_DP64(0, VTYPE, VILL, 1);
54         env->vl = 0;
55         env->vstart = 0;
56         return 0;
57     }
58 
59     vlmax = vext_get_vlmax(cpu, s2);
60     if (s1 <= vlmax) {
61         vl = s1;
62     } else {
63         vl = vlmax;
64     }
65     env->vl = vl;
66     env->vtype = s2;
67     env->vstart = 0;
68     return vl;
69 }
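
/*
 * Worked example (illustrative, assuming VLEN = 128 bits): with
 * vtype.vsew = 2 (SEW = 32) and vtype.vlmul = 1 (LMUL = 2),
 * vext_get_vlmax() yields VLMAX = LMUL * VLEN / SEW = 2 * 128 / 32 = 8.
 * A request of s1 = 10 is then clamped to vl = 8, while s1 = 5 is
 * granted in full (vl = 5).
 */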
70 
71 /*
72  * Note that vector data is stored in host-endian 64-bit chunks,
73  * so addressing units smaller than that needs a host-endian fixup.
74  */
75 #ifdef HOST_WORDS_BIGENDIAN
76 #define H1(x)   ((x) ^ 7)
77 #define H1_2(x) ((x) ^ 6)
78 #define H1_4(x) ((x) ^ 4)
79 #define H2(x)   ((x) ^ 3)
80 #define H4(x)   ((x) ^ 1)
81 #define H8(x)   ((x))
82 #else
83 #define H1(x)   (x)
84 #define H1_2(x) (x)
85 #define H1_4(x) (x)
86 #define H2(x)   (x)
87 #define H4(x)   (x)
88 #define H8(x)   (x)
89 #endif
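
/*
 * Example (illustrative): with 8-bit elements on a big-endian host,
 * guest element 0 is the least-significant byte of the first 64-bit
 * chunk, which the host stores at byte offset 7, so H1(0) == 7 and
 * H1(7) == 0. Likewise H2(0) == 3 and H4(0) == 1 for 16-bit and
 * 32-bit elements, while 64-bit elements need no fixup (H8(x) == x).
 */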
90 
91 static inline uint32_t vext_nf(uint32_t desc)
92 {
93     return FIELD_EX32(simd_data(desc), VDATA, NF);
94 }
95 
96 static inline uint32_t vext_vm(uint32_t desc)
97 {
98     return FIELD_EX32(simd_data(desc), VDATA, VM);
99 }
100 
101 /*
102  * Encode LMUL to lmul as follows:
103  *     LMUL    vlmul    lmul
104  *      1       000       0
105  *      2       001       1
106  *      4       010       2
107  *      8       011       3
108  *      -       100       -
109  *     1/8      101      -3
110  *     1/4      110      -2
111  *     1/2      111      -1
112  */
113 static inline int32_t vext_lmul(uint32_t desc)
114 {
115     return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
116 }
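
/*
 * Example (illustrative): sextract32() sign-extends the 3-bit vlmul
 * field, so 0b011 decodes to lmul = 3 (LMUL = 8) and 0b111 decodes to
 * lmul = -1 (LMUL = 1/2), matching the table above.
 */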
117 
118 /*
119  * Get vector group length in bytes. Its range is [64, 2048].
120  *
121  * As simd_desc supports at most 256 bytes, the max vlen is 512 bits.
122  * So vlen in bytes is encoded as maxsz.
123  */
124 static inline uint32_t vext_maxsz(uint32_t desc)
125 {
126     return simd_maxsz(desc) << vext_lmul(desc);
127 }
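
/*
 * Example (illustrative, integral LMUL only): for VLEN = 512 bits,
 * simd_maxsz(desc) is 64 bytes; with LMUL = 8 (lmul = 3) the vector
 * register group spans 64 << 3 = 512 bytes.
 */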
128 
129 /*
130  * This function checks watchpoints before the real load operation.
131  *
132  * In softmmu mode, the TLB API probe_access is enough for the watchpoint check.
133  * In user mode, there is no watchpoint support now.
134  *
135  * It will trigger an exception if there is no mapping in the TLB
136  * and the page table walk can't fill the TLB entry. Then the guest
137  * software can return here after processing the exception, or never return.
138  */
139 static void probe_pages(CPURISCVState *env, target_ulong addr,
140                         target_ulong len, uintptr_t ra,
141                         MMUAccessType access_type)
142 {
143     target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
144     target_ulong curlen = MIN(pagelen, len);
145 
146     probe_access(env, addr, curlen, access_type,
147                  cpu_mmu_index(env, false), ra);
148     if (len > curlen) {
149         addr += curlen;
150         curlen = len - curlen;
151         probe_access(env, addr, curlen, access_type,
152                      cpu_mmu_index(env, false), ra);
153     }
154 }
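
/*
 * Example (illustrative, assuming 4 KiB pages): for addr = 0x1ff8 and
 * len = 16, pagelen = -(addr | TARGET_PAGE_MASK) = 8, so the first
 * probe covers [0x1ff8, 0x2000) and the second probe covers the
 * remaining 8 bytes starting at 0x2000 on the next page.
 */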
155 
156 static inline void vext_set_elem_mask(void *v0, int index,
157                                       uint8_t value)
158 {
159     int idx = index / 64;
160     int pos = index % 64;
161     uint64_t old = ((uint64_t *)v0)[idx];
162     ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
163 }
164 
165 /*
166  * Earlier designs (pre-0.9) had a varying number of bits
167  * per mask value (MLEN). In the 0.9 design, MLEN=1.
168  * (Section 4.5)
169  */
170 static inline int vext_elem_mask(void *v0, int index)
171 {
172     int idx = index / 64;
173     int pos = index % 64;
174     return (((uint64_t *)v0)[idx] >> pos) & 1;
175 }
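
/*
 * Example (illustrative): element index 70 maps to idx = 1, pos = 6,
 * i.e. bit 6 of the second uint64_t of v0; vext_set_elem_mask() updates
 * exactly that one bit via deposit64().
 */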
176 
177 /* element operations for load and store */
178 typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
179                                uint32_t idx, void *vd, uintptr_t retaddr);
180 
181 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
182 static void NAME(CPURISCVState *env, abi_ptr addr,         \
183                  uint32_t idx, void *vd, uintptr_t retaddr)\
184 {                                                          \
185     ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
186     *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
187 }                                                          \
188 
189 GEN_VEXT_LD_ELEM(ldb_b, int8_t,  H1, ldsb)
190 GEN_VEXT_LD_ELEM(ldb_h, int16_t, H2, ldsb)
191 GEN_VEXT_LD_ELEM(ldb_w, int32_t, H4, ldsb)
192 GEN_VEXT_LD_ELEM(ldb_d, int64_t, H8, ldsb)
193 GEN_VEXT_LD_ELEM(ldh_h, int16_t, H2, ldsw)
194 GEN_VEXT_LD_ELEM(ldh_w, int32_t, H4, ldsw)
195 GEN_VEXT_LD_ELEM(ldh_d, int64_t, H8, ldsw)
196 GEN_VEXT_LD_ELEM(ldw_w, int32_t, H4, ldl)
197 GEN_VEXT_LD_ELEM(ldw_d, int64_t, H8, ldl)
198 GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
199 GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
200 GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
201 GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)
202 GEN_VEXT_LD_ELEM(ldbu_b, uint8_t,  H1, ldub)
203 GEN_VEXT_LD_ELEM(ldbu_h, uint16_t, H2, ldub)
204 GEN_VEXT_LD_ELEM(ldbu_w, uint32_t, H4, ldub)
205 GEN_VEXT_LD_ELEM(ldbu_d, uint64_t, H8, ldub)
206 GEN_VEXT_LD_ELEM(ldhu_h, uint16_t, H2, lduw)
207 GEN_VEXT_LD_ELEM(ldhu_w, uint32_t, H4, lduw)
208 GEN_VEXT_LD_ELEM(ldhu_d, uint64_t, H8, lduw)
209 GEN_VEXT_LD_ELEM(ldwu_w, uint32_t, H4, ldl)
210 GEN_VEXT_LD_ELEM(ldwu_d, uint64_t, H8, ldl)
211 
212 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
213 static void NAME(CPURISCVState *env, abi_ptr addr,         \
214                  uint32_t idx, void *vd, uintptr_t retaddr)\
215 {                                                          \
216     ETYPE data = *((ETYPE *)vd + H(idx));                  \
217     cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
218 }
219 
220 GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
221 GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
222 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
223 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
224 
225 /*
226  *** stride: access vector element from strided memory
227  */
228 static void
229 vext_ldst_stride(void *vd, void *v0, target_ulong base,
230                  target_ulong stride, CPURISCVState *env,
231                  uint32_t desc, uint32_t vm,
232                  vext_ldst_elem_fn *ldst_elem,
233                  uint32_t esz, uintptr_t ra, MMUAccessType access_type)
234 {
235     uint32_t i, k;
236     uint32_t nf = vext_nf(desc);
237     uint32_t vlmax = vext_maxsz(desc) / esz;
238 
239     /* probe every access */
240     for (i = 0; i < env->vl; i++) {
241         if (!vm && !vext_elem_mask(v0, i)) {
242             continue;
243         }
244         probe_pages(env, base + stride * i, nf * esz, ra, access_type);
245     }
246     /* do real access */
247     for (i = 0; i < env->vl; i++) {
248         k = 0;
249         if (!vm && !vext_elem_mask(v0, i)) {
250             continue;
251         }
252         while (k < nf) {
253             target_ulong addr = base + stride * i + k * esz;
254             ldst_elem(env, addr, i + k * vlmax, vd, ra);
255             k++;
256         }
257     }
258 }
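
/*
 * Layout note (illustrative): segment field k of element i is read from
 * base + stride * i + k * esz but written to register element index
 * i + k * vlmax, i.e. each field is de-interleaved into its own vector
 * register group. For nf = 2 and vl = 4, field 0 fills indices 0..3 of
 * vd and field 1 fills indices vlmax..vlmax+3 of the following group.
 */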
259 
260 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
261 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
262                   target_ulong stride, CPURISCVState *env,              \
263                   uint32_t desc)                                        \
264 {                                                                       \
265     uint32_t vm = vext_vm(desc);                                        \
266     vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
267                      sizeof(ETYPE), GETPC(), MMU_DATA_LOAD);            \
268 }
269 
270 GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b)
271 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
272 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
273 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)
274 
275 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                       \
276 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
277                   target_ulong stride, CPURISCVState *env,              \
278                   uint32_t desc)                                        \
279 {                                                                       \
280     uint32_t vm = vext_vm(desc);                                        \
281     vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
282                      sizeof(ETYPE), GETPC(), MMU_DATA_STORE);           \
283 }
284 
285 GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b)
286 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
287 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
288 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
289 
290 /*
291  *** unit-stride: access elements stored contiguously in memory
292  */
293 
294 /* unmasked unit-stride load and store operation */
295 static void
296 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
297              vext_ldst_elem_fn *ldst_elem,
298              uint32_t esz, uintptr_t ra, MMUAccessType access_type)
299 {
300     uint32_t i, k;
301     uint32_t nf = vext_nf(desc);
302     uint32_t vlmax = vext_maxsz(desc) / esz;
303 
304     /* probe every access */
305     probe_pages(env, base, env->vl * nf * esz, ra, access_type);
306     /* load bytes from guest memory */
307     for (i = 0; i < env->vl; i++) {
308         k = 0;
309         while (k < nf) {
310             target_ulong addr = base + (i * nf + k) * esz;
311             ldst_elem(env, addr, i + k * vlmax, vd, ra);
312             k++;
313         }
314     }
315 }
316 
317 /*
318  * A masked unit-stride load or store is a special case of a strided
319  * operation with stride = NF * sizeof(ETYPE).
320  */
321 
322 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN)                            \
323 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
324                          CPURISCVState *env, uint32_t desc)             \
325 {                                                                       \
326     uint32_t stride = vext_nf(desc) * sizeof(ETYPE);                    \
327     vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
328                      sizeof(ETYPE), GETPC(), MMU_DATA_LOAD);            \
329 }                                                                       \
330                                                                         \
331 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
332                   CPURISCVState *env, uint32_t desc)                    \
333 {                                                                       \
334     vext_ldst_us(vd, base, env, desc, LOAD_FN,                          \
335                  sizeof(ETYPE), GETPC(), MMU_DATA_LOAD);                \
336 }
337 
338 GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b)
339 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
340 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
341 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)
342 
343 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN)                           \
344 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
345                          CPURISCVState *env, uint32_t desc)             \
346 {                                                                       \
347     uint32_t stride = vext_nf(desc) * sizeof(ETYPE);                    \
348     vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,  \
349                      sizeof(ETYPE), GETPC(), MMU_DATA_STORE);           \
350 }                                                                       \
351                                                                         \
352 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
353                   CPURISCVState *env, uint32_t desc)                    \
354 {                                                                       \
355     vext_ldst_us(vd, base, env, desc, STORE_FN,                         \
356                  sizeof(ETYPE), GETPC(), MMU_DATA_STORE);               \
357 }
358 
359 GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b)
360 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
361 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
362 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)
363 
364 /*
365  *** index: access vector element from indexed memory
366  */
367 typedef target_ulong vext_get_index_addr(target_ulong base,
368         uint32_t idx, void *vs2);
369 
370 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
371 static target_ulong NAME(target_ulong base,            \
372                          uint32_t idx, void *vs2)      \
373 {                                                      \
374     return (base + *((ETYPE *)vs2 + H(idx)));          \
375 }
376 
377 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t,  H1)
378 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
379 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
380 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
381 
382 static inline void
383 vext_ldst_index(void *vd, void *v0, target_ulong base,
384                 void *vs2, CPURISCVState *env, uint32_t desc,
385                 vext_get_index_addr get_index_addr,
386                 vext_ldst_elem_fn *ldst_elem,
387                 uint32_t esz, uintptr_t ra, MMUAccessType access_type)
388 {
389     uint32_t i, k;
390     uint32_t nf = vext_nf(desc);
391     uint32_t vm = vext_vm(desc);
392     uint32_t vlmax = vext_maxsz(desc) / esz;
393 
394     /* probe every access */
395     for (i = 0; i < env->vl; i++) {
396         if (!vm && !vext_elem_mask(v0, i)) {
397             continue;
398         }
399         probe_pages(env, get_index_addr(base, i, vs2), nf * esz, ra,
400                     access_type);
401     }
402     /* load bytes from guest memory */
403     for (i = 0; i < env->vl; i++) {
404         k = 0;
405         if (!vm && !vext_elem_mask(v0, i)) {
406             continue;
407         }
408         while (k < nf) {
409             abi_ptr addr = get_index_addr(base, i, vs2) + k * esz;
410             ldst_elem(env, addr, i + k * vlmax, vd, ra);
411             k++;
412         }
413     }
414 }
415 
416 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)                  \
417 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                   \
418                   void *vs2, CPURISCVState *env, uint32_t desc)            \
419 {                                                                          \
420     vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,                \
421                     LOAD_FN, sizeof(ETYPE), GETPC(), MMU_DATA_LOAD);       \
422 }
423 
424 GEN_VEXT_LD_INDEX(vlxei8_8_v,   int8_t,  idx_b, lde_b)
425 GEN_VEXT_LD_INDEX(vlxei8_16_v,  int16_t, idx_b, lde_h)
426 GEN_VEXT_LD_INDEX(vlxei8_32_v,  int32_t, idx_b, lde_w)
427 GEN_VEXT_LD_INDEX(vlxei8_64_v,  int64_t, idx_b, lde_d)
428 GEN_VEXT_LD_INDEX(vlxei16_8_v,  int8_t,  idx_h, lde_b)
429 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
430 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
431 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
432 GEN_VEXT_LD_INDEX(vlxei32_8_v,  int8_t,  idx_w, lde_b)
433 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
434 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
435 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
436 GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b)
437 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
438 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
439 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)
440 
441 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)       \
442 void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
443                   void *vs2, CPURISCVState *env, uint32_t desc)  \
444 {                                                                \
445     vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
446                     STORE_FN, sizeof(ETYPE),                     \
447                     GETPC(), MMU_DATA_STORE);                    \
448 }
449 
450 GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
451 GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
452 GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
453 GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
454 GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
455 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
456 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
457 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
458 GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
459 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
460 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
461 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
462 GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
463 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
464 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
465 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)
466 
467 /*
468  *** unit-stride fault-only-first load instructions
469  */
470 static inline void
471 vext_ldff(void *vd, void *v0, target_ulong base,
472           CPURISCVState *env, uint32_t desc,
473           vext_ldst_elem_fn *ldst_elem,
474           uint32_t esz, uint32_t msz, uintptr_t ra)
475 {
476     void *host;
477     uint32_t i, k, vl = 0;
478     uint32_t nf = vext_nf(desc);
479     uint32_t vm = vext_vm(desc);
480     uint32_t vlmax = vext_maxsz(desc) / esz;
481     target_ulong addr, offset, remain;
482 
483     /* probe every access */
484     for (i = 0; i < env->vl; i++) {
485         if (!vm && !vext_elem_mask(v0, i)) {
486             continue;
487         }
488         addr = base + nf * i * msz;
489         if (i == 0) {
490             probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD);
491         } else {
492             /* if it triggers an exception, no need to check watchpoint */
493             remain = nf * msz;
494             while (remain > 0) {
495                 offset = -(addr | TARGET_PAGE_MASK);
496                 host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
497                                          cpu_mmu_index(env, false));
498                 if (host) {
499 #ifdef CONFIG_USER_ONLY
500                     if (page_check_range(addr, nf * msz, PAGE_READ) < 0) {
501                         vl = i;
502                         goto ProbeSuccess;
503                     }
504 #else
505                     probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD);
506 #endif
507                 } else {
508                     vl = i;
509                     goto ProbeSuccess;
510                 }
511                 if (remain <= offset) {
512                     break;
513                 }
514                 remain -= offset;
515                 addr += offset;
516             }
517         }
518     }
519 ProbeSuccess:
520     /* load bytes from guest memory */
521     if (vl != 0) {
522         env->vl = vl;
523     }
524     for (i = 0; i < env->vl; i++) {
525         k = 0;
526         if (!vm && !vext_elem_mask(v0, i)) {
527             continue;
528         }
529         while (k < nf) {
530             target_ulong addr = base + (i * nf + k) * msz;
531             ldst_elem(env, addr, i + k * vlmax, vd, ra);
532             k++;
533         }
534     }
535 }
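
/*
 * Behaviour sketch (illustrative): only element 0 may raise an
 * exception. For i > 0, a failed translation (tlb_vaddr_to_host()
 * returning NULL, or page_check_range() failing in user mode) simply
 * truncates vl to i; e.g. with vl = 8 and an unmapped page reached at
 * element 5, env->vl becomes 5 and the loads are done for elements
 * 0..4 only.
 */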
536 
537 #define GEN_VEXT_LDFF(NAME, MTYPE, ETYPE, LOAD_FN)               \
538 void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
539                   CPURISCVState *env, uint32_t desc)             \
540 {                                                                \
541     vext_ldff(vd, v0, base, env, desc, LOAD_FN,                  \
542               sizeof(ETYPE), sizeof(MTYPE), GETPC());            \
543 }
544 
545 GEN_VEXT_LDFF(vlbff_v_b,  int8_t,   int8_t,   ldb_b)
546 GEN_VEXT_LDFF(vlbff_v_h,  int8_t,   int16_t,  ldb_h)
547 GEN_VEXT_LDFF(vlbff_v_w,  int8_t,   int32_t,  ldb_w)
548 GEN_VEXT_LDFF(vlbff_v_d,  int8_t,   int64_t,  ldb_d)
549 GEN_VEXT_LDFF(vlhff_v_h,  int16_t,  int16_t,  ldh_h)
550 GEN_VEXT_LDFF(vlhff_v_w,  int16_t,  int32_t,  ldh_w)
551 GEN_VEXT_LDFF(vlhff_v_d,  int16_t,  int64_t,  ldh_d)
552 GEN_VEXT_LDFF(vlwff_v_w,  int32_t,  int32_t,  ldw_w)
553 GEN_VEXT_LDFF(vlwff_v_d,  int32_t,  int64_t,  ldw_d)
554 GEN_VEXT_LDFF(vleff_v_b,  int8_t,   int8_t,   lde_b)
555 GEN_VEXT_LDFF(vleff_v_h,  int16_t,  int16_t,  lde_h)
556 GEN_VEXT_LDFF(vleff_v_w,  int32_t,  int32_t,  lde_w)
557 GEN_VEXT_LDFF(vleff_v_d,  int64_t,  int64_t,  lde_d)
558 GEN_VEXT_LDFF(vlbuff_v_b, uint8_t,  uint8_t,  ldbu_b)
559 GEN_VEXT_LDFF(vlbuff_v_h, uint8_t,  uint16_t, ldbu_h)
560 GEN_VEXT_LDFF(vlbuff_v_w, uint8_t,  uint32_t, ldbu_w)
561 GEN_VEXT_LDFF(vlbuff_v_d, uint8_t,  uint64_t, ldbu_d)
562 GEN_VEXT_LDFF(vlhuff_v_h, uint16_t, uint16_t, ldhu_h)
563 GEN_VEXT_LDFF(vlhuff_v_w, uint16_t, uint32_t, ldhu_w)
564 GEN_VEXT_LDFF(vlhuff_v_d, uint16_t, uint64_t, ldhu_d)
565 GEN_VEXT_LDFF(vlwuff_v_w, uint32_t, uint32_t, ldwu_w)
566 GEN_VEXT_LDFF(vlwuff_v_d, uint32_t, uint64_t, ldwu_d)
567 
568 #define DO_SWAP(N, M) (M)
569 #define DO_AND(N, M)  (N & M)
570 #define DO_XOR(N, M)  (N ^ M)
571 #define DO_OR(N, M)   (N | M)
572 #define DO_ADD(N, M)  (N + M)
573 
574 /* Signed min/max */
575 #define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
576 #define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))
577 
578 /* Unsigned min/max */
579 #define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
580 #define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)
581 
582 /*
583  *** Vector Integer Arithmetic Instructions
584  */
585 
586 /* expand macro args before macro */
587 #define RVVCALL(macro, ...)  macro(__VA_ARGS__)
588 
589 /* (TD, T1, T2, TX1, TX2) */
590 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
591 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
592 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
593 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
594 #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
595 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
596 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
597 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
598 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
599 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
600 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
601 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
602 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
603 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
604 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
605 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
606 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
607 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
608 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
609 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
610 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
611 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
612 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
613 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
614 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
615 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
616 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
617 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
618 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
619 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
620 
621 /* operation of two vector elements */
622 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
623 
624 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
625 static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
626 {                                                               \
627     TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
628     TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
629     *((TD *)vd + HD(i)) = OP(s2, s1);                           \
630 }
631 #define DO_SUB(N, M) (N - M)
632 #define DO_RSUB(N, M) (M - N)
633 
634 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
635 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
636 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
637 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
638 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
639 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
640 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
641 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
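
/*
 * For reference (illustrative, not compiled): the RVVCALL() line for
 * vadd_vv_b above expands to roughly
 *
 *     static void do_vadd_vv_b(void *vd, void *vs1, void *vs2, int i)
 *     {
 *         int8_t s1 = *((int8_t *)vs1 + H1(i));
 *         int8_t s2 = *((int8_t *)vs2 + H1(i));
 *         *((int8_t *)vd + H1(i)) = s2 + s1;
 *     }
 */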
642 
643 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
644                        CPURISCVState *env, uint32_t desc,
645                        uint32_t esz, uint32_t dsz,
646                        opivv2_fn *fn)
647 {
648     uint32_t vm = vext_vm(desc);
649     uint32_t vl = env->vl;
650     uint32_t i;
651 
652     for (i = 0; i < vl; i++) {
653         if (!vm && !vext_elem_mask(v0, i)) {
654             continue;
655         }
656         fn(vd, vs1, vs2, i);
657     }
658 }
659 
660 /* generate the helpers for OPIVV */
661 #define GEN_VEXT_VV(NAME, ESZ, DSZ)                       \
662 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
663                   void *vs2, CPURISCVState *env,          \
664                   uint32_t desc)                          \
665 {                                                         \
666     do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,     \
667                do_##NAME);                                \
668 }
669 
670 GEN_VEXT_VV(vadd_vv_b, 1, 1)
671 GEN_VEXT_VV(vadd_vv_h, 2, 2)
672 GEN_VEXT_VV(vadd_vv_w, 4, 4)
673 GEN_VEXT_VV(vadd_vv_d, 8, 8)
674 GEN_VEXT_VV(vsub_vv_b, 1, 1)
675 GEN_VEXT_VV(vsub_vv_h, 2, 2)
676 GEN_VEXT_VV(vsub_vv_w, 4, 4)
677 GEN_VEXT_VV(vsub_vv_d, 8, 8)
678 
679 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
680 
681 /*
682  * (T1)s1 gives the real operand type.
683  * (TX1)(T1)s1 expands the operand type for widening or narrowing operations.
684  */
685 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
686 static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
687 {                                                                   \
688     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
689     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
690 }
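
/*
 * Example (illustrative): for a widening op such as
 * RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) below,
 * T1 = int8_t and TX1 = int16_t, so (TX1)(T1)s1 first truncates the
 * target_long scalar to 8 bits and then sign-extends it to the 16-bit
 * operand width before the addition.
 */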
691 
692 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
693 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
694 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
695 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
696 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
697 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
698 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
699 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
700 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
701 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
702 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
703 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
704 
705 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
706                        CPURISCVState *env, uint32_t desc,
707                        uint32_t esz, uint32_t dsz,
708                        opivx2_fn fn)
709 {
710     uint32_t vm = vext_vm(desc);
711     uint32_t vl = env->vl;
712     uint32_t i;
713 
714     for (i = 0; i < vl; i++) {
715         if (!vm && !vext_elem_mask(v0, i)) {
716             continue;
717         }
718         fn(vd, s1, vs2, i);
719     }
720 }
721 
722 /* generate the helpers for OPIVX */
723 #define GEN_VEXT_VX(NAME, ESZ, DSZ)                       \
724 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
725                   void *vs2, CPURISCVState *env,          \
726                   uint32_t desc)                          \
727 {                                                         \
728     do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ,      \
729                do_##NAME);                                \
730 }
731 
732 GEN_VEXT_VX(vadd_vx_b, 1, 1)
733 GEN_VEXT_VX(vadd_vx_h, 2, 2)
734 GEN_VEXT_VX(vadd_vx_w, 4, 4)
735 GEN_VEXT_VX(vadd_vx_d, 8, 8)
736 GEN_VEXT_VX(vsub_vx_b, 1, 1)
737 GEN_VEXT_VX(vsub_vx_h, 2, 2)
738 GEN_VEXT_VX(vsub_vx_w, 4, 4)
739 GEN_VEXT_VX(vsub_vx_d, 8, 8)
740 GEN_VEXT_VX(vrsub_vx_b, 1, 1)
741 GEN_VEXT_VX(vrsub_vx_h, 2, 2)
742 GEN_VEXT_VX(vrsub_vx_w, 4, 4)
743 GEN_VEXT_VX(vrsub_vx_d, 8, 8)
744 
745 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
746 {
747     intptr_t oprsz = simd_oprsz(desc);
748     intptr_t i;
749 
750     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
751         *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
752     }
753 }
754 
755 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
756 {
757     intptr_t oprsz = simd_oprsz(desc);
758     intptr_t i;
759 
760     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
761         *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
762     }
763 }
764 
765 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
766 {
767     intptr_t oprsz = simd_oprsz(desc);
768     intptr_t i;
769 
770     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
771         *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
772     }
773 }
774 
775 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
776 {
777     intptr_t oprsz = simd_oprsz(desc);
778     intptr_t i;
779 
780     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
781         *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
782     }
783 }
784 
785 /* Vector Widening Integer Add/Subtract */
786 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
787 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
788 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
789 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
790 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
791 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
792 #define WOP_WUUU_B  uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
793 #define WOP_WUUU_H  uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
794 #define WOP_WUUU_W  uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
795 #define WOP_WSSS_B  int16_t, int8_t, int16_t, int16_t, int16_t
796 #define WOP_WSSS_H  int32_t, int16_t, int32_t, int32_t, int32_t
797 #define WOP_WSSS_W  int64_t, int32_t, int64_t, int64_t, int64_t
798 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
799 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
800 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
801 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
802 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
803 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
804 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
805 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
806 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
807 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
808 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
809 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
810 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
811 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
812 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
813 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
814 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
815 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
816 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
817 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
818 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
819 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
820 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
821 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
822 GEN_VEXT_VV(vwaddu_vv_b, 1, 2)
823 GEN_VEXT_VV(vwaddu_vv_h, 2, 4)
824 GEN_VEXT_VV(vwaddu_vv_w, 4, 8)
825 GEN_VEXT_VV(vwsubu_vv_b, 1, 2)
826 GEN_VEXT_VV(vwsubu_vv_h, 2, 4)
827 GEN_VEXT_VV(vwsubu_vv_w, 4, 8)
828 GEN_VEXT_VV(vwadd_vv_b, 1, 2)
829 GEN_VEXT_VV(vwadd_vv_h, 2, 4)
830 GEN_VEXT_VV(vwadd_vv_w, 4, 8)
831 GEN_VEXT_VV(vwsub_vv_b, 1, 2)
832 GEN_VEXT_VV(vwsub_vv_h, 2, 4)
833 GEN_VEXT_VV(vwsub_vv_w, 4, 8)
834 GEN_VEXT_VV(vwaddu_wv_b, 1, 2)
835 GEN_VEXT_VV(vwaddu_wv_h, 2, 4)
836 GEN_VEXT_VV(vwaddu_wv_w, 4, 8)
837 GEN_VEXT_VV(vwsubu_wv_b, 1, 2)
838 GEN_VEXT_VV(vwsubu_wv_h, 2, 4)
839 GEN_VEXT_VV(vwsubu_wv_w, 4, 8)
840 GEN_VEXT_VV(vwadd_wv_b, 1, 2)
841 GEN_VEXT_VV(vwadd_wv_h, 2, 4)
842 GEN_VEXT_VV(vwadd_wv_w, 4, 8)
843 GEN_VEXT_VV(vwsub_wv_b, 1, 2)
844 GEN_VEXT_VV(vwsub_wv_h, 2, 4)
845 GEN_VEXT_VV(vwsub_wv_w, 4, 8)
846 
847 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
848 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
849 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
850 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
851 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
852 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
853 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
854 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
855 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
856 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
857 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
858 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
859 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
860 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
861 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
862 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
863 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
864 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
865 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
866 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
867 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
868 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
869 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
870 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
871 GEN_VEXT_VX(vwaddu_vx_b, 1, 2)
872 GEN_VEXT_VX(vwaddu_vx_h, 2, 4)
873 GEN_VEXT_VX(vwaddu_vx_w, 4, 8)
874 GEN_VEXT_VX(vwsubu_vx_b, 1, 2)
875 GEN_VEXT_VX(vwsubu_vx_h, 2, 4)
876 GEN_VEXT_VX(vwsubu_vx_w, 4, 8)
877 GEN_VEXT_VX(vwadd_vx_b, 1, 2)
878 GEN_VEXT_VX(vwadd_vx_h, 2, 4)
879 GEN_VEXT_VX(vwadd_vx_w, 4, 8)
880 GEN_VEXT_VX(vwsub_vx_b, 1, 2)
881 GEN_VEXT_VX(vwsub_vx_h, 2, 4)
882 GEN_VEXT_VX(vwsub_vx_w, 4, 8)
883 GEN_VEXT_VX(vwaddu_wx_b, 1, 2)
884 GEN_VEXT_VX(vwaddu_wx_h, 2, 4)
885 GEN_VEXT_VX(vwaddu_wx_w, 4, 8)
886 GEN_VEXT_VX(vwsubu_wx_b, 1, 2)
887 GEN_VEXT_VX(vwsubu_wx_h, 2, 4)
888 GEN_VEXT_VX(vwsubu_wx_w, 4, 8)
889 GEN_VEXT_VX(vwadd_wx_b, 1, 2)
890 GEN_VEXT_VX(vwadd_wx_h, 2, 4)
891 GEN_VEXT_VX(vwadd_wx_w, 4, 8)
892 GEN_VEXT_VX(vwsub_wx_b, 1, 2)
893 GEN_VEXT_VX(vwsub_wx_h, 2, 4)
894 GEN_VEXT_VX(vwsub_wx_w, 4, 8)
895 
896 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
897 #define DO_VADC(N, M, C) (N + M + C)
898 #define DO_VSBC(N, M, C) (N - M - C)
899 
900 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP)              \
901 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
902                   CPURISCVState *env, uint32_t desc)          \
903 {                                                             \
904     uint32_t vl = env->vl;                                    \
905     uint32_t i;                                               \
906                                                               \
907     for (i = 0; i < vl; i++) {                                \
908         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
909         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
910         uint8_t carry = vext_elem_mask(v0, i);                \
911                                                               \
912         *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry);         \
913     }                                                         \
914 }
915 
916 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t,  H1, DO_VADC)
917 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
918 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
919 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)
920 
921 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t,  H1, DO_VSBC)
922 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
923 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
924 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
925 
926 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP)                         \
927 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
928                   CPURISCVState *env, uint32_t desc)                     \
929 {                                                                        \
930     uint32_t vl = env->vl;                                               \
931     uint32_t i;                                                          \
932                                                                          \
933     for (i = 0; i < vl; i++) {                                           \
934         ETYPE s2 = *((ETYPE *)vs2 + H(i));                               \
935         uint8_t carry = vext_elem_mask(v0, i);                           \
936                                                                          \
937         *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
938     }                                                                    \
939 }
940 
941 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t,  H1, DO_VADC)
942 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
943 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
944 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)
945 
946 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t,  H1, DO_VSBC)
947 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
948 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
949 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
950 
951 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N :           \
952                           (__typeof(N))(N + M) < N)
953 #define DO_MSBC(N, M, C) (C ? N <= M : N < M)
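
/*
 * Worked example (illustrative): with uint8_t operands N = 200, M = 100
 * and no carry-in, (uint8_t)(N + M) wraps to 44, and 44 < 200 reports a
 * carry-out of 1. DO_MSBC likewise reports a borrow when the subtrahend
 * exceeds the minuend (or equals it when a borrow-in is pending).
 */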
954 
955 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
956 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
957                   CPURISCVState *env, uint32_t desc)          \
958 {                                                             \
959     uint32_t vl = env->vl;                                    \
960     uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);        \
961     uint32_t i;                                               \
962                                                               \
963     for (i = 0; i < vl; i++) {                                \
964         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
965         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
966         uint8_t carry = vext_elem_mask(v0, i);                \
967                                                               \
968         vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry));      \
969     }                                                         \
970     for (; i < vlmax; i++) {                                  \
971         vext_set_elem_mask(vd, i, 0);                         \
972     }                                                         \
973 }
974 
975 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t,  H1, DO_MADC)
976 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
977 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
978 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)
979 
980 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t,  H1, DO_MSBC)
981 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
982 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
983 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
984 
985 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)               \
986 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
987                   void *vs2, CPURISCVState *env, uint32_t desc) \
988 {                                                               \
989     uint32_t vl = env->vl;                                      \
990     uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);          \
991     uint32_t i;                                                 \
992                                                                 \
993     for (i = 0; i < vl; i++) {                                  \
994         ETYPE s2 = *((ETYPE *)vs2 + H(i));                      \
995         uint8_t carry = vext_elem_mask(v0, i);                  \
996                                                                 \
997         vext_set_elem_mask(vd, i,                               \
998                 DO_OP(s2, (ETYPE)(target_long)s1, carry));      \
999     }                                                           \
1000     for (; i < vlmax; i++) {                                    \
1001         vext_set_elem_mask(vd, i, 0);                           \
1002     }                                                           \
1003 }
1004 
1005 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t,  H1, DO_MADC)
1006 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
1007 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
1008 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)
1009 
1010 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t,  H1, DO_MSBC)
1011 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
1012 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
1013 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
1014 
1015 /* Vector Bitwise Logical Instructions */
1016 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
1017 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
1018 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
1019 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
1020 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
1021 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
1022 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
1023 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
1024 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
1025 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
1026 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
1027 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
1028 GEN_VEXT_VV(vand_vv_b, 1, 1)
1029 GEN_VEXT_VV(vand_vv_h, 2, 2)
1030 GEN_VEXT_VV(vand_vv_w, 4, 4)
1031 GEN_VEXT_VV(vand_vv_d, 8, 8)
1032 GEN_VEXT_VV(vor_vv_b, 1, 1)
1033 GEN_VEXT_VV(vor_vv_h, 2, 2)
1034 GEN_VEXT_VV(vor_vv_w, 4, 4)
1035 GEN_VEXT_VV(vor_vv_d, 8, 8)
1036 GEN_VEXT_VV(vxor_vv_b, 1, 1)
1037 GEN_VEXT_VV(vxor_vv_h, 2, 2)
1038 GEN_VEXT_VV(vxor_vv_w, 4, 4)
1039 GEN_VEXT_VV(vxor_vv_d, 8, 8)
1040 
1041 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
1042 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
1043 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
1044 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
1045 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
1046 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
1047 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
1048 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
1049 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
1050 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
1051 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
1052 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
1053 GEN_VEXT_VX(vand_vx_b, 1, 1)
1054 GEN_VEXT_VX(vand_vx_h, 2, 2)
1055 GEN_VEXT_VX(vand_vx_w, 4, 4)
1056 GEN_VEXT_VX(vand_vx_d, 8, 8)
1057 GEN_VEXT_VX(vor_vx_b, 1, 1)
1058 GEN_VEXT_VX(vor_vx_h, 2, 2)
1059 GEN_VEXT_VX(vor_vx_w, 4, 4)
1060 GEN_VEXT_VX(vor_vx_d, 8, 8)
1061 GEN_VEXT_VX(vxor_vx_b, 1, 1)
1062 GEN_VEXT_VX(vxor_vx_h, 2, 2)
1063 GEN_VEXT_VX(vxor_vx_w, 4, 4)
1064 GEN_VEXT_VX(vxor_vx_d, 8, 8)
1065 
1066 /* Vector Single-Width Bit Shift Instructions */
1067 #define DO_SLL(N, M)  (N << (M))
1068 #define DO_SRL(N, M)  (N >> (M))
1069 
1070 /* generate the helpers for shift instructions with two vector operands */
1071 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK)             \
1072 void HELPER(NAME)(void *vd, void *v0, void *vs1,                          \
1073                   void *vs2, CPURISCVState *env, uint32_t desc)           \
1074 {                                                                         \
1075     uint32_t vm = vext_vm(desc);                                          \
1076     uint32_t vl = env->vl;                                                \
1077     uint32_t i;                                                           \
1078                                                                           \
1079     for (i = 0; i < vl; i++) {                                            \
1080         if (!vm && !vext_elem_mask(v0, i)) {                              \
1081             continue;                                                     \
1082         }                                                                 \
1083         TS1 s1 = *((TS1 *)vs1 + HS1(i));                                  \
1084         TS2 s2 = *((TS2 *)vs2 + HS2(i));                                  \
1085         *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);                        \
1086     }                                                                     \
1087 }
1088 
1089 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t,  uint8_t, H1, H1, DO_SLL, 0x7)
1090 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
1091 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
1092 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)
1093 
1094 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1095 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1096 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1097 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1098 
1099 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t,  int8_t, H1, H1, DO_SRL, 0x7)
1100 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
1101 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1102 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
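
/*
 * Note (illustrative): for vsra the source type TS2 is signed, so
 * DO_SRL(s2, shamt) becomes a signed right shift, which GCC and Clang
 * (the compilers QEMU targets) implement arithmetically; e.g.
 * (int8_t)0x80 >> 1 yields (int8_t)0xc0. The MASK argument limits the
 * shift amount to SEW-1 bits.
 */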
1103 
1104 /* generate the helpers for shift instructions with one vector and one scalar */
1105 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
1106 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,      \
1107         void *vs2, CPURISCVState *env, uint32_t desc)       \
1108 {                                                           \
1109     uint32_t vm = vext_vm(desc);                            \
1110     uint32_t vl = env->vl;                                  \
1111     uint32_t i;                                             \
1112                                                             \
1113     for (i = 0; i < vl; i++) {                              \
1114         if (!vm && !vext_elem_mask(v0, i)) {                \
1115             continue;                                       \
1116         }                                                   \
1117         TS2 s2 = *((TS2 *)vs2 + HS2(i));                    \
1118         *((TD *)vd + HD(i)) = OP(s2, s1 & MASK);            \
1119     }                                                       \
1120 }
1121 
1122 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
1123 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
1124 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
1125 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)
1126 
1127 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1128 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1129 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1130 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1131 
1132 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
1133 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
1134 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1135 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1136 
1137 /* Vector Narrowing Integer Right Shift Instructions */
1138 GEN_VEXT_SHIFT_VV(vnsrl_vv_b, uint8_t,  uint16_t, H1, H2, DO_SRL, 0xf)
1139 GEN_VEXT_SHIFT_VV(vnsrl_vv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1140 GEN_VEXT_SHIFT_VV(vnsrl_vv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1141 GEN_VEXT_SHIFT_VV(vnsra_vv_b, uint8_t,  int16_t, H1, H2, DO_SRL, 0xf)
1142 GEN_VEXT_SHIFT_VV(vnsra_vv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1143 GEN_VEXT_SHIFT_VV(vnsra_vv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1144 GEN_VEXT_SHIFT_VX(vnsrl_vx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1145 GEN_VEXT_SHIFT_VX(vnsrl_vx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1146 GEN_VEXT_SHIFT_VX(vnsrl_vx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1147 GEN_VEXT_SHIFT_VX(vnsra_vx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
1148 GEN_VEXT_SHIFT_VX(vnsra_vx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1149 GEN_VEXT_SHIFT_VX(vnsra_vx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
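
/*
 * Note (illustrative): for the narrowing forms the source operand TS2 is
 * read at double SEW (HS2 indexes 2*SEW-wide elements), the shift amount
 * is masked to 2*SEW-1 bits, and only the low SEW bits of the result are
 * written back; e.g. vnsrl with s2 = 0x1234 and shamt = 4 stores 0x23.
 */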
1150 
1151 /* Vector Integer Comparison Instructions */
1152 #define DO_MSEQ(N, M) (N == M)
1153 #define DO_MSNE(N, M) (N != M)
1154 #define DO_MSLT(N, M) (N < M)
1155 #define DO_MSLE(N, M) (N <= M)
1156 #define DO_MSGT(N, M) (N > M)
1157 
1158 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                \
1159 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
1160                   CPURISCVState *env, uint32_t desc)          \
1161 {                                                             \
1162     uint32_t vm = vext_vm(desc);                              \
1163     uint32_t vl = env->vl;                                    \
1164     uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);        \
1165     uint32_t i;                                               \
1166                                                               \
1167     for (i = 0; i < vl; i++) {                                \
1168         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
1169         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
1170         if (!vm && !vext_elem_mask(v0, i)) {                  \
1171             continue;                                         \
1172         }                                                     \
1173         vext_set_elem_mask(vd, i, DO_OP(s2, s1));             \
1174     }                                                         \
1175     for (; i < vlmax; i++) {                                  \
1176         vext_set_elem_mask(vd, i, 0);                         \
1177     }                                                         \
1178 }
1179 
1180 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
1181 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
1182 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
1183 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
1184 
1185 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t,  H1, DO_MSNE)
1186 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
1187 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
1188 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
1189 
1190 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t,  H1, DO_MSLT)
1191 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
1192 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
1193 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
1194 
1195 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t,  H1, DO_MSLT)
1196 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
1197 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
1198 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
1199 
1200 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t,  H1, DO_MSLE)
1201 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
1202 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
1203 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
1204 
1205 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t,  H1, DO_MSLE)
1206 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
1207 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
1208 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1209 
1210 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP)                      \
1211 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
1212                   CPURISCVState *env, uint32_t desc)                \
1213 {                                                                   \
1214     uint32_t vm = vext_vm(desc);                                    \
1215     uint32_t vl = env->vl;                                          \
1216     uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);              \
1217     uint32_t i;                                                     \
1218                                                                     \
1219     for (i = 0; i < vl; i++) {                                      \
1220         ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
1221         if (!vm && !vext_elem_mask(v0, i)) {                        \
1222             continue;                                               \
1223         }                                                           \
1224         vext_set_elem_mask(vd, i,                                   \
1225                 DO_OP(s2, (ETYPE)(target_long)s1));                 \
1226     }                                                               \
1227     for (; i < vlmax; i++) {                                        \
1228         vext_set_elem_mask(vd, i, 0);                               \
1229     }                                                               \
1230 }
1231 
1232 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t,  H1, DO_MSEQ)
1233 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
1234 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
1235 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
1236 
1237 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t,  H1, DO_MSNE)
1238 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
1239 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
1240 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
1241 
1242 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t,  H1, DO_MSLT)
1243 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
1244 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
1245 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
1246 
1247 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t,  H1, DO_MSLT)
1248 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
1249 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
1250 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
1251 
1252 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t,  H1, DO_MSLE)
1253 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
1254 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
1255 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
1256 
1257 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t,  H1, DO_MSLE)
1258 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
1259 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
1260 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
1261 
1262 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t,  H1, DO_MSGT)
1263 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
1264 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
1265 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
1266 
1267 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t,  H1, DO_MSGT)
1268 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
1269 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
1270 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
1271 
1272 /* Vector Integer Min/Max Instructions */
1273 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1274 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1275 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1276 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1277 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1278 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1279 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1280 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1281 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1282 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1283 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1284 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1285 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1286 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1287 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1288 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
1289 GEN_VEXT_VV(vminu_vv_b, 1, 1)
1290 GEN_VEXT_VV(vminu_vv_h, 2, 2)
1291 GEN_VEXT_VV(vminu_vv_w, 4, 4)
1292 GEN_VEXT_VV(vminu_vv_d, 8, 8)
1293 GEN_VEXT_VV(vmin_vv_b, 1, 1)
1294 GEN_VEXT_VV(vmin_vv_h, 2, 2)
1295 GEN_VEXT_VV(vmin_vv_w, 4, 4)
1296 GEN_VEXT_VV(vmin_vv_d, 8, 8)
1297 GEN_VEXT_VV(vmaxu_vv_b, 1, 1)
1298 GEN_VEXT_VV(vmaxu_vv_h, 2, 2)
1299 GEN_VEXT_VV(vmaxu_vv_w, 4, 4)
1300 GEN_VEXT_VV(vmaxu_vv_d, 8, 8)
1301 GEN_VEXT_VV(vmax_vv_b, 1, 1)
1302 GEN_VEXT_VV(vmax_vv_h, 2, 2)
1303 GEN_VEXT_VV(vmax_vv_w, 4, 4)
1304 GEN_VEXT_VV(vmax_vv_d, 8, 8)
1305 
1306 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1307 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1308 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1309 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1310 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1311 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1312 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1313 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1314 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1315 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1316 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1317 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1318 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1319 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1320 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1321 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
1322 GEN_VEXT_VX(vminu_vx_b, 1, 1)
1323 GEN_VEXT_VX(vminu_vx_h, 2, 2)
1324 GEN_VEXT_VX(vminu_vx_w, 4, 4)
1325 GEN_VEXT_VX(vminu_vx_d, 8, 8)
1326 GEN_VEXT_VX(vmin_vx_b, 1, 1)
1327 GEN_VEXT_VX(vmin_vx_h, 2, 2)
1328 GEN_VEXT_VX(vmin_vx_w, 4, 4)
1329 GEN_VEXT_VX(vmin_vx_d, 8, 8)
1330 GEN_VEXT_VX(vmaxu_vx_b, 1, 1)
1331 GEN_VEXT_VX(vmaxu_vx_h, 2, 2)
1332 GEN_VEXT_VX(vmaxu_vx_w, 4, 4)
1333 GEN_VEXT_VX(vmaxu_vx_d, 8, 8)
1334 GEN_VEXT_VX(vmax_vx_b, 1, 1)
1335 GEN_VEXT_VX(vmax_vx_h, 2, 2)
1336 GEN_VEXT_VX(vmax_vx_w, 4, 4)
1337 GEN_VEXT_VX(vmax_vx_d, 8, 8)
1338 
1339 /* Vector Single-Width Integer Multiply Instructions */
1340 #define DO_MUL(N, M) (N * M)
1341 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1342 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1343 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1344 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
1345 GEN_VEXT_VV(vmul_vv_b, 1, 1)
1346 GEN_VEXT_VV(vmul_vv_h, 2, 2)
1347 GEN_VEXT_VV(vmul_vv_w, 4, 4)
1348 GEN_VEXT_VV(vmul_vv_d, 8, 8)
1349 
1350 static int8_t do_mulh_b(int8_t s2, int8_t s1)
1351 {
1352     return (int16_t)s2 * (int16_t)s1 >> 8;
1353 }
1354 
1355 static int16_t do_mulh_h(int16_t s2, int16_t s1)
1356 {
1357     return (int32_t)s2 * (int32_t)s1 >> 16;
1358 }
1359 
1360 static int32_t do_mulh_w(int32_t s2, int32_t s1)
1361 {
1362     return (int64_t)s2 * (int64_t)s1 >> 32;
1363 }
1364 
1365 static int64_t do_mulh_d(int64_t s2, int64_t s1)
1366 {
1367     uint64_t hi_64, lo_64;
1368 
1369     muls64(&lo_64, &hi_64, s1, s2);
1370     return hi_64;
1371 }
1372 
1373 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1374 {
1375     return (uint16_t)s2 * (uint16_t)s1 >> 8;
1376 }
1377 
1378 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1379 {
1380     return (uint32_t)s2 * (uint32_t)s1 >> 16;
1381 }
1382 
1383 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1384 {
1385     return (uint64_t)s2 * (uint64_t)s1 >> 32;
1386 }
1387 
1388 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1389 {
1390     uint64_t hi_64, lo_64;
1391 
1392     mulu64(&lo_64, &hi_64, s2, s1);
1393     return hi_64;
1394 }
1395 
1396 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1397 {
1398     return (int16_t)s2 * (uint16_t)s1 >> 8;
1399 }
1400 
1401 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1402 {
1403     return (int32_t)s2 * (uint32_t)s1 >> 16;
1404 }
1405 
1406 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1407 {
1408     return (int64_t)s2 * (uint64_t)s1 >> 32;
1409 }
1410 
1411 /*
1412  * Let  A = signed operand,
1413  *      B = unsigned operand,
1414  *      P = mulu64(A, B), unsigned product of A's bit pattern and B
1415  *
1416  * When A < 0, the unsigned value of A's bit pattern is A + 2 ** 64, so
1417  *      P  = (A + 2 ** 64) * B
1418  *         = A * B + 2 ** 64 * B
1419  * and the desired signed product is
1420  *      SP = A * B
1421  *         = P - 2 ** 64 * B
1422  * When A >= 0,
1423  *      SP = P
1424  *
1425  * Subtracting 2 ** 64 * B only affects the upper 64 bits of the product,
1426  * hence
1427  *      HI_P -= (A < 0 ? B : 0)
1428  */
1429 
1430 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1431 {
1432     uint64_t hi_64, lo_64;
1433 
1434     mulu64(&lo_64, &hi_64, s2, s1);
1435 
1436     hi_64 -= s2 < 0 ? s1 : 0;
1437     return hi_64;
1438 }
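
/*
 * Minimal self-check sketch for the fix-up above (illustrative only, not
 * part of the build; assumes the host compiler provides __int128 and that
 * g_assert() from glib is available, as elsewhere in QEMU):
 */
#if 0
static void check_do_mulhsu_d(int64_t a, uint64_t b)
{
    __int128 ref = (__int128)a * b;      /* exact signed x unsigned product */
    int64_t hi_ref = ref >> 64;          /* its upper 64 bits */

    g_assert(do_mulhsu_d(a, b) == hi_ref);
}
#endif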
1439 
1440 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1441 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1442 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1443 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1444 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1445 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1446 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1447 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1448 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1449 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1450 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1451 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
1452 GEN_VEXT_VV(vmulh_vv_b, 1, 1)
1453 GEN_VEXT_VV(vmulh_vv_h, 2, 2)
1454 GEN_VEXT_VV(vmulh_vv_w, 4, 4)
1455 GEN_VEXT_VV(vmulh_vv_d, 8, 8)
1456 GEN_VEXT_VV(vmulhu_vv_b, 1, 1)
1457 GEN_VEXT_VV(vmulhu_vv_h, 2, 2)
1458 GEN_VEXT_VV(vmulhu_vv_w, 4, 4)
1459 GEN_VEXT_VV(vmulhu_vv_d, 8, 8)
1460 GEN_VEXT_VV(vmulhsu_vv_b, 1, 1)
1461 GEN_VEXT_VV(vmulhsu_vv_h, 2, 2)
1462 GEN_VEXT_VV(vmulhsu_vv_w, 4, 4)
1463 GEN_VEXT_VV(vmulhsu_vv_d, 8, 8)
1464 
1465 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1466 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1467 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1468 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1469 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1470 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1471 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1472 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1473 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1474 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1475 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1476 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1477 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1478 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1479 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1480 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
1481 GEN_VEXT_VX(vmul_vx_b, 1, 1)
1482 GEN_VEXT_VX(vmul_vx_h, 2, 2)
1483 GEN_VEXT_VX(vmul_vx_w, 4, 4)
1484 GEN_VEXT_VX(vmul_vx_d, 8, 8)
1485 GEN_VEXT_VX(vmulh_vx_b, 1, 1)
1486 GEN_VEXT_VX(vmulh_vx_h, 2, 2)
1487 GEN_VEXT_VX(vmulh_vx_w, 4, 4)
1488 GEN_VEXT_VX(vmulh_vx_d, 8, 8)
1489 GEN_VEXT_VX(vmulhu_vx_b, 1, 1)
1490 GEN_VEXT_VX(vmulhu_vx_h, 2, 2)
1491 GEN_VEXT_VX(vmulhu_vx_w, 4, 4)
1492 GEN_VEXT_VX(vmulhu_vx_d, 8, 8)
1493 GEN_VEXT_VX(vmulhsu_vx_b, 1, 1)
1494 GEN_VEXT_VX(vmulhsu_vx_h, 2, 2)
1495 GEN_VEXT_VX(vmulhsu_vx_w, 4, 4)
1496 GEN_VEXT_VX(vmulhsu_vx_d, 8, 8)
1497 
1498 /* Vector Integer Divide Instructions */
1499 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1500 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
1501 #define DO_DIV(N, M)  (unlikely(M == 0) ? (__typeof(N))(-1) :\
1502         unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
1503 #define DO_REM(N, M)  (unlikely(M == 0) ? N :\
1504         unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
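
/*
 * Edge cases handled above (matching the RVV division rules): division by
 * zero yields all ones for DO_DIVU, -1 for DO_DIV, and leaves the dividend
 * for DO_REMU/DO_REM; signed overflow, detected via N == -N (true only for
 * the most negative value and zero), makes DO_DIV(INT8_MIN, -1) return
 * INT8_MIN and DO_REM(INT8_MIN, -1) return 0.
 */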
1505 
1506 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1507 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1508 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1509 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1510 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1511 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1512 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1513 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1514 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1515 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1516 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1517 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1518 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1519 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1520 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1521 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
1522 GEN_VEXT_VV(vdivu_vv_b, 1, 1)
1523 GEN_VEXT_VV(vdivu_vv_h, 2, 2)
1524 GEN_VEXT_VV(vdivu_vv_w, 4, 4)
1525 GEN_VEXT_VV(vdivu_vv_d, 8, 8)
1526 GEN_VEXT_VV(vdiv_vv_b, 1, 1)
1527 GEN_VEXT_VV(vdiv_vv_h, 2, 2)
1528 GEN_VEXT_VV(vdiv_vv_w, 4, 4)
1529 GEN_VEXT_VV(vdiv_vv_d, 8, 8)
1530 GEN_VEXT_VV(vremu_vv_b, 1, 1)
1531 GEN_VEXT_VV(vremu_vv_h, 2, 2)
1532 GEN_VEXT_VV(vremu_vv_w, 4, 4)
1533 GEN_VEXT_VV(vremu_vv_d, 8, 8)
1534 GEN_VEXT_VV(vrem_vv_b, 1, 1)
1535 GEN_VEXT_VV(vrem_vv_h, 2, 2)
1536 GEN_VEXT_VV(vrem_vv_w, 4, 4)
1537 GEN_VEXT_VV(vrem_vv_d, 8, 8)
1538 
1539 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1540 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1541 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1542 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1543 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1544 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1545 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1546 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1547 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1548 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1549 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1550 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1551 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1552 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1553 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1554 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
1555 GEN_VEXT_VX(vdivu_vx_b, 1, 1)
1556 GEN_VEXT_VX(vdivu_vx_h, 2, 2)
1557 GEN_VEXT_VX(vdivu_vx_w, 4, 4)
1558 GEN_VEXT_VX(vdivu_vx_d, 8, 8)
1559 GEN_VEXT_VX(vdiv_vx_b, 1, 1)
1560 GEN_VEXT_VX(vdiv_vx_h, 2, 2)
1561 GEN_VEXT_VX(vdiv_vx_w, 4, 4)
1562 GEN_VEXT_VX(vdiv_vx_d, 8, 8)
1563 GEN_VEXT_VX(vremu_vx_b, 1, 1)
1564 GEN_VEXT_VX(vremu_vx_h, 2, 2)
1565 GEN_VEXT_VX(vremu_vx_w, 4, 4)
1566 GEN_VEXT_VX(vremu_vx_d, 8, 8)
1567 GEN_VEXT_VX(vrem_vx_b, 1, 1)
1568 GEN_VEXT_VX(vrem_vx_h, 2, 2)
1569 GEN_VEXT_VX(vrem_vx_w, 4, 4)
1570 GEN_VEXT_VX(vrem_vx_d, 8, 8)
1571 
1572 /* Vector Widening Integer Multiply Instructions */
1573 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1574 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1575 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1576 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1577 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1578 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1579 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1580 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1581 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
1582 GEN_VEXT_VV(vwmul_vv_b, 1, 2)
1583 GEN_VEXT_VV(vwmul_vv_h, 2, 4)
1584 GEN_VEXT_VV(vwmul_vv_w, 4, 8)
1585 GEN_VEXT_VV(vwmulu_vv_b, 1, 2)
1586 GEN_VEXT_VV(vwmulu_vv_h, 2, 4)
1587 GEN_VEXT_VV(vwmulu_vv_w, 4, 8)
1588 GEN_VEXT_VV(vwmulsu_vv_b, 1, 2)
1589 GEN_VEXT_VV(vwmulsu_vv_h, 2, 4)
1590 GEN_VEXT_VV(vwmulsu_vv_w, 4, 8)
1591 
1592 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1593 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1594 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1595 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1596 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1597 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1598 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1599 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1600 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
1601 GEN_VEXT_VX(vwmul_vx_b, 1, 2)
1602 GEN_VEXT_VX(vwmul_vx_h, 2, 4)
1603 GEN_VEXT_VX(vwmul_vx_w, 4, 8)
1604 GEN_VEXT_VX(vwmulu_vx_b, 1, 2)
1605 GEN_VEXT_VX(vwmulu_vx_h, 2, 4)
1606 GEN_VEXT_VX(vwmulu_vx_w, 4, 8)
1607 GEN_VEXT_VX(vwmulsu_vx_b, 1, 2)
1608 GEN_VEXT_VX(vwmulsu_vx_h, 2, 4)
1609 GEN_VEXT_VX(vwmulsu_vx_w, 4, 8)
1610 
1611 /* Vector Single-Width Integer Multiply-Add Instructions */
1612 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
1613 static void do_##NAME(void *vd, void *vs1, void *vs2, int i)       \
1614 {                                                                  \
1615     TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
1616     TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
1617     TD d = *((TD *)vd + HD(i));                                    \
1618     *((TD *)vd + HD(i)) = OP(s2, s1, d);                           \
1619 }
1620 
1621 #define DO_MACC(N, M, D) (M * N + D)
1622 #define DO_NMSAC(N, M, D) (-(M * N) + D)
1623 #define DO_MADD(N, M, D) (M * D + N)
1624 #define DO_NMSUB(N, M, D) (-(M * D) + N)
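
/*
 * Operand roles (note vd is both a source and the destination): DO_MACC and
 * DO_NMSAC multiply the two sources and accumulate into d, while DO_MADD and
 * DO_NMSUB multiply the destination by vs1 and add the other source, e.g.
 *     vmacc: vd[i] = vs1[i] * vs2[i] + vd[i]
 *     vmadd: vd[i] = vs1[i] * vd[i]  + vs2[i]
 */
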
1625 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1626 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1627 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1628 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1629 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1630 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1631 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1632 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1633 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1634 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1635 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1636 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1637 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1638 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1639 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1640 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
1641 GEN_VEXT_VV(vmacc_vv_b, 1, 1)
1642 GEN_VEXT_VV(vmacc_vv_h, 2, 2)
1643 GEN_VEXT_VV(vmacc_vv_w, 4, 4)
1644 GEN_VEXT_VV(vmacc_vv_d, 8, 8)
1645 GEN_VEXT_VV(vnmsac_vv_b, 1, 1)
1646 GEN_VEXT_VV(vnmsac_vv_h, 2, 2)
1647 GEN_VEXT_VV(vnmsac_vv_w, 4, 4)
1648 GEN_VEXT_VV(vnmsac_vv_d, 8, 8)
1649 GEN_VEXT_VV(vmadd_vv_b, 1, 1)
1650 GEN_VEXT_VV(vmadd_vv_h, 2, 2)
1651 GEN_VEXT_VV(vmadd_vv_w, 4, 4)
1652 GEN_VEXT_VV(vmadd_vv_d, 8, 8)
1653 GEN_VEXT_VV(vnmsub_vv_b, 1, 1)
1654 GEN_VEXT_VV(vnmsub_vv_h, 2, 2)
1655 GEN_VEXT_VV(vnmsub_vv_w, 4, 4)
1656 GEN_VEXT_VV(vnmsub_vv_d, 8, 8)
1657 
1658 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
1659 static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
1660 {                                                                   \
1661     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
1662     TD d = *((TD *)vd + HD(i));                                     \
1663     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d);                   \
1664 }
1665 
1666 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1667 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1668 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1669 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1670 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1671 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1672 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1673 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1674 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1675 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1676 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1677 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1678 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1679 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1680 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1681 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
1682 GEN_VEXT_VX(vmacc_vx_b, 1, 1)
1683 GEN_VEXT_VX(vmacc_vx_h, 2, 2)
1684 GEN_VEXT_VX(vmacc_vx_w, 4, 4)
1685 GEN_VEXT_VX(vmacc_vx_d, 8, 8)
1686 GEN_VEXT_VX(vnmsac_vx_b, 1, 1)
1687 GEN_VEXT_VX(vnmsac_vx_h, 2, 2)
1688 GEN_VEXT_VX(vnmsac_vx_w, 4, 4)
1689 GEN_VEXT_VX(vnmsac_vx_d, 8, 8)
1690 GEN_VEXT_VX(vmadd_vx_b, 1, 1)
1691 GEN_VEXT_VX(vmadd_vx_h, 2, 2)
1692 GEN_VEXT_VX(vmadd_vx_w, 4, 4)
1693 GEN_VEXT_VX(vmadd_vx_d, 8, 8)
1694 GEN_VEXT_VX(vnmsub_vx_b, 1, 1)
1695 GEN_VEXT_VX(vnmsub_vx_h, 2, 2)
1696 GEN_VEXT_VX(vnmsub_vx_w, 4, 4)
1697 GEN_VEXT_VX(vnmsub_vx_d, 8, 8)
1698 
1699 /* Vector Widening Integer Multiply-Add Instructions */
1700 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1701 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1702 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1703 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1704 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1705 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1706 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1707 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1708 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
1709 GEN_VEXT_VV(vwmaccu_vv_b, 1, 2)
1710 GEN_VEXT_VV(vwmaccu_vv_h, 2, 4)
1711 GEN_VEXT_VV(vwmaccu_vv_w, 4, 8)
1712 GEN_VEXT_VV(vwmacc_vv_b, 1, 2)
1713 GEN_VEXT_VV(vwmacc_vv_h, 2, 4)
1714 GEN_VEXT_VV(vwmacc_vv_w, 4, 8)
1715 GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2)
1716 GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4)
1717 GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8)
1718 
1719 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
1720 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
1721 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
1722 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
1723 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
1724 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
1725 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
1726 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
1727 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
1728 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
1729 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
1730 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
1731 GEN_VEXT_VX(vwmaccu_vx_b, 1, 2)
1732 GEN_VEXT_VX(vwmaccu_vx_h, 2, 4)
1733 GEN_VEXT_VX(vwmaccu_vx_w, 4, 8)
1734 GEN_VEXT_VX(vwmacc_vx_b, 1, 2)
1735 GEN_VEXT_VX(vwmacc_vx_h, 2, 4)
1736 GEN_VEXT_VX(vwmacc_vx_w, 4, 8)
1737 GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2)
1738 GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4)
1739 GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8)
1740 GEN_VEXT_VX(vwmaccus_vx_b, 1, 2)
1741 GEN_VEXT_VX(vwmaccus_vx_h, 2, 4)
1742 GEN_VEXT_VX(vwmaccus_vx_w, 4, 8)
1743 
1744 /* Vector Integer Merge and Move Instructions */
1745 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H)                              \
1746 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env,           \
1747                   uint32_t desc)                                     \
1748 {                                                                    \
1749     uint32_t vl = env->vl;                                           \
1750     uint32_t i;                                                      \
1751                                                                      \
1752     for (i = 0; i < vl; i++) {                                       \
1753         ETYPE s1 = *((ETYPE *)vs1 + H(i));                           \
1754         *((ETYPE *)vd + H(i)) = s1;                                  \
1755     }                                                                \
1756 }
1757 
1758 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t,  H1)
1759 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
1760 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
1761 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
1762 
1763 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H)                              \
1764 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env,         \
1765                   uint32_t desc)                                     \
1766 {                                                                    \
1767     uint32_t vl = env->vl;                                           \
1768     uint32_t i;                                                      \
1769                                                                      \
1770     for (i = 0; i < vl; i++) {                                       \
1771         *((ETYPE *)vd + H(i)) = (ETYPE)s1;                           \
1772     }                                                                \
1773 }
1774 
1775 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t,  H1)
1776 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
1777 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
1778 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
1779 
1780 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H)                           \
1781 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,          \
1782                   CPURISCVState *env, uint32_t desc)                 \
1783 {                                                                    \
1784     uint32_t vl = env->vl;                                           \
1785     uint32_t i;                                                      \
1786                                                                      \
1787     for (i = 0; i < vl; i++) {                                       \
1788         ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1);            \
1789         *((ETYPE *)vd + H(i)) = *(vt + H(i));                        \
1790     }                                                                \
1791 }
1792 
1793 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t,  H1)
1794 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
1795 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
1796 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
1797 
1798 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H)                           \
1799 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,               \
1800                   void *vs2, CPURISCVState *env, uint32_t desc)      \
1801 {                                                                    \
1802     uint32_t vl = env->vl;                                           \
1803     uint32_t i;                                                      \
1804                                                                      \
1805     for (i = 0; i < vl; i++) {                                       \
1806         ETYPE s2 = *((ETYPE *)vs2 + H(i));                           \
1807         ETYPE d = (!vext_elem_mask(v0, i) ? s2 :                     \
1808                    (ETYPE)(target_long)s1);                          \
1809         *((ETYPE *)vd + H(i)) = d;                                   \
1810     }                                                                \
1811 }
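
/*
 * Illustrative semantics: vmerge.vxm takes the scalar where the v0 mask bit
 * is set and the vs2 element where it is clear.  With vl=4, s1=9,
 * vs2 = {1, 2, 3, 4} and mask bits {1, 0, 1, 0}, vd becomes {9, 2, 9, 4}.
 */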
1812 
1813 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t,  H1)
1814 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
1815 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
1816 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
1817 
1818 /*
1819  *** Vector Fixed-Point Arithmetic Instructions
1820  */
1821 
1822 /* Vector Single-Width Saturating Add and Subtract */
1823 
1824 /*
1825  * Fixed-point instructions share rounding-mode handling and saturation,
1826  * so define common macros for fixed-point operations here.
1827  */
1828 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
1829                           CPURISCVState *env, int vxrm);
1830 
1831 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)     \
1832 static inline void                                                  \
1833 do_##NAME(void *vd, void *vs1, void *vs2, int i,                    \
1834           CPURISCVState *env, int vxrm)                             \
1835 {                                                                   \
1836     TX1 s1 = *((T1 *)vs1 + HS1(i));                                 \
1837     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
1838     *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1);                    \
1839 }
1840 
1841 static inline void
1842 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
1843              CPURISCVState *env,
1844              uint32_t vl, uint32_t vm, int vxrm,
1845              opivv2_rm_fn *fn)
1846 {
1847     for (uint32_t i = 0; i < vl; i++) {
1848         if (!vm && !vext_elem_mask(v0, i)) {
1849             continue;
1850         }
1851         fn(vd, vs1, vs2, i, env, vxrm);
1852     }
1853 }
1854 
1855 static inline void
1856 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
1857              CPURISCVState *env,
1858              uint32_t desc, uint32_t esz, uint32_t dsz,
1859              opivv2_rm_fn *fn)
1860 {
1861     uint32_t vm = vext_vm(desc);
1862     uint32_t vl = env->vl;
1863 
1864     switch (env->vxrm) {
1865     case 0: /* rnu */
1866         vext_vv_rm_1(vd, v0, vs1, vs2,
1867                      env, vl, vm, 0, fn);
1868         break;
1869     case 1: /* rne */
1870         vext_vv_rm_1(vd, v0, vs1, vs2,
1871                      env, vl, vm, 1, fn);
1872         break;
1873     case 2: /* rdn */
1874         vext_vv_rm_1(vd, v0, vs1, vs2,
1875                      env, vl, vm, 2, fn);
1876         break;
1877     default: /* rod */
1878         vext_vv_rm_1(vd, v0, vs1, vs2,
1879                      env, vl, vm, 3, fn);
1880         break;
1881     }
1882 }
1883 
1884 /* generate helpers for fixed point instructions with OPIVV format */
1885 #define GEN_VEXT_VV_RM(NAME, ESZ, DSZ)                          \
1886 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,     \
1887                   CPURISCVState *env, uint32_t desc)            \
1888 {                                                               \
1889     vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,         \
1890                  do_##NAME);                                    \
1891 }
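
/*
 * Rough sketch of one expansion (illustrative; the actual symbol goes
 * through the HELPER() name decoration):
 *
 *     void helper_vsaddu_vv_b(void *vd, void *v0, void *vs1, void *vs2,
 *                             CPURISCVState *env, uint32_t desc)
 *     {
 *         vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, 1, 1, do_vsaddu_vv_b);
 *     }
 *
 * vext_vv_rm_2 reads env->vxrm once and dispatches with a literal rounding
 * mode, which keeps the vxrm lookup out of the per-element loop.
 */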
1892 
1893 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
1894 {
1895     uint8_t res = a + b;
1896     if (res < a) {
1897         res = UINT8_MAX;
1898         env->vxsat = 0x1;
1899     }
1900     return res;
1901 }
1902 
1903 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
1904                                uint16_t b)
1905 {
1906     uint16_t res = a + b;
1907     if (res < a) {
1908         res = UINT16_MAX;
1909         env->vxsat = 0x1;
1910     }
1911     return res;
1912 }
1913 
1914 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
1915                                uint32_t b)
1916 {
1917     uint32_t res = a + b;
1918     if (res < a) {
1919         res = UINT32_MAX;
1920         env->vxsat = 0x1;
1921     }
1922     return res;
1923 }
1924 
1925 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
1926                                uint64_t b)
1927 {
1928     uint64_t res = a + b;
1929     if (res < a) {
1930         res = UINT64_MAX;
1931         env->vxsat = 0x1;
1932     }
1933     return res;
1934 }
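
/*
 * The "res < a" test detects unsigned wrap-around (a carry out of the top
 * bit).  Example at 8 bits: adding 200 + 100 wraps to 44, and 44 < 200, so
 * the sum saturates to UINT8_MAX and vxsat is set.
 */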
1935 
1936 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
1937 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
1938 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
1939 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
1940 GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1)
1941 GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2)
1942 GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4)
1943 GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8)
1944 
1945 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
1946                           CPURISCVState *env, int vxrm);
1947 
1948 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)          \
1949 static inline void                                                  \
1950 do_##NAME(void *vd, target_long s1, void *vs2, int i,               \
1951           CPURISCVState *env, int vxrm)                             \
1952 {                                                                   \
1953     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
1954     *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1);           \
1955 }
1956 
1957 static inline void
1958 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
1959              CPURISCVState *env,
1960              uint32_t vl, uint32_t vm, int vxrm,
1961              opivx2_rm_fn *fn)
1962 {
1963     for (uint32_t i = 0; i < vl; i++) {
1964         if (!vm && !vext_elem_mask(v0, i)) {
1965             continue;
1966         }
1967         fn(vd, s1, vs2, i, env, vxrm);
1968     }
1969 }
1970 
1971 static inline void
1972 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
1973              CPURISCVState *env,
1974              uint32_t desc, uint32_t esz, uint32_t dsz,
1975              opivx2_rm_fn *fn)
1976 {
1977     uint32_t vm = vext_vm(desc);
1978     uint32_t vl = env->vl;
1979 
1980     switch (env->vxrm) {
1981     case 0: /* rnu */
1982         vext_vx_rm_1(vd, v0, s1, vs2,
1983                      env, vl, vm, 0, fn);
1984         break;
1985     case 1: /* rne */
1986         vext_vx_rm_1(vd, v0, s1, vs2,
1987                      env, vl, vm, 1, fn);
1988         break;
1989     case 2: /* rdn */
1990         vext_vx_rm_1(vd, v0, s1, vs2,
1991                      env, vl, vm, 2, fn);
1992         break;
1993     default: /* rod */
1994         vext_vx_rm_1(vd, v0, s1, vs2,
1995                      env, vl, vm, 3, fn);
1996         break;
1997     }
1998 }
1999 
2000 /* generate helpers for fixed point instructions with OPIVX format */
2001 #define GEN_VEXT_VX_RM(NAME, ESZ, DSZ)                    \
2002 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
2003         void *vs2, CPURISCVState *env, uint32_t desc)     \
2004 {                                                         \
2005     vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ,    \
2006                  do_##NAME);                              \
2007 }
2008 
2009 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2010 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2011 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2012 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
2013 GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1)
2014 GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2)
2015 GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4)
2016 GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8)
2017 
2018 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2019 {
2020     int8_t res = a + b;
2021     if ((res ^ a) & (res ^ b) & INT8_MIN) {
2022         res = a > 0 ? INT8_MAX : INT8_MIN;
2023         env->vxsat = 0x1;
2024     }
2025     return res;
2026 }
2027 
2028 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2029 {
2030     int16_t res = a + b;
2031     if ((res ^ a) & (res ^ b) & INT16_MIN) {
2032         res = a > 0 ? INT16_MAX : INT16_MIN;
2033         env->vxsat = 0x1;
2034     }
2035     return res;
2036 }
2037 
2038 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2039 {
2040     int32_t res = a + b;
2041     if ((res ^ a) & (res ^ b) & INT32_MIN) {
2042         res = a > 0 ? INT32_MAX : INT32_MIN;
2043         env->vxsat = 0x1;
2044     }
2045     return res;
2046 }
2047 
2048 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2049 {
2050     int64_t res = a + b;
2051     if ((res ^ a) & (res ^ b) & INT64_MIN) {
2052         res = a > 0 ? INT64_MAX : INT64_MIN;
2053         env->vxsat = 0x1;
2054     }
2055     return res;
2056 }
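
/*
 * The "(res ^ a) & (res ^ b) & INT*_MIN" test is nonzero exactly when both
 * operands have the same sign and the truncated sum has the opposite one,
 * i.e. on signed overflow.  Example at 8 bits: 100 + 100 wraps to -56, the
 * sign bit of (res ^ a) and (res ^ b) is set, so the sum saturates to
 * INT8_MAX and vxsat is set.
 */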
2057 
2058 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2059 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2060 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2061 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
2062 GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1)
2063 GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2)
2064 GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4)
2065 GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8)
2066 
2067 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2068 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2069 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2070 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
2071 GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1)
2072 GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2)
2073 GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4)
2074 GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8)
2075 
2076 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2077 {
2078     uint8_t res = a - b;
2079     if (res > a) {
2080         res = 0;
2081         env->vxsat = 0x1;
2082     }
2083     return res;
2084 }
2085 
2086 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2087                                uint16_t b)
2088 {
2089     uint16_t res = a - b;
2090     if (res > a) {
2091         res = 0;
2092         env->vxsat = 0x1;
2093     }
2094     return res;
2095 }
2096 
2097 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2098                                uint32_t b)
2099 {
2100     uint32_t res = a - b;
2101     if (res > a) {
2102         res = 0;
2103         env->vxsat = 0x1;
2104     }
2105     return res;
2106 }
2107 
2108 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2109                                uint64_t b)
2110 {
2111     uint64_t res = a - b;
2112     if (res > a) {
2113         res = 0;
2114         env->vxsat = 0x1;
2115     }
2116     return res;
2117 }
2118 
2119 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2120 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2121 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2122 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
2123 GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1)
2124 GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2)
2125 GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4)
2126 GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8)
2127 
2128 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2129 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2130 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2131 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
2132 GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1)
2133 GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2)
2134 GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4)
2135 GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8)
2136 
2137 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2138 {
2139     int8_t res = a - b;
2140     if ((res ^ a) & (a ^ b) & INT8_MIN) {
2141         res = a >= 0 ? INT8_MAX : INT8_MIN;
2142         env->vxsat = 0x1;
2143     }
2144     return res;
2145 }
2146 
2147 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2148 {
2149     int16_t res = a - b;
2150     if ((res ^ a) & (a ^ b) & INT16_MIN) {
2151         res = a >= 0 ? INT16_MAX : INT16_MIN;
2152         env->vxsat = 0x1;
2153     }
2154     return res;
2155 }
2156 
2157 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2158 {
2159     int32_t res = a - b;
2160     if ((res ^ a) & (a ^ b) & INT32_MIN) {
2161         res = a >= 0 ? INT32_MAX : INT32_MIN;
2162         env->vxsat = 0x1;
2163     }
2164     return res;
2165 }
2166 
2167 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2168 {
2169     int64_t res = a - b;
2170     if ((res ^ a) & (a ^ b) & INT64_MIN) {
2171         res = a >= 0 ? INT64_MAX : INT64_MIN;
2172         env->vxsat = 0x1;
2173     }
2174     return res;
2175 }
2176 
2177 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2178 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2179 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2180 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
2181 GEN_VEXT_VV_RM(vssub_vv_b, 1, 1)
2182 GEN_VEXT_VV_RM(vssub_vv_h, 2, 2)
2183 GEN_VEXT_VV_RM(vssub_vv_w, 4, 4)
2184 GEN_VEXT_VV_RM(vssub_vv_d, 8, 8)
2185 
2186 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2187 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2188 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2189 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
2190 GEN_VEXT_VX_RM(vssub_vx_b, 1, 1)
2191 GEN_VEXT_VX_RM(vssub_vx_h, 2, 2)
2192 GEN_VEXT_VX_RM(vssub_vx_w, 4, 4)
2193 GEN_VEXT_VX_RM(vssub_vx_d, 8, 8)
2194 
2195 /* Vector Single-Width Averaging Add and Subtract */
2196 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2197 {
2198     uint8_t d = extract64(v, shift, 1);
2199     uint8_t d1;
2200     uint64_t D1, D2;
2201 
2202     if (shift == 0 || shift > 64) {
2203         return 0;
2204     }
2205 
2206     d1 = extract64(v, shift - 1, 1);
2207     D1 = extract64(v, 0, shift);
2208     if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2209         return d1;
2210     } else if (vxrm == 1) { /* round-to-nearest-even */
2211         if (shift > 1) {
2212             D2 = extract64(v, 0, shift - 1);
2213             return d1 & ((D2 != 0) | d);
2214         } else {
2215             return d1 & d;
2216         }
2217     } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2218         return !d & (D1 != 0);
2219     }
2220     return 0; /* round-down (truncate) */
2221 }
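
/*
 * Worked example: rounding away shift = 2 fractional bits of v = 0b1011
 * (i.e. computing 11 / 4 = 2.75).  Here d = 0, d1 = 1, D1 = 3, D2 = 1, and
 * get_round() returns the increment added to v >> 2:
 *   rnu (0): d1 = 1                 -> 2 + 1 = 3
 *   rne (1): d1 & ((D2 != 0) | d)   -> 1 -> 3
 *   rdn (2): 0                      -> 2 (truncate)
 *   rod (3): !d & (D1 != 0)         -> 1 -> 3 (jam towards odd)
 */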
2222 
2223 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2224 {
2225     int64_t res = (int64_t)a + b;
2226     uint8_t round = get_round(vxrm, res, 1);
2227 
2228     return (res >> 1) + round;
2229 }
2230 
2231 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2232 {
2233     int64_t res = a + b;
2234     uint8_t round = get_round(vxrm, res, 1);
2235     int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2236 
2237     /* With signed overflow, bit 64 is inverse of bit 63. */
2238     return ((res >> 1) ^ over) + round;
2239 }
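
/*
 * Example of the fix-up: aadd64 with a = INT64_MAX and b = 1 wraps res to
 * INT64_MIN, so "over" is nonzero; (res >> 1) ^ over restores the correct
 * average 2^62, and the rounding increment is 0 here for every vxrm.
 */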
2240 
2241 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2242 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2243 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2244 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
2245 GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1)
2246 GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2)
2247 GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4)
2248 GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8)
2249 
2250 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2251 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2252 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2253 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
2254 GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1)
2255 GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2)
2256 GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4)
2257 GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8)
2258 
2259 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2260 {
2261     int64_t res = (int64_t)a - b;
2262     uint8_t round = get_round(vxrm, res, 1);
2263 
2264     return (res >> 1) + round;
2265 }
2266 
2267 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2268 {
2269     int64_t res = (int64_t)a - b;
2270     uint8_t round = get_round(vxrm, res, 1);
2271     int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2272 
2273     /* With signed overflow, bit 64 is inverse of bit 63. */
2274     return ((res >> 1) ^ over) + round;
2275 }
2276 
2277 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2278 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2279 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2280 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
2281 GEN_VEXT_VV_RM(vasub_vv_b, 1, 1)
2282 GEN_VEXT_VV_RM(vasub_vv_h, 2, 2)
2283 GEN_VEXT_VV_RM(vasub_vv_w, 4, 4)
2284 GEN_VEXT_VV_RM(vasub_vv_d, 8, 8)
2285 
2286 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2287 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2288 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2289 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
2290 GEN_VEXT_VX_RM(vasub_vx_b, 1, 1)
2291 GEN_VEXT_VX_RM(vasub_vx_h, 2, 2)
2292 GEN_VEXT_VX_RM(vasub_vx_w, 4, 4)
2293 GEN_VEXT_VX_RM(vasub_vx_d, 8, 8)
2294 
2295 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2296 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2297 {
2298     uint8_t round;
2299     int16_t res;
2300 
2301     res = (int16_t)a * (int16_t)b;
2302     round = get_round(vxrm, res, 7);
2303     res   = (res >> 7) + round;
2304 
2305     if (res > INT8_MAX) {
2306         env->vxsat = 0x1;
2307         return INT8_MAX;
2308     } else if (res < INT8_MIN) {
2309         env->vxsat = 0x1;
2310         return INT8_MIN;
2311     } else {
2312         return res;
2313     }
2314 }
2315 
2316 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2317 {
2318     uint8_t round;
2319     int32_t res;
2320 
2321     res = (int32_t)a * (int32_t)b;
2322     round = get_round(vxrm, res, 15);
2323     res   = (res >> 15) + round;
2324 
2325     if (res > INT16_MAX) {
2326         env->vxsat = 0x1;
2327         return INT16_MAX;
2328     } else if (res < INT16_MIN) {
2329         env->vxsat = 0x1;
2330         return INT16_MIN;
2331     } else {
2332         return res;
2333     }
2334 }
2335 
2336 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2337 {
2338     uint8_t round;
2339     int64_t res;
2340 
2341     res = (int64_t)a * (int64_t)b;
2342     round = get_round(vxrm, res, 31);
2343     res   = (res >> 31) + round;
2344 
2345     if (res > INT32_MAX) {
2346         env->vxsat = 0x1;
2347         return INT32_MAX;
2348     } else if (res < INT32_MIN) {
2349         env->vxsat = 0x1;
2350         return INT32_MIN;
2351     } else {
2352         return res;
2353     }
2354 }
2355 
2356 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2357 {
2358     uint8_t round;
2359     uint64_t hi_64, lo_64;
2360     int64_t res;
2361 
2362     if (a == INT64_MIN && b == INT64_MIN) {
2363         env->vxsat = 1;
2364         return INT64_MAX;
2365     }
2366 
2367     muls64(&lo_64, &hi_64, a, b);
2368     round = get_round(vxrm, lo_64, 63);
2369     /*
2370      * Cannot overflow, as there are always
2371      * 2 sign bits after multiply.
2372      */
2373     res = (hi_64 << 1) | (lo_64 >> 63);
2374     if (round) {
2375         if (res == INT64_MAX) {
2376             env->vxsat = 1;
2377         } else {
2378             res += 1;
2379         }
2380     }
2381     return res;
2382 }
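
/*
 * vsmul is a Q(SEW-1) fractional multiply: the double-width product is
 * shifted right by SEW-1 with rounding, then saturated.  Worked example at
 * SEW=8: 0x40 (0.5 in Q7) * 0x60 (0.75) = 0x1800; shifting by 7 under rnu
 * gives 0x30 (0.375).  The 64-bit variant avoids a 128-bit shift by
 * rebuilding the result as (hi_64 << 1) | (lo_64 >> 63), and the 63
 * discarded low bits of lo_64 feed get_round().
 */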
2383 
2384 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2385 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2386 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2387 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
2388 GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1)
2389 GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2)
2390 GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4)
2391 GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8)
2392 
2393 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2394 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2395 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2396 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
2397 GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1)
2398 GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2)
2399 GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4)
2400 GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8)
2401 
2402 /* Vector Widening Saturating Scaled Multiply-Add */
2403 static inline uint16_t
2404 vwsmaccu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b,
2405           uint16_t c)
2406 {
2407     uint8_t round;
2408     uint16_t res = (uint16_t)a * b;
2409 
2410     round = get_round(vxrm, res, 4);
2411     res   = (res >> 4) + round;
2412     return saddu16(env, vxrm, c, res);
2413 }
2414 
2415 static inline uint32_t
2416 vwsmaccu16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b,
2417            uint32_t c)
2418 {
2419     uint8_t round;
2420     uint32_t res = (uint32_t)a * b;
2421 
2422     round = get_round(vxrm, res, 8);
2423     res   = (res >> 8) + round;
2424     return saddu32(env, vxrm, c, res);
2425 }
2426 
2427 static inline uint64_t
2428 vwsmaccu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b,
2429            uint64_t c)
2430 {
2431     uint8_t round;
2432     uint64_t res = (uint64_t)a * b;
2433 
2434     round = get_round(vxrm, res, 16);
2435     res   = (res >> 16) + round;
2436     return saddu64(env, vxrm, c, res);
2437 }
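
/*
 * These widening, saturating, scaled multiply-adds shift the double-width
 * product right by SEW/2 with rounding before accumulating, e.g. vwsmaccu8
 * scales the 16-bit product by >> 4 and then adds it to the accumulator
 * with saddu16() saturation.
 */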
2438 
2439 #define OPIVV3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
2440 static inline void                                                 \
2441 do_##NAME(void *vd, void *vs1, void *vs2, int i,                   \
2442           CPURISCVState *env, int vxrm)                            \
2443 {                                                                  \
2444     TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
2445     TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
2446     TD d = *((TD *)vd + HD(i));                                    \
2447     *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1, d);                \
2448 }
2449 
2450 RVVCALL(OPIVV3_RM, vwsmaccu_vv_b, WOP_UUU_B, H2, H1, H1, vwsmaccu8)
2451 RVVCALL(OPIVV3_RM, vwsmaccu_vv_h, WOP_UUU_H, H4, H2, H2, vwsmaccu16)
2452 RVVCALL(OPIVV3_RM, vwsmaccu_vv_w, WOP_UUU_W, H8, H4, H4, vwsmaccu32)
2453 GEN_VEXT_VV_RM(vwsmaccu_vv_b, 1, 2)
2454 GEN_VEXT_VV_RM(vwsmaccu_vv_h, 2, 4)
2455 GEN_VEXT_VV_RM(vwsmaccu_vv_w, 4, 8)
2456 
2457 #define OPIVX3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)         \
2458 static inline void                                                 \
2459 do_##NAME(void *vd, target_long s1, void *vs2, int i,              \
2460           CPURISCVState *env, int vxrm)                            \
2461 {                                                                  \
2462     TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
2463     TD d = *((TD *)vd + HD(i));                                    \
2464     *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1, d);       \
2465 }
2466 
2467 RVVCALL(OPIVX3_RM, vwsmaccu_vx_b, WOP_UUU_B, H2, H1, vwsmaccu8)
2468 RVVCALL(OPIVX3_RM, vwsmaccu_vx_h, WOP_UUU_H, H4, H2, vwsmaccu16)
2469 RVVCALL(OPIVX3_RM, vwsmaccu_vx_w, WOP_UUU_W, H8, H4, vwsmaccu32)
2470 GEN_VEXT_VX_RM(vwsmaccu_vx_b, 1, 2)
2471 GEN_VEXT_VX_RM(vwsmaccu_vx_h, 2, 4)
2472 GEN_VEXT_VX_RM(vwsmaccu_vx_w, 4, 8)
2473 
2474 static inline int16_t
2475 vwsmacc8(CPURISCVState *env, int vxrm, int8_t a, int8_t b, int16_t c)
2476 {
2477     uint8_t round;
2478     int16_t res = (int16_t)a * b;
2479 
2480     round = get_round(vxrm, res, 4);
2481     res   = (res >> 4) + round;
2482     return sadd16(env, vxrm, c, res);
2483 }
2484 
2485 static inline int32_t
2486 vwsmacc16(CPURISCVState *env, int vxrm, int16_t a, int16_t b, int32_t c)
2487 {
2488     uint8_t round;
2489     int32_t res = (int32_t)a * b;
2490 
2491     round = get_round(vxrm, res, 8);
2492     res   = (res >> 8) + round;
2493     return sadd32(env, vxrm, c, res);
2495 }
2496 
2497 static inline int64_t
2498 vwsmacc32(CPURISCVState *env, int vxrm, int32_t a, int32_t b, int64_t c)
2499 {
2500     uint8_t round;
2501     int64_t res = (int64_t)a * b;
2502 
2503     round = get_round(vxrm, res, 16);
2504     res   = (res >> 16) + round;
2505     return sadd64(env, vxrm, c, res);
2506 }
2507 
2508 RVVCALL(OPIVV3_RM, vwsmacc_vv_b, WOP_SSS_B, H2, H1, H1, vwsmacc8)
2509 RVVCALL(OPIVV3_RM, vwsmacc_vv_h, WOP_SSS_H, H4, H2, H2, vwsmacc16)
2510 RVVCALL(OPIVV3_RM, vwsmacc_vv_w, WOP_SSS_W, H8, H4, H4, vwsmacc32)
2511 GEN_VEXT_VV_RM(vwsmacc_vv_b, 1, 2)
2512 GEN_VEXT_VV_RM(vwsmacc_vv_h, 2, 4)
2513 GEN_VEXT_VV_RM(vwsmacc_vv_w, 4, 8)
2514 RVVCALL(OPIVX3_RM, vwsmacc_vx_b, WOP_SSS_B, H2, H1, vwsmacc8)
2515 RVVCALL(OPIVX3_RM, vwsmacc_vx_h, WOP_SSS_H, H4, H2, vwsmacc16)
2516 RVVCALL(OPIVX3_RM, vwsmacc_vx_w, WOP_SSS_W, H8, H4, vwsmacc32)
2517 GEN_VEXT_VX_RM(vwsmacc_vx_b, 1, 2)
2518 GEN_VEXT_VX_RM(vwsmacc_vx_h, 2, 4)
2519 GEN_VEXT_VX_RM(vwsmacc_vx_w, 4, 8)
2520 
2521 static inline int16_t
2522 vwsmaccsu8(CPURISCVState *env, int vxrm, uint8_t a, int8_t b, int16_t c)
2523 {
2524     uint8_t round;
2525     int16_t res = a * (int16_t)b;
2526 
2527     round = get_round(vxrm, res, 4);
2528     res   = (res >> 4) + round;
2529     return ssub16(env, vxrm, c, res);
2530 }
2531 
2532 static inline int32_t
2533 vwsmaccsu16(CPURISCVState *env, int vxrm, uint16_t a, int16_t b, int32_t c)
2534 {
2535     uint8_t round;
2536     int32_t res = a * (int32_t)b;
2537 
2538     round = get_round(vxrm, res, 8);
2539     res   = (res >> 8) + round;
2540     return ssub32(env, vxrm, c, res);
2541 }
2542 
2543 static inline int64_t
2544 vwsmaccsu32(CPURISCVState *env, int vxrm, uint32_t a, int32_t b, int64_t c)
2545 {
2546     uint8_t round;
2547     int64_t res = a * (int64_t)b;
2548 
2549     round = get_round(vxrm, res, 16);
2550     res   = (res >> 16) + round;
2551     return ssub64(env, vxrm, c, res);
2552 }
2553 
2554 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, vwsmaccsu8)
2555 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, vwsmaccsu16)
2556 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, vwsmaccsu32)
2557 GEN_VEXT_VV_RM(vwsmaccsu_vv_b, 1, 2)
2558 GEN_VEXT_VV_RM(vwsmaccsu_vv_h, 2, 4)
2559 GEN_VEXT_VV_RM(vwsmaccsu_vv_w, 4, 8)
2560 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_b, WOP_SSU_B, H2, H1, vwsmaccsu8)
2561 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_h, WOP_SSU_H, H4, H2, vwsmaccsu16)
2562 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_w, WOP_SSU_W, H8, H4, vwsmaccsu32)
2563 GEN_VEXT_VX_RM(vwsmaccsu_vx_b, 1, 2)
2564 GEN_VEXT_VX_RM(vwsmaccsu_vx_h, 2, 4)
2565 GEN_VEXT_VX_RM(vwsmaccsu_vx_w, 4, 8)
2566 
2567 static inline int16_t
2568 vwsmaccus8(CPURISCVState *env, int vxrm, int8_t a, uint8_t b, int16_t c)
2569 {
2570     uint8_t round;
2571     int16_t res = (int16_t)a * b;
2572 
2573     round = get_round(vxrm, res, 4);
2574     res   = (res >> 4) + round;
2575     return ssub16(env, vxrm, c, res);
2576 }
2577 
2578 static inline int32_t
2579 vwsmaccus16(CPURISCVState *env, int vxrm, int16_t a, uint16_t b, int32_t c)
2580 {
2581     uint8_t round;
2582     int32_t res = (int32_t)a * b;
2583 
2584     round = get_round(vxrm, res, 8);
2585     res   = (res >> 8) + round;
2586     return ssub32(env, vxrm, c, res);
2587 }
2588 
2589 static inline int64_t
2590 vwsmaccus32(CPURISCVState *env, int vxrm, int32_t a, uint32_t b, int64_t c)
2591 {
2592     uint8_t round;
2593     int64_t res = (int64_t)a * b;
2594 
2595     round = get_round(vxrm, res, 16);
2596     res   = (res >> 16) + round;
2597     return ssub64(env, vxrm, c, res);
2598 }
2599 
2600 RVVCALL(OPIVX3_RM, vwsmaccus_vx_b, WOP_SUS_B, H2, H1, vwsmaccus8)
2601 RVVCALL(OPIVX3_RM, vwsmaccus_vx_h, WOP_SUS_H, H4, H2, vwsmaccus16)
2602 RVVCALL(OPIVX3_RM, vwsmaccus_vx_w, WOP_SUS_W, H8, H4, vwsmaccus32)
2603 GEN_VEXT_VX_RM(vwsmaccus_vx_b, 1, 2)
2604 GEN_VEXT_VX_RM(vwsmaccus_vx_h, 2, 4)
2605 GEN_VEXT_VX_RM(vwsmaccus_vx_w, 4, 8)
2606 
2607 /* Vector Single-Width Scaling Shift Instructions */
2608 static inline uint8_t
2609 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2610 {
2611     uint8_t round, shift = b & 0x7;
2612     uint8_t res;
2613 
2614     round = get_round(vxrm, a, shift);
2615     res   = (a >> shift)  + round;
2616     return res;
2617 }
2618 static inline uint16_t
2619 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2620 {
2621     uint8_t round, shift = b & 0xf;
2622     uint16_t res;
2623 
2624     round = get_round(vxrm, a, shift);
2625     res   = (a >> shift)  + round;
2626     return res;
2627 }
2628 static inline uint32_t
2629 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2630 {
2631     uint8_t round, shift = b & 0x1f;
2632     uint32_t res;
2633 
2634     round = get_round(vxrm, a, shift);
2635     res   = (a >> shift)  + round;
2636     return res;
2637 }
2638 static inline uint64_t
2639 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2640 {
2641     uint8_t round, shift = b & 0x3f;
2642     uint64_t res;
2643 
2644     round = get_round(vxrm, a, shift);
2645     res   = (a >> shift)  + round;
2646     return res;
2647 }
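
/*
 * Example: vssrl8 with a = 143 (0x8f) and shift 3 computes 143 / 8 = 17.875;
 * under rnu the bit just below the result (d1) is 1, so the rounded result
 * is 18.
 */
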
2648 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2649 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2650 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2651 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
2652 GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1)
2653 GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2)
2654 GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4)
2655 GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8)
2656 
2657 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2658 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2659 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2660 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
2661 GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1)
2662 GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2)
2663 GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4)
2664 GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8)
2665 
2666 static inline int8_t
2667 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2668 {
2669     uint8_t round, shift = b & 0x7;
2670     int8_t res;
2671 
2672     round = get_round(vxrm, a, shift);
2673     res   = (a >> shift)  + round;
2674     return res;
2675 }
2676 static inline int16_t
2677 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2678 {
2679     uint8_t round, shift = b & 0xf;
2680     int16_t res;
2681 
2682     round = get_round(vxrm, a, shift);
2683     res   = (a >> shift)  + round;
2684     return res;
2685 }
2686 static inline int32_t
2687 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2688 {
2689     uint8_t round, shift = b & 0x1f;
2690     int32_t res;
2691 
2692     round = get_round(vxrm, a, shift);
2693     res   = (a >> shift)  + round;
2694     return res;
2695 }
2696 static inline int64_t
2697 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2698 {
2699     uint8_t round, shift = b & 0x3f;
2700     int64_t res;
2701 
2702     round = get_round(vxrm, a, shift);
2703     res   = (a >> shift)  + round;
2704     return res;
2705 }
2706 
2707 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2708 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2709 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2710 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
2711 GEN_VEXT_VV_RM(vssra_vv_b, 1, 1)
2712 GEN_VEXT_VV_RM(vssra_vv_h, 2, 2)
2713 GEN_VEXT_VV_RM(vssra_vv_w, 4, 4)
2714 GEN_VEXT_VV_RM(vssra_vv_d, 8, 8)
2715 
2716 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2717 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2718 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2719 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
2720 GEN_VEXT_VX_RM(vssra_vx_b, 1, 1)
2721 GEN_VEXT_VX_RM(vssra_vx_h, 2, 2)
2722 GEN_VEXT_VX_RM(vssra_vx_w, 4, 4)
2723 GEN_VEXT_VX_RM(vssra_vx_d, 8, 8)
2724 
2725 /* Vector Narrowing Fixed-Point Clip Instructions */
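/*
 * vnclip/vnclipu round-shift a double-width source element right and then
 * saturate it to the single-width destination range, setting vxsat when
 * the shifted value does not fit.
 */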
2726 static inline int8_t
2727 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2728 {
2729     uint8_t round, shift = b & 0xf;
2730     int16_t res;
2731 
2732     round = get_round(vxrm, a, shift);
2733     res   = (a >> shift)  + round;
2734     if (res > INT8_MAX) {
2735         env->vxsat = 0x1;
2736         return INT8_MAX;
2737     } else if (res < INT8_MIN) {
2738         env->vxsat = 0x1;
2739         return INT8_MIN;
2740     } else {
2741         return res;
2742     }
2743 }
2744 
2745 static inline int16_t
2746 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2747 {
2748     uint8_t round, shift = b & 0x1f;
2749     int32_t res;
2750 
2751     round = get_round(vxrm, a, shift);
2752     res   = (a >> shift)  + round;
2753     if (res > INT16_MAX) {
2754         env->vxsat = 0x1;
2755         return INT16_MAX;
2756     } else if (res < INT16_MIN) {
2757         env->vxsat = 0x1;
2758         return INT16_MIN;
2759     } else {
2760         return res;
2761     }
2762 }
2763 
2764 static inline int32_t
2765 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2766 {
2767     uint8_t round, shift = b & 0x3f;
2768     int64_t res;
2769 
2770     round = get_round(vxrm, a, shift);
2771     res   = (a >> shift)  + round;
2772     if (res > INT32_MAX) {
2773         env->vxsat = 0x1;
2774         return INT32_MAX;
2775     } else if (res < INT32_MIN) {
2776         env->vxsat = 0x1;
2777         return INT32_MIN;
2778     } else {
2779         return res;
2780     }
2781 }
2782 
2783 RVVCALL(OPIVV2_RM, vnclip_vv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
2784 RVVCALL(OPIVV2_RM, vnclip_vv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
2785 RVVCALL(OPIVV2_RM, vnclip_vv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
2786 GEN_VEXT_VV_RM(vnclip_vv_b, 1, 1)
2787 GEN_VEXT_VV_RM(vnclip_vv_h, 2, 2)
2788 GEN_VEXT_VV_RM(vnclip_vv_w, 4, 4)
2789 
2790 RVVCALL(OPIVX2_RM, vnclip_vx_b, NOP_SSS_B, H1, H2, vnclip8)
2791 RVVCALL(OPIVX2_RM, vnclip_vx_h, NOP_SSS_H, H2, H4, vnclip16)
2792 RVVCALL(OPIVX2_RM, vnclip_vx_w, NOP_SSS_W, H4, H8, vnclip32)
2793 GEN_VEXT_VX_RM(vnclip_vx_b, 1, 1)
2794 GEN_VEXT_VX_RM(vnclip_vx_h, 2, 2)
2795 GEN_VEXT_VX_RM(vnclip_vx_w, 4, 4)
2796 
2797 static inline uint8_t
2798 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2799 {
2800     uint8_t round, shift = b & 0xf;
2801     uint16_t res;
2802 
2803     round = get_round(vxrm, a, shift);
2804     res   = (a >> shift)  + round;
2805     if (res > UINT8_MAX) {
2806         env->vxsat = 0x1;
2807         return UINT8_MAX;
2808     } else {
2809         return res;
2810     }
2811 }
2812 
2813 static inline uint16_t
2814 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
2815 {
2816     uint8_t round, shift = b & 0x1f;
2817     uint32_t res;
2818 
2819     round = get_round(vxrm, a, shift);
2820     res   = (a >> shift)  + round;
2821     if (res > UINT16_MAX) {
2822         env->vxsat = 0x1;
2823         return UINT16_MAX;
2824     } else {
2825         return res;
2826     }
2827 }
2828 
2829 static inline uint32_t
2830 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
2831 {
2832     uint8_t round, shift = b & 0x3f;
2833     uint64_t res;
2834 
2835     round = get_round(vxrm, a, shift);
2836     res   = (a >> shift)  + round;
2837     if (res > UINT32_MAX) {
2838         env->vxsat = 0x1;
2839         return UINT32_MAX;
2840     } else {
2841         return res;
2842     }
2843 }
2844 
2845 RVVCALL(OPIVV2_RM, vnclipu_vv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
2846 RVVCALL(OPIVV2_RM, vnclipu_vv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
2847 RVVCALL(OPIVV2_RM, vnclipu_vv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
2848 GEN_VEXT_VV_RM(vnclipu_vv_b, 1, 1)
2849 GEN_VEXT_VV_RM(vnclipu_vv_h, 2, 2)
2850 GEN_VEXT_VV_RM(vnclipu_vv_w, 4, 4)
2851 
2852 RVVCALL(OPIVX2_RM, vnclipu_vx_b, NOP_UUU_B, H1, H2, vnclipu8)
2853 RVVCALL(OPIVX2_RM, vnclipu_vx_h, NOP_UUU_H, H2, H4, vnclipu16)
2854 RVVCALL(OPIVX2_RM, vnclipu_vx_w, NOP_UUU_W, H4, H8, vnclipu32)
2855 GEN_VEXT_VX_RM(vnclipu_vx_b, 1, 1)
2856 GEN_VEXT_VX_RM(vnclipu_vx_h, 2, 2)
2857 GEN_VEXT_VX_RM(vnclipu_vx_w, 4, 4)
2858 
2859 /*
2860  *** Vector Floating-Point Arithmetic Instructions
2861  */
2862 /* Vector Single-Width Floating-Point Add/Subtract Instructions */
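/*
 * OPFVV2 generates the per-element worker: it reads one element each from
 * vs1 and vs2 and stores OP(vs2[i], vs1[i], &env->fp_status) into vd[i].
 * GEN_VEXT_VV_ENV wraps that worker in a helper that walks the first vl
 * elements and skips the ones masked off by v0 when vm is clear.
 */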
2863 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
2864 static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
2865                       CPURISCVState *env)                      \
2866 {                                                              \
2867     TX1 s1 = *((T1 *)vs1 + HS1(i));                            \
2868     TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
2869     *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status);         \
2870 }
2871 
2872 #define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ)                   \
2873 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
2874                   void *vs2, CPURISCVState *env,          \
2875                   uint32_t desc)                          \
2876 {                                                         \
2877     uint32_t vm = vext_vm(desc);                          \
2878     uint32_t vl = env->vl;                                \
2879     uint32_t i;                                           \
2880                                                           \
2881     for (i = 0; i < vl; i++) {                            \
2882         if (!vm && !vext_elem_mask(v0, i)) {              \
2883             continue;                                     \
2884         }                                                 \
2885         do_##NAME(vd, vs1, vs2, i, env);                  \
2886     }                                                     \
2887 }
2888 
2889 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
2890 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
2891 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
2892 GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2)
2893 GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4)
2894 GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8)
2895 
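/*
 * OPFVF2/GEN_VEXT_VF are the vector-scalar (.vf) counterparts: s1 is the
 * scalar floating-point operand passed in as a 64-bit value and narrowed
 * to the operand type, while vs2 still supplies one element per iteration.
 */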
2896 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)        \
2897 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
2898                       CPURISCVState *env)                      \
2899 {                                                              \
2900     TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
2901     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
2902 }
2903 
2904 #define GEN_VEXT_VF(NAME, ESZ, DSZ)                       \
2905 void HELPER(NAME)(void *vd, void *v0, uint64_t s1,        \
2906                   void *vs2, CPURISCVState *env,          \
2907                   uint32_t desc)                          \
2908 {                                                         \
2909     uint32_t vm = vext_vm(desc);                          \
2910     uint32_t vl = env->vl;                                \
2911     uint32_t i;                                           \
2912                                                           \
2913     for (i = 0; i < vl; i++) {                            \
2914         if (!vm && !vext_elem_mask(v0, i)) {              \
2915             continue;                                     \
2916         }                                                 \
2917         do_##NAME(vd, s1, vs2, i, env);                   \
2918     }                                                     \
2919 }
2920 
2921 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
2922 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
2923 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
2924 GEN_VEXT_VF(vfadd_vf_h, 2, 2)
2925 GEN_VEXT_VF(vfadd_vf_w, 4, 4)
2926 GEN_VEXT_VF(vfadd_vf_d, 8, 8)
2927 
2928 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
2929 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
2930 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
2931 GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2)
2932 GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4)
2933 GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8)
2934 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
2935 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
2936 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
2937 GEN_VEXT_VF(vfsub_vf_h, 2, 2)
2938 GEN_VEXT_VF(vfsub_vf_w, 4, 4)
2939 GEN_VEXT_VF(vfsub_vf_d, 8, 8)
2940 
2941 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
2942 {
2943     return float16_sub(b, a, s);
2944 }
2945 
2946 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
2947 {
2948     return float32_sub(b, a, s);
2949 }
2950 
2951 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
2952 {
2953     return float64_sub(b, a, s);
2954 }
2955 
2956 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
2957 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
2958 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
2959 GEN_VEXT_VF(vfrsub_vf_h, 2, 2)
2960 GEN_VEXT_VF(vfrsub_vf_w, 4, 4)
2961 GEN_VEXT_VF(vfrsub_vf_d, 8, 8)
2962 
2963 /* Vector Widening Floating-Point Add/Subtract Instructions */
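/*
 * The widening forms convert both single-width inputs up to the
 * double-width format (half to single, single to double) and do the
 * arithmetic there; the .wv/.wf variants further down take an already
 * widened first operand and only convert the second one.
 */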
2964 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
2965 {
2966     return float32_add(float16_to_float32(a, true, s),
2967             float16_to_float32(b, true, s), s);
2968 }
2969 
2970 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
2971 {
2972     return float64_add(float32_to_float64(a, s),
2973             float32_to_float64(b, s), s);
2974 
2975 }
2976 
2977 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
2978 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
2979 GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4)
2980 GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8)
2981 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
2982 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
2983 GEN_VEXT_VF(vfwadd_vf_h, 2, 4)
2984 GEN_VEXT_VF(vfwadd_vf_w, 4, 8)
2985 
2986 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
2987 {
2988     return float32_sub(float16_to_float32(a, true, s),
2989             float16_to_float32(b, true, s), s);
2990 }
2991 
2992 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
2993 {
2994     return float64_sub(float32_to_float64(a, s),
2995             float32_to_float64(b, s), s);
2996 
2997 }
2998 
2999 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
3000 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
3001 GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4)
3002 GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8)
3003 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
3004 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
3005 GEN_VEXT_VF(vfwsub_vf_h, 2, 4)
3006 GEN_VEXT_VF(vfwsub_vf_w, 4, 8)
3007 
3008 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
3009 {
3010     return float32_add(a, float16_to_float32(b, true, s), s);
3011 }
3012 
3013 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
3014 {
3015     return float64_add(a, float32_to_float64(b, s), s);
3016 }
3017 
3018 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
3019 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
3020 GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4)
3021 GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8)
3022 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
3023 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
3024 GEN_VEXT_VF(vfwadd_wf_h, 2, 4)
3025 GEN_VEXT_VF(vfwadd_wf_w, 4, 8)
3026 
3027 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
3028 {
3029     return float32_sub(a, float16_to_float32(b, true, s), s);
3030 }
3031 
3032 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
3033 {
3034     return float64_sub(a, float32_to_float64(b, s), s);
3035 }
3036 
3037 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
3038 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
3039 GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4)
3040 GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8)
3041 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
3042 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
3043 GEN_VEXT_VF(vfwsub_wf_h, 2, 4)
3044 GEN_VEXT_VF(vfwsub_wf_w, 4, 8)
3045 
3046 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */
3047 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
3048 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
3049 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
3050 GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2)
3051 GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4)
3052 GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8)
3053 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
3054 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
3055 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
3056 GEN_VEXT_VF(vfmul_vf_h, 2, 2)
3057 GEN_VEXT_VF(vfmul_vf_w, 4, 4)
3058 GEN_VEXT_VF(vfmul_vf_d, 8, 8)
3059 
3060 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
3061 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
3062 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
3063 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2)
3064 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4)
3065 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8)
3066 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
3067 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
3068 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
3069 GEN_VEXT_VF(vfdiv_vf_h, 2, 2)
3070 GEN_VEXT_VF(vfdiv_vf_w, 4, 4)
3071 GEN_VEXT_VF(vfdiv_vf_d, 8, 8)
3072 
3073 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3074 {
3075     return float16_div(b, a, s);
3076 }
3077 
3078 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
3079 {
3080     return float32_div(b, a, s);
3081 }
3082 
3083 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
3084 {
3085     return float64_div(b, a, s);
3086 }
3087 
3088 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
3089 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
3090 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
3091 GEN_VEXT_VF(vfrdiv_vf_h, 2, 2)
3092 GEN_VEXT_VF(vfrdiv_vf_w, 4, 4)
3093 GEN_VEXT_VF(vfrdiv_vf_d, 8, 8)
3094 
3095 /* Vector Widening Floating-Point Multiply */
3096 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3097 {
3098     return float32_mul(float16_to_float32(a, true, s),
3099             float16_to_float32(b, true, s), s);
3100 }
3101 
3102 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3103 {
3104     return float64_mul(float32_to_float64(a, s),
3105             float32_to_float64(b, s), s);
3106 
3107 }
3108 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3109 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
3110 GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4)
3111 GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8)
3112 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3113 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
3114 GEN_VEXT_VF(vfwmul_vf_h, 2, 4)
3115 GEN_VEXT_VF(vfwmul_vf_w, 4, 8)
3116 
3117 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
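/*
 * The fused multiply-add workers receive (a = vs2[i], b = vs1[i] or the
 * scalar, d = vd[i]).  The *macc/*msac flavours compute a * b and add or
 * subtract the destination, while the *madd/*msub flavours multiply the
 * destination (d * b) and add or subtract vs2; the float_muladd_negate_*
 * flags select the negated variants.
 */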
3118 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)       \
3119 static void do_##NAME(void *vd, void *vs1, void *vs2, int i,       \
3120         CPURISCVState *env)                                        \
3121 {                                                                  \
3122     TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
3123     TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
3124     TD d = *((TD *)vd + HD(i));                                    \
3125     *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status);          \
3126 }
3127 
3128 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3129 {
3130     return float16_muladd(a, b, d, 0, s);
3131 }
3132 
3133 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3134 {
3135     return float32_muladd(a, b, d, 0, s);
3136 }
3137 
3138 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3139 {
3140     return float64_muladd(a, b, d, 0, s);
3141 }
3142 
3143 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3144 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3145 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
3146 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2)
3147 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4)
3148 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8)
3149 
3150 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)           \
3151 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i,    \
3152         CPURISCVState *env)                                       \
3153 {                                                                 \
3154     TX2 s2 = *((T2 *)vs2 + HS2(i));                               \
3155     TD d = *((TD *)vd + HD(i));                                   \
3156     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
3157 }
3158 
3159 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3160 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3161 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
3162 GEN_VEXT_VF(vfmacc_vf_h, 2, 2)
3163 GEN_VEXT_VF(vfmacc_vf_w, 4, 4)
3164 GEN_VEXT_VF(vfmacc_vf_d, 8, 8)
3165 
3166 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3167 {
3168     return float16_muladd(a, b, d,
3169             float_muladd_negate_c | float_muladd_negate_product, s);
3170 }
3171 
3172 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3173 {
3174     return float32_muladd(a, b, d,
3175             float_muladd_negate_c | float_muladd_negate_product, s);
3176 }
3177 
3178 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3179 {
3180     return float64_muladd(a, b, d,
3181             float_muladd_negate_c | float_muladd_negate_product, s);
3182 }
3183 
3184 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3185 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3186 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
3187 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2)
3188 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4)
3189 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8)
3190 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3191 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3192 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
3193 GEN_VEXT_VF(vfnmacc_vf_h, 2, 2)
3194 GEN_VEXT_VF(vfnmacc_vf_w, 4, 4)
3195 GEN_VEXT_VF(vfnmacc_vf_d, 8, 8)
3196 
3197 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3198 {
3199     return float16_muladd(a, b, d, float_muladd_negate_c, s);
3200 }
3201 
3202 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3203 {
3204     return float32_muladd(a, b, d, float_muladd_negate_c, s);
3205 }
3206 
3207 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3208 {
3209     return float64_muladd(a, b, d, float_muladd_negate_c, s);
3210 }
3211 
3212 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3213 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3214 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
3215 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2)
3216 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4)
3217 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8)
3218 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3219 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3220 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
3221 GEN_VEXT_VF(vfmsac_vf_h, 2, 2)
3222 GEN_VEXT_VF(vfmsac_vf_w, 4, 4)
3223 GEN_VEXT_VF(vfmsac_vf_d, 8, 8)
3224 
3225 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3226 {
3227     return float16_muladd(a, b, d, float_muladd_negate_product, s);
3228 }
3229 
3230 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3231 {
3232     return float32_muladd(a, b, d, float_muladd_negate_product, s);
3233 }
3234 
3235 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3236 {
3237     return float64_muladd(a, b, d, float_muladd_negate_product, s);
3238 }
3239 
3240 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3241 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3242 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
3243 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2)
3244 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4)
3245 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8)
3246 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3247 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3248 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
3249 GEN_VEXT_VF(vfnmsac_vf_h, 2, 2)
3250 GEN_VEXT_VF(vfnmsac_vf_w, 4, 4)
3251 GEN_VEXT_VF(vfnmsac_vf_d, 8, 8)
3252 
3253 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3254 {
3255     return float16_muladd(d, b, a, 0, s);
3256 }
3257 
3258 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3259 {
3260     return float32_muladd(d, b, a, 0, s);
3261 }
3262 
3263 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3264 {
3265     return float64_muladd(d, b, a, 0, s);
3266 }
3267 
3268 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3269 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3270 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
3271 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2)
3272 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4)
3273 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8)
3274 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3275 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3276 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
3277 GEN_VEXT_VF(vfmadd_vf_h, 2, 2)
3278 GEN_VEXT_VF(vfmadd_vf_w, 4, 4)
3279 GEN_VEXT_VF(vfmadd_vf_d, 8, 8)
3280 
3281 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3282 {
3283     return float16_muladd(d, b, a,
3284             float_muladd_negate_c | float_muladd_negate_product, s);
3285 }
3286 
3287 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3288 {
3289     return float32_muladd(d, b, a,
3290             float_muladd_negate_c | float_muladd_negate_product, s);
3291 }
3292 
3293 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3294 {
3295     return float64_muladd(d, b, a,
3296             float_muladd_negate_c | float_muladd_negate_product, s);
3297 }
3298 
3299 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3300 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3301 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
3302 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2)
3303 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4)
3304 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8)
3305 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3306 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3307 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
3308 GEN_VEXT_VF(vfnmadd_vf_h, 2, 2)
3309 GEN_VEXT_VF(vfnmadd_vf_w, 4, 4)
3310 GEN_VEXT_VF(vfnmadd_vf_d, 8, 8)
3311 
3312 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3313 {
3314     return float16_muladd(d, b, a, float_muladd_negate_c, s);
3315 }
3316 
3317 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3318 {
3319     return float32_muladd(d, b, a, float_muladd_negate_c, s);
3320 }
3321 
3322 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3323 {
3324     return float64_muladd(d, b, a, float_muladd_negate_c, s);
3325 }
3326 
3327 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3328 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3329 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
3330 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2)
3331 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4)
3332 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8)
3333 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3334 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3335 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
3336 GEN_VEXT_VF(vfmsub_vf_h, 2, 2)
3337 GEN_VEXT_VF(vfmsub_vf_w, 4, 4)
3338 GEN_VEXT_VF(vfmsub_vf_d, 8, 8)
3339 
3340 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3341 {
3342     return float16_muladd(d, b, a, float_muladd_negate_product, s);
3343 }
3344 
3345 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3346 {
3347     return float32_muladd(d, b, a, float_muladd_negate_product, s);
3348 }
3349 
3350 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3351 {
3352     return float64_muladd(d, b, a, float_muladd_negate_product, s);
3353 }
3354 
3355 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3356 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3357 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
3358 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2)
3359 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4)
3360 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8)
3361 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3362 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3363 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
3364 GEN_VEXT_VF(vfnmsub_vf_h, 2, 2)
3365 GEN_VEXT_VF(vfnmsub_vf_w, 4, 4)
3366 GEN_VEXT_VF(vfnmsub_vf_d, 8, 8)
3367 
3368 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */
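/*
 * For the widening FMA group both multiplicands are first converted to the
 * double-width format, while the addend d already has the destination
 * type, so the whole operation is performed in the wider precision with a
 * single rounding.
 */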
3369 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3370 {
3371     return float32_muladd(float16_to_float32(a, true, s),
3372                         float16_to_float32(b, true, s), d, 0, s);
3373 }
3374 
3375 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3376 {
3377     return float64_muladd(float32_to_float64(a, s),
3378                         float32_to_float64(b, s), d, 0, s);
3379 }
3380 
3381 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3382 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
3383 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4)
3384 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8)
3385 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3386 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
3387 GEN_VEXT_VF(vfwmacc_vf_h, 2, 4)
3388 GEN_VEXT_VF(vfwmacc_vf_w, 4, 8)
3389 
3390 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3391 {
3392     return float32_muladd(float16_to_float32(a, true, s),
3393                         float16_to_float32(b, true, s), d,
3394                         float_muladd_negate_c | float_muladd_negate_product, s);
3395 }
3396 
3397 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3398 {
3399     return float64_muladd(float32_to_float64(a, s),
3400                         float32_to_float64(b, s), d,
3401                         float_muladd_negate_c | float_muladd_negate_product, s);
3402 }
3403 
3404 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3405 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
3406 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4)
3407 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8)
3408 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3409 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
3410 GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4)
3411 GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8)
3412 
3413 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3414 {
3415     return float32_muladd(float16_to_float32(a, true, s),
3416                         float16_to_float32(b, true, s), d,
3417                         float_muladd_negate_c, s);
3418 }
3419 
3420 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3421 {
3422     return float64_muladd(float32_to_float64(a, s),
3423                         float32_to_float64(b, s), d,
3424                         float_muladd_negate_c, s);
3425 }
3426 
3427 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3428 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
3429 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4)
3430 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8)
3431 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3432 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
3433 GEN_VEXT_VF(vfwmsac_vf_h, 2, 4)
3434 GEN_VEXT_VF(vfwmsac_vf_w, 4, 8)
3435 
3436 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3437 {
3438     return float32_muladd(float16_to_float32(a, true, s),
3439                         float16_to_float32(b, true, s), d,
3440                         float_muladd_negate_product, s);
3441 }
3442 
3443 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3444 {
3445     return float64_muladd(float32_to_float64(a, s),
3446                         float32_to_float64(b, s), d,
3447                         float_muladd_negate_product, s);
3448 }
3449 
3450 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3451 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
3452 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4)
3453 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8)
3454 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3455 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
3456 GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4)
3457 GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8)
3458 
3459 /* Vector Floating-Point Square-Root Instruction */
3460 /* (TD, T2, TX2) */
3461 #define OP_UU_H uint16_t, uint16_t, uint16_t
3462 #define OP_UU_W uint32_t, uint32_t, uint32_t
3463 #define OP_UU_D uint64_t, uint64_t, uint64_t
3464 
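/*
 * OPFVV1/GEN_VEXT_V_ENV handle unary operations that still need the
 * floating-point environment (rounding mode, exception flags): one source
 * element from vs2, result written to vd, masked-off elements skipped.
 */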
3465 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP)        \
3466 static void do_##NAME(void *vd, void *vs2, int i,      \
3467         CPURISCVState *env)                            \
3468 {                                                      \
3469     TX2 s2 = *((T2 *)vs2 + HS2(i));                    \
3470     *((TD *)vd + HD(i)) = OP(s2, &env->fp_status);     \
3471 }
3472 
3473 #define GEN_VEXT_V_ENV(NAME, ESZ, DSZ)                 \
3474 void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
3475         CPURISCVState *env, uint32_t desc)             \
3476 {                                                      \
3477     uint32_t vm = vext_vm(desc);                       \
3478     uint32_t vl = env->vl;                             \
3479     uint32_t i;                                        \
3480                                                        \
3481     if (vl == 0) {                                     \
3482         return;                                        \
3483     }                                                  \
3484     for (i = 0; i < vl; i++) {                         \
3485         if (!vm && !vext_elem_mask(v0, i)) {           \
3486             continue;                                  \
3487         }                                              \
3488         do_##NAME(vd, vs2, i, env);                    \
3489     }                                                  \
3490 }
3491 
3492 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3493 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3494 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
3495 GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2)
3496 GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4)
3497 GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8)
3498 
3499 /* Vector Floating-Point MIN/MAX Instructions */
3500 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minnum)
3501 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minnum)
3502 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minnum)
3503 GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2)
3504 GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4)
3505 GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8)
3506 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minnum)
3507 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minnum)
3508 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minnum)
3509 GEN_VEXT_VF(vfmin_vf_h, 2, 2)
3510 GEN_VEXT_VF(vfmin_vf_w, 4, 4)
3511 GEN_VEXT_VF(vfmin_vf_d, 8, 8)
3512 
3513 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maxnum)
3514 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maxnum)
3515 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maxnum)
3516 GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2)
3517 GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4)
3518 GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8)
3519 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maxnum)
3520 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maxnum)
3521 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maxnum)
3522 GEN_VEXT_VF(vfmax_vf_h, 2, 2)
3523 GEN_VEXT_VF(vfmax_vf_w, 4, 4)
3524 GEN_VEXT_VF(vfmax_vf_d, 8, 8)
3525 
3526 /* Vector Floating-Point Sign-Injection Instructions */
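/*
 * Sign injection is pure bit manipulation: deposit64 keeps the low
 * (width - 1) magnitude bits of a (the vs2 element) and takes the sign bit
 * from b (vs1 or the scalar), from its complement, or from a ^ b for the
 * j/jn/jx variants respectively.  The float_status argument is unused.
 */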
3527 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
3528 {
3529     return deposit64(b, 0, 15, a);
3530 }
3531 
3532 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
3533 {
3534     return deposit64(b, 0, 31, a);
3535 }
3536 
3537 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
3538 {
3539     return deposit64(b, 0, 63, a);
3540 }
3541 
3542 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
3543 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
3544 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
3545 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2)
3546 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4)
3547 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8)
3548 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
3549 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
3550 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
3551 GEN_VEXT_VF(vfsgnj_vf_h, 2, 2)
3552 GEN_VEXT_VF(vfsgnj_vf_w, 4, 4)
3553 GEN_VEXT_VF(vfsgnj_vf_d, 8, 8)
3554 
3555 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
3556 {
3557     return deposit64(~b, 0, 15, a);
3558 }
3559 
3560 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
3561 {
3562     return deposit64(~b, 0, 31, a);
3563 }
3564 
3565 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
3566 {
3567     return deposit64(~b, 0, 63, a);
3568 }
3569 
3570 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
3571 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
3572 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
3573 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2)
3574 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4)
3575 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8)
3576 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
3577 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
3578 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
3579 GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2)
3580 GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4)
3581 GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8)
3582 
3583 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
3584 {
3585     return deposit64(b ^ a, 0, 15, a);
3586 }
3587 
3588 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
3589 {
3590     return deposit64(b ^ a, 0, 31, a);
3591 }
3592 
3593 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
3594 {
3595     return deposit64(b ^ a, 0, 63, a);
3596 }
3597 
3598 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
3599 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
3600 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
3601 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2)
3602 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4)
3603 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8)
3604 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
3605 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
3606 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
3607 GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2)
3608 GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4)
3609 GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8)
3610 
3611 /* Vector Floating-Point Compare Instructions */
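/*
 * The compare helpers produce a mask: each active element sets its mask
 * bit to the comparison result, inactive elements keep their old value,
 * and the tail bits from vl up to vlmax are cleared.
 */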
3612 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP)            \
3613 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
3614                   CPURISCVState *env, uint32_t desc)          \
3615 {                                                             \
3616     uint32_t vm = vext_vm(desc);                              \
3617     uint32_t vl = env->vl;                                    \
3618     uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);        \
3619     uint32_t i;                                               \
3620                                                               \
3621     for (i = 0; i < vl; i++) {                                \
3622         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
3623         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
3624         if (!vm && !vext_elem_mask(v0, i)) {                  \
3625             continue;                                         \
3626         }                                                     \
3627         vext_set_elem_mask(vd, i,                             \
3628                            DO_OP(s2, s1, &env->fp_status));   \
3629     }                                                         \
3630     for (; i < vlmax; i++) {                                  \
3631         vext_set_elem_mask(vd, i, 0);                         \
3632     }                                                         \
3633 }
3634 
3635 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
3636 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
3637 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
3638 
3639 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP)                      \
3640 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,       \
3641                   CPURISCVState *env, uint32_t desc)                \
3642 {                                                                   \
3643     uint32_t vm = vext_vm(desc);                                    \
3644     uint32_t vl = env->vl;                                          \
3645     uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);              \
3646     uint32_t i;                                                     \
3647                                                                     \
3648     for (i = 0; i < vl; i++) {                                      \
3649         ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
3650         if (!vm && !vext_elem_mask(v0, i)) {                        \
3651             continue;                                               \
3652         }                                                           \
3653         vext_set_elem_mask(vd, i,                                   \
3654                            DO_OP(s2, (ETYPE)s1, &env->fp_status));  \
3655     }                                                               \
3656     for (; i < vlmax; i++) {                                        \
3657         vext_set_elem_mask(vd, i, 0);                               \
3658     }                                                               \
3659 }
3660 
3661 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
3662 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
3663 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
3664 
3665 static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
3666 {
3667     FloatRelation compare = float16_compare_quiet(a, b, s);
3668     return compare != float_relation_equal;
3669 }
3670 
3671 static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
3672 {
3673     FloatRelation compare = float32_compare_quiet(a, b, s);
3674     return compare != float_relation_equal;
3675 }
3676 
3677 static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
3678 {
3679     FloatRelation compare = float64_compare_quiet(a, b, s);
3680     return compare != float_relation_equal;
3681 }
3682 
3683 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
3684 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
3685 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
3686 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
3687 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
3688 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
3689 
3690 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
3691 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
3692 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
3693 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
3694 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
3695 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
3696 
3697 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
3698 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
3699 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
3700 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
3701 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
3702 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
3703 
3704 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
3705 {
3706     FloatRelation compare = float16_compare(a, b, s);
3707     return compare == float_relation_greater;
3708 }
3709 
3710 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
3711 {
3712     FloatRelation compare = float32_compare(a, b, s);
3713     return compare == float_relation_greater;
3714 }
3715 
3716 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
3717 {
3718     FloatRelation compare = float64_compare(a, b, s);
3719     return compare == float_relation_greater;
3720 }
3721 
3722 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
3723 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
3724 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
3725 
3726 static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
3727 {
3728     FloatRelation compare = float16_compare(a, b, s);
3729     return compare == float_relation_greater ||
3730            compare == float_relation_equal;
3731 }
3732 
3733 static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
3734 {
3735     FloatRelation compare = float32_compare(a, b, s);
3736     return compare == float_relation_greater ||
3737            compare == float_relation_equal;
3738 }
3739 
3740 static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
3741 {
3742     FloatRelation compare = float64_compare(a, b, s);
3743     return compare == float_relation_greater ||
3744            compare == float_relation_equal;
3745 }
3746 
3747 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
3748 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
3749 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
3750 
3751 GEN_VEXT_CMP_VV_ENV(vmford_vv_h, uint16_t, H2, !float16_unordered_quiet)
3752 GEN_VEXT_CMP_VV_ENV(vmford_vv_w, uint32_t, H4, !float32_unordered_quiet)
3753 GEN_VEXT_CMP_VV_ENV(vmford_vv_d, uint64_t, H8, !float64_unordered_quiet)
3754 GEN_VEXT_CMP_VF(vmford_vf_h, uint16_t, H2, !float16_unordered_quiet)
3755 GEN_VEXT_CMP_VF(vmford_vf_w, uint32_t, H4, !float32_unordered_quiet)
3756 GEN_VEXT_CMP_VF(vmford_vf_d, uint64_t, H8, !float64_unordered_quiet)
3757 
3758 /* Vector Floating-Point Classify Instruction */
3759 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP)         \
3760 static void do_##NAME(void *vd, void *vs2, int i)      \
3761 {                                                      \
3762     TX2 s2 = *((T2 *)vs2 + HS2(i));                    \
3763     *((TD *)vd + HD(i)) = OP(s2);                      \
3764 }
3765 
3766 #define GEN_VEXT_V(NAME, ESZ, DSZ)                     \
3767 void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
3768                   CPURISCVState *env, uint32_t desc)   \
3769 {                                                      \
3770     uint32_t vm = vext_vm(desc);                       \
3771     uint32_t vl = env->vl;                             \
3772     uint32_t i;                                        \
3773                                                        \
3774     for (i = 0; i < vl; i++) {                         \
3775         if (!vm && !vext_elem_mask(v0, i)) {           \
3776             continue;                                  \
3777         }                                              \
3778         do_##NAME(vd, vs2, i);                         \
3779     }                                                  \
3780 }
3781 
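/*
 * fclass returns the standard RISC-V 10-bit class mask: bit 0 -inf,
 * bit 1 negative normal, bit 2 negative subnormal, bit 3 -0, bit 4 +0,
 * bit 5 positive subnormal, bit 6 positive normal, bit 7 +inf,
 * bit 8 signaling NaN, bit 9 quiet NaN.
 */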
3782 target_ulong fclass_h(uint64_t frs1)
3783 {
3784     float16 f = frs1;
3785     bool sign = float16_is_neg(f);
3786 
3787     if (float16_is_infinity(f)) {
3788         return sign ? 1 << 0 : 1 << 7;
3789     } else if (float16_is_zero(f)) {
3790         return sign ? 1 << 3 : 1 << 4;
3791     } else if (float16_is_zero_or_denormal(f)) {
3792         return sign ? 1 << 2 : 1 << 5;
3793     } else if (float16_is_any_nan(f)) {
3794         float_status s = { }; /* for snan_bit_is_one */
3795         return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
3796     } else {
3797         return sign ? 1 << 1 : 1 << 6;
3798     }
3799 }
3800 
3801 target_ulong fclass_s(uint64_t frs1)
3802 {
3803     float32 f = frs1;
3804     bool sign = float32_is_neg(f);
3805 
3806     if (float32_is_infinity(f)) {
3807         return sign ? 1 << 0 : 1 << 7;
3808     } else if (float32_is_zero(f)) {
3809         return sign ? 1 << 3 : 1 << 4;
3810     } else if (float32_is_zero_or_denormal(f)) {
3811         return sign ? 1 << 2 : 1 << 5;
3812     } else if (float32_is_any_nan(f)) {
3813         float_status s = { }; /* for snan_bit_is_one */
3814         return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
3815     } else {
3816         return sign ? 1 << 1 : 1 << 6;
3817     }
3818 }
3819 
3820 target_ulong fclass_d(uint64_t frs1)
3821 {
3822     float64 f = frs1;
3823     bool sign = float64_is_neg(f);
3824 
3825     if (float64_is_infinity(f)) {
3826         return sign ? 1 << 0 : 1 << 7;
3827     } else if (float64_is_zero(f)) {
3828         return sign ? 1 << 3 : 1 << 4;
3829     } else if (float64_is_zero_or_denormal(f)) {
3830         return sign ? 1 << 2 : 1 << 5;
3831     } else if (float64_is_any_nan(f)) {
3832         float_status s = { }; /* for snan_bit_is_one */
3833         return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
3834     } else {
3835         return sign ? 1 << 1 : 1 << 6;
3836     }
3837 }
3838 
3839 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
3840 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
3841 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
3842 GEN_VEXT_V(vfclass_v_h, 2, 2)
3843 GEN_VEXT_V(vfclass_v_w, 4, 4)
3844 GEN_VEXT_V(vfclass_v_d, 8, 8)
3845 
3846 /* Vector Floating-Point Merge Instruction */
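/*
 * vfmerge.vfm: for every element up to vl the result is the scalar s1 when
 * vm is set or the mask bit selects it, otherwise it takes the
 * corresponding vs2 element.  With vm set this effectively behaves like
 * vfmv.v.f, splatting the scalar.
 */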
3847 #define GEN_VFMERGE_VF(NAME, ETYPE, H)                        \
3848 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
3849                   CPURISCVState *env, uint32_t desc)          \
3850 {                                                             \
3851     uint32_t vm = vext_vm(desc);                              \
3852     uint32_t vl = env->vl;                                    \
3853     uint32_t i;                                               \
3854                                                               \
3855     for (i = 0; i < vl; i++) {                                \
3856         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
3857         *((ETYPE *)vd + H(i))                                 \
3858           = (!vm && !vext_elem_mask(v0, i) ? s2 : s1);        \
3859     }                                                         \
3860 }
3861 
3862 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
3863 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
3864 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
3865 
3866 /* Single-Width Floating-Point/Integer Type-Convert Instructions */
3867 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
3868 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
3869 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
3870 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
3871 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2)
3872 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4)
3873 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8)
3874 
3875 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
3876 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
3877 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
3878 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
3879 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2)
3880 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4)
3881 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8)
3882 
3883 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
3884 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
3885 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
3886 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
3887 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2)
3888 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4)
3889 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8)
3890 
3891 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
3892 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
3893 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
3894 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
3895 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2)
3896 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4)
3897 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8)
3898 
3899 /* Widening Floating-Point/Integer Type-Convert Instructions */
3900 /* (TD, T2, TX2) */
3901 #define WOP_UU_H uint32_t, uint16_t, uint16_t
3902 #define WOP_UU_W uint64_t, uint32_t, uint32_t
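/*
 * The widening and narrowing conversions below reuse OPFVV1; only the
 * (TD, T2) type pairs change, so the destination is double (WOP_UU_*) or
 * half (NOP_UU_*) the width of the source element.
 */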
3903 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. */
3904 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
3905 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
3906 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4)
3907 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8)
3908 
3909 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
3910 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
3911 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
3912 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4)
3913 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8)
3914 
3915 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */
3916 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
3917 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
3918 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4)
3919 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8)
3920 
3921 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
3922 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
3923 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
3924 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4)
3925 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8)
3926 
3927 /*
3928  * vfwcvt.f.f.v vd, vs2, vm #
3929  * Convert single-width float to double-width float.
3930  */
3931 static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
3932 {
3933     return float16_to_float32(a, true, s);
3934 }
3935 
3936 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
3937 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
3938 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4)
3939 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8)
3940 
3941 /* Narrowing Floating-Point/Integer Type-Convert Instructions */
3942 /* (TD, T2, TX2) */
3943 #define NOP_UU_H uint16_t, uint32_t, uint32_t
3944 #define NOP_UU_W uint32_t, uint64_t, uint64_t
3945 /* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
3946 RVVCALL(OPFVV1, vfncvt_xu_f_v_h, NOP_UU_H, H2, H4, float32_to_uint16)
3947 RVVCALL(OPFVV1, vfncvt_xu_f_v_w, NOP_UU_W, H4, H8, float64_to_uint32)
3948 GEN_VEXT_V_ENV(vfncvt_xu_f_v_h, 2, 2)
3949 GEN_VEXT_V_ENV(vfncvt_xu_f_v_w, 4, 4)
3950 
3951 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
3952 RVVCALL(OPFVV1, vfncvt_x_f_v_h, NOP_UU_H, H2, H4, float32_to_int16)
3953 RVVCALL(OPFVV1, vfncvt_x_f_v_w, NOP_UU_W, H4, H8, float64_to_int32)
3954 GEN_VEXT_V_ENV(vfncvt_x_f_v_h, 2, 2)
3955 GEN_VEXT_V_ENV(vfncvt_x_f_v_w, 4, 4)
3956 
3957 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */
3958 RVVCALL(OPFVV1, vfncvt_f_xu_v_h, NOP_UU_H, H2, H4, uint32_to_float16)
3959 RVVCALL(OPFVV1, vfncvt_f_xu_v_w, NOP_UU_W, H4, H8, uint64_to_float32)
3960 GEN_VEXT_V_ENV(vfncvt_f_xu_v_h, 2, 2)
3961 GEN_VEXT_V_ENV(vfncvt_f_xu_v_w, 4, 4)
3962 
3963 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
3964 RVVCALL(OPFVV1, vfncvt_f_x_v_h, NOP_UU_H, H2, H4, int32_to_float16)
3965 RVVCALL(OPFVV1, vfncvt_f_x_v_w, NOP_UU_W, H4, H8, int64_to_float32)
3966 GEN_VEXT_V_ENV(vfncvt_f_x_v_h, 2, 2)
3967 GEN_VEXT_V_ENV(vfncvt_f_x_v_w, 4, 4)
3968 
3969 /* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float. */
3970 static uint16_t vfncvtffv16(uint32_t a, float_status *s)
3971 {
3972     return float32_to_float16(a, true, s);
3973 }
3974 
3975 RVVCALL(OPFVV1, vfncvt_f_f_v_h, NOP_UU_H, H2, H4, vfncvtffv16)
3976 RVVCALL(OPFVV1, vfncvt_f_f_v_w, NOP_UU_W, H4, H8, float64_to_float32)
3977 GEN_VEXT_V_ENV(vfncvt_f_f_v_h, 2, 2)
3978 GEN_VEXT_V_ENV(vfncvt_f_f_v_w, 4, 4)
3979 
3980 /*
3981  *** Vector Reduction Operations
3982  */
3983 /* Vector Single-Width Integer Reduction Instructions */
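/*
 * Reductions fold all active elements of vs2 into a scalar accumulator
 * that starts from vs1[0]; only element 0 of vd is written back, the rest
 * of the destination register is left untouched by this helper.
 */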
3984 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP)          \
3985 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
3986         void *vs2, CPURISCVState *env, uint32_t desc)     \
3987 {                                                         \
3988     uint32_t vm = vext_vm(desc);                          \
3989     uint32_t vl = env->vl;                                \
3990     uint32_t i;                                           \
3991     TD s1 =  *((TD *)vs1 + HD(0));                        \
3992                                                           \
3993     for (i = 0; i < vl; i++) {                            \
3994         TS2 s2 = *((TS2 *)vs2 + HS2(i));                  \
3995         if (!vm && !vext_elem_mask(v0, i)) {              \
3996             continue;                                     \
3997         }                                                 \
3998         s1 = OP(s1, (TD)s2);                              \
3999     }                                                     \
4000     *((TD *)vd + HD(0)) = s1;                             \
4001 }
4002 
4003 /* vd[0] = sum(vs1[0], vs2[*]) */
4004 GEN_VEXT_RED(vredsum_vs_b, int8_t,  int8_t,  H1, H1, DO_ADD)
4005 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
4006 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
4007 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)
4008 
4009 /* vd[0] = maxu(vs1[0], vs2[*]) */
4010 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t,  uint8_t,  H1, H1, DO_MAX)
4011 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
4012 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
4013 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)
4014 
4015 /* vd[0] = max(vs1[0], vs2[*]) */
4016 GEN_VEXT_RED(vredmax_vs_b, int8_t,  int8_t,  H1, H1, DO_MAX)
4017 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
4018 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
4019 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)
4020 
4021 /* vd[0] = minu(vs1[0], vs2[*]) */
4022 GEN_VEXT_RED(vredminu_vs_b, uint8_t,  uint8_t,  H1, H1, DO_MIN)
4023 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
4024 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
4025 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)
4026 
4027 /* vd[0] = min(vs1[0], vs2[*]) */
4028 GEN_VEXT_RED(vredmin_vs_b, int8_t,  int8_t,  H1, H1, DO_MIN)
4029 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
4030 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
4031 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)
4032 
4033 /* vd[0] = and(vs1[0], vs2[*]) */
4034 GEN_VEXT_RED(vredand_vs_b, int8_t,  int8_t,  H1, H1, DO_AND)
4035 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
4036 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
4037 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)
4038 
4039 /* vd[0] = or(vs1[0], vs2[*]) */
4040 GEN_VEXT_RED(vredor_vs_b, int8_t,  int8_t,  H1, H1, DO_OR)
4041 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
4042 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
4043 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)
4044 
4045 /* vd[0] = xor(vs1[0], vs2[*]) */
4046 GEN_VEXT_RED(vredxor_vs_b, int8_t,  int8_t,  H1, H1, DO_XOR)
4047 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
4048 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
4049 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)
4050 
4051 /* Vector Widening Integer Reduction Instructions */
4052 /* Signed sum reduction into double-width accumulator */
4053 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t,  H2, H1, DO_ADD)
4054 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
4055 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)
4056 
4057 /* Unsigned sum reduction into double-width accumulator */
4058 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t,  H2, H1, DO_ADD)
4059 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
4060 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
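
/*
 * In the widening forms the accumulator type TD is twice as wide as the
 * source type TS2, and each s2 is cast to TD before DO_ADD.  For example,
 * in vwredsumu_vs_b a partial sum such as 200 + 200 from uint8_t sources
 * is held exactly as 400 in the uint16_t accumulator instead of wrapping
 * modulo 256.
 */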
4061 
4062 /* Vector Single-Width Floating-Point Reduction Instructions */
4063 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP)          \
4064 void HELPER(NAME)(void *vd, void *v0, void *vs1,           \
4065                   void *vs2, CPURISCVState *env,           \
4066                   uint32_t desc)                           \
4067 {                                                          \
4068     uint32_t vm = vext_vm(desc);                           \
4069     uint32_t vl = env->vl;                                 \
4070     uint32_t i;                                            \
4071     TD s1 =  *((TD *)vs1 + HD(0));                         \
4072                                                            \
4073     for (i = 0; i < vl; i++) {                             \
4074         TS2 s2 = *((TS2 *)vs2 + HS2(i));                   \
4075         if (!vm && !vext_elem_mask(v0, i)) {               \
4076             continue;                                      \
4077         }                                                  \
4078         s1 = OP(s1, (TD)s2, &env->fp_status);              \
4079     }                                                      \
4080     *((TD *)vd + HD(0)) = s1;                              \
4081 }
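
/*
 * The floating-point reductions reuse the integer skeleton but thread
 * &env->fp_status through OP, so the accumulation follows the current
 * rounding mode and records exception flags there.  Element values are
 * carried as raw uint16_t/uint32_t/uint64_t bit patterns; e.g. with
 * float32_add, accumulating 0x3f800000 (1.0f) and 0x40000000 (2.0f)
 * produces 0x40400000 (3.0f).
 */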
4082 
4083 /* Unordered sum */
4084 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4085 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4086 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
4087 
4088 /* Maximum value */
4089 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maxnum)
4090 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maxnum)
4091 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maxnum)
4092 
4093 /* Minimum value */
4094 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minnum)
4095 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minnum)
4096 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minnum)
4097 
4098 /* Vector Widening Floating-Point Reduction Instructions */
4099 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
4100 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
4101                             void *vs2, CPURISCVState *env, uint32_t desc)
4102 {
4103     uint32_t vm = vext_vm(desc);
4104     uint32_t vl = env->vl;
4105     uint32_t i;
4106     uint32_t s1 =  *((uint32_t *)vs1 + H4(0));
4107 
4108     for (i = 0; i < vl; i++) {
4109         uint16_t s2 = *((uint16_t *)vs2 + H2(i));
4110         if (!vm && !vext_elem_mask(v0, i)) {
4111             continue;
4112         }
4113         s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status),
4114                          &env->fp_status);
4115     }
4116     *((uint32_t *)vd + H4(0)) = s1;
4117 }
4118 
4119 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
4120                             void *vs2, CPURISCVState *env, uint32_t desc)
4121 {
4122     uint32_t vm = vext_vm(desc);
4123     uint32_t vl = env->vl;
4124     uint32_t i;
4125     uint64_t s1 =  *((uint64_t *)vs1);
4126 
4127     for (i = 0; i < vl; i++) {
4128         uint32_t s2 = *((uint32_t *)vs2 + H4(i));
4129         if (!vm && !vext_elem_mask(v0, i)) {
4130             continue;
4131         }
4132         s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status),
4133                          &env->fp_status);
4134     }
4135     *((uint64_t *)vd) = s1;
4136 }
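
/*
 * The widening FP reductions cannot reuse GEN_VEXT_FRED directly because
 * each source element must be promoted before the add.  For instance, in
 * vfwredsum_vs_h a half-precision 1.0 (0x3c00) is first widened by
 * float16_to_float32() to 0x3f800000 and only then added into the
 * single-precision accumulator, again under env->fp_status.
 */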
4137 
4138 /*
4139  *** Vector Mask Operations
4140  */
4141 /* Vector Mask-Register Logical Instructions */
4142 #define GEN_VEXT_MASK_VV(NAME, OP)                        \
4143 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
4144                   void *vs2, CPURISCVState *env,          \
4145                   uint32_t desc)                          \
4146 {                                                         \
4147     uint32_t vlmax = env_archcpu(env)->cfg.vlen;          \
4148     uint32_t vl = env->vl;                                \
4149     uint32_t i;                                           \
4150     int a, b;                                             \
4151                                                           \
4152     for (i = 0; i < vl; i++) {                            \
4153         a = vext_elem_mask(vs1, i);                       \
4154         b = vext_elem_mask(vs2, i);                       \
4155         vext_set_elem_mask(vd, i, OP(b, a));              \
4156     }                                                     \
4157     for (; i < vlmax; i++) {                              \
4158         vext_set_elem_mask(vd, i, 0);                     \
4159     }                                                     \
4160 }
4161 
4162 #define DO_NAND(N, M)  (!(N & M))
4163 #define DO_ANDNOT(N, M)  (N & !M)
4164 #define DO_NOR(N, M)  (!(N | M))
4165 #define DO_ORNOT(N, M)  (N | !M)
4166 #define DO_XNOR(N, M)  (!(N ^ M))
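
/*
 * The operands of these macros are single mask bits already normalised to
 * 0 or 1 by vext_elem_mask(), so logical '!' acts as a one-bit complement
 * here.  For instance DO_ANDNOT(1, 0) = (1 & !0) = 1 and
 * DO_XNOR(1, 1) = !(1 ^ 1) = 1, matching the vmandnot/vmxnor semantics.
 */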
4167 
4168 GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
4169 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
4170 GEN_VEXT_MASK_VV(vmandnot_mm, DO_ANDNOT)
4171 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
4172 GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
4173 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
4174 GEN_VEXT_MASK_VV(vmornot_mm, DO_ORNOT)
4175 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
4176 
4177 /* Vector mask population count vmpopc */
4178 target_ulong HELPER(vmpopc_m)(void *v0, void *vs2, CPURISCVState *env,
4179                               uint32_t desc)
4180 {
4181     target_ulong cnt = 0;
4182     uint32_t vm = vext_vm(desc);
4183     uint32_t vl = env->vl;
4184     int i;
4185 
4186     for (i = 0; i < vl; i++) {
4187         if (vm || vext_elem_mask(v0, i)) {
4188             if (vext_elem_mask(vs2, i)) {
4189                 cnt++;
4190             }
4191         }
4192     }
4193     return cnt;
4194 }
4195 
4196 /* vmfirst find-first-set mask bit */
4197 target_ulong HELPER(vmfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4198                                uint32_t desc)
4199 {
4200     uint32_t vm = vext_vm(desc);
4201     uint32_t vl = env->vl;
4202     int i;
4203 
4204     for (i = 0; i < vl; i++) {
4205         if (vm || vext_elem_mask(v0, i)) {
4206             if (vext_elem_mask(vs2, i)) {
4207                 return i;
4208             }
4209         }
4210     }
4211     return -1LL;
4212 }
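
/*
 * Example: with vm = 1, vl = 8 and vs2 mask bits {0, 0, 0, 1, 0, 1, 0, 0}
 * (element 0 first), vmfirst_m returns 3, the index of the lowest-numbered
 * set bit among the active elements; it returns -1 when no active bit of
 * vs2 is set.
 */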
4213 
4214 enum set_mask_type {
4215     ONLY_FIRST = 1,
4216     INCLUDE_FIRST,
4217     BEFORE_FIRST,
4218 };
4219 
4220 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
4221                    uint32_t desc, enum set_mask_type type)
4222 {
4223     uint32_t vlmax = env_archcpu(env)->cfg.vlen;
4224     uint32_t vm = vext_vm(desc);
4225     uint32_t vl = env->vl;
4226     int i;
4227     bool first_mask_bit = false;
4228 
4229     for (i = 0; i < vl; i++) {
4230         if (!vm && !vext_elem_mask(v0, i)) {
4231             continue;
4232         }
4233         /* write a zero to all following active elements */
4234         if (first_mask_bit) {
4235             vext_set_elem_mask(vd, i, 0);
4236             continue;
4237         }
4238         if (vext_elem_mask(vs2, i)) {
4239             first_mask_bit = true;
4240             if (type == BEFORE_FIRST) {
4241                 vext_set_elem_mask(vd, i, 0);
4242             } else {
4243                 vext_set_elem_mask(vd, i, 1);
4244             }
4245         } else {
4246             if (type == ONLY_FIRST) {
4247                 vext_set_elem_mask(vd, i, 0);
4248             } else {
4249                 vext_set_elem_mask(vd, i, 1);
4250             }
4251         }
4252     }
4253     for (; i < vlmax; i++) {
4254         vext_set_elem_mask(vd, i, 0);
4255     }
4256 }
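
/*
 * Worked example of the three variants below, with vm = 1, vl = 8 and
 * vs2 mask bits {0, 0, 1, 0, 1, 0, 0, 0} (element 0 first):
 *
 *   vmsbf (BEFORE_FIRST):  vd = {1, 1, 0, 0, 0, 0, 0, 0}
 *   vmsif (INCLUDE_FIRST): vd = {1, 1, 1, 0, 0, 0, 0, 0}
 *   vmsof (ONLY_FIRST):    vd = {0, 0, 1, 0, 0, 0, 0, 0}
 *
 * Active elements after the first set bit, and the tail up to vlmax, are
 * cleared to zero.
 */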
4257 
4258 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4259                      uint32_t desc)
4260 {
4261     vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
4262 }
4263 
4264 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4265                      uint32_t desc)
4266 {
4267     vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
4268 }
4269 
4270 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4271                      uint32_t desc)
4272 {
4273     vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
4274 }
4275 
4276 /* Vector Iota Instruction */
4277 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H)                                  \
4278 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env,      \
4279                   uint32_t desc)                                          \
4280 {                                                                         \
4281     uint32_t vm = vext_vm(desc);                                          \
4282     uint32_t vl = env->vl;                                                \
4283     uint32_t sum = 0;                                                     \
4284     int i;                                                                \
4285                                                                           \
4286     for (i = 0; i < vl; i++) {                                            \
4287         if (!vm && !vext_elem_mask(v0, i)) {                              \
4288             continue;                                                     \
4289         }                                                                 \
4290         *((ETYPE *)vd + H(i)) = sum;                                      \
4291         if (vext_elem_mask(vs2, i)) {                                     \
4292             sum++;                                                        \
4293         }                                                                 \
4294     }                                                                     \
4295 }
4296 
4297 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t,  H1)
4298 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
4299 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
4300 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
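
/*
 * Illustration: with vm = 1, vl = 5 and vs2 mask bits {1, 0, 1, 0, 1},
 * viota writes to each element the running count of set bits strictly
 * below its index, i.e. vd = {0, 1, 1, 2, 2}.  When a mask is supplied
 * (vm = 0), inactive elements neither receive a value nor contribute to
 * the running count.
 */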
4301 
4302 /* Vector Element Index Instruction */
4303 #define GEN_VEXT_VID_V(NAME, ETYPE, H)                                    \
4304 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc)  \
4305 {                                                                         \
4306     uint32_t vm = vext_vm(desc);                                          \
4307     uint32_t vl = env->vl;                                                \
4308     int i;                                                                \
4309                                                                           \
4310     for (i = 0; i < vl; i++) {                                            \
4311         if (!vm && !vext_elem_mask(v0, i)) {                              \
4312             continue;                                                     \
4313         }                                                                 \
4314         *((ETYPE *)vd + H(i)) = i;                                        \
4315     }                                                                     \
4316 }
4317 
4318 GEN_VEXT_VID_V(vid_v_b, uint8_t,  H1)
4319 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
4320 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
4321 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
4322 
4323 /*
4324  *** Vector Permutation Instructions
4325  */
4326 
4327 /* Vector Slide Instructions */
4328 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                              \
4329 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4330                   CPURISCVState *env, uint32_t desc)                      \
4331 {                                                                         \
4332     uint32_t vm = vext_vm(desc);                                          \
4333     uint32_t vl = env->vl;                                                \
4334     target_ulong offset = s1, i;                                          \
4335                                                                           \
4336     for (i = offset; i < vl; i++) {                                       \
4337         if (!vm && !vext_elem_mask(v0, i)) {                              \
4338             continue;                                                     \
4339         }                                                                 \
4340         *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
4341     }                                                                     \
4342 }
4343 
4344 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
4345 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t,  H1)
4346 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
4347 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
4348 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
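
/*
 * Example with made-up values: for vslideup_vx_w with offset = rs1 = 2,
 * vl = 6 and vs2 = {10, 20, 30, 40, 50, 60}, the loop starts at i = 2 and
 * copies vs2[i - 2], giving vd = {., ., 10, 20, 30, 40}, where '.' marks
 * destination elements below the offset, which are left unchanged.
 */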
4349 
4350 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                            \
4351 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4352                   CPURISCVState *env, uint32_t desc)                      \
4353 {                                                                         \
4354     uint32_t vlmax = env_archcpu(env)->cfg.vlen;                          \
4355     uint32_t vm = vext_vm(desc);                                          \
4356     uint32_t vl = env->vl;                                                \
4357     target_ulong offset = s1, i;                                          \
4358                                                                           \
4359     for (i = 0; i < vl; ++i) {                                            \
4360         target_ulong j = i + offset;                                      \
4361         if (!vm && !vext_elem_mask(v0, i)) {                              \
4362             continue;                                                     \
4363         }                                                                 \
4364         *((ETYPE *)vd + H(i)) = j >= vlmax ? 0 : *((ETYPE *)vs2 + H(j));  \
4365     }                                                                     \
4366 }
4367 
4368 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
4369 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t,  H1)
4370 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
4371 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
4372 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
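
/*
 * Conversely, for vslidedown_vx_w with offset = 2 and vl = 6, element i of
 * vd reads vs2[i + 2]:
 * vd = {vs2[2], vs2[3], vs2[4], vs2[5], vs2[6], vs2[7]}, with any source
 * index at or beyond vlmax replaced by zero.
 */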
4373 
4374 #define GEN_VEXT_VSLIDE1UP_VX(NAME, ETYPE, H)                             \
4375 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4376                   CPURISCVState *env, uint32_t desc)                      \
4377 {                                                                         \
4378     uint32_t vm = vext_vm(desc);                                          \
4379     uint32_t vl = env->vl;                                                \
4380     uint32_t i;                                                           \
4381                                                                           \
4382     for (i = 0; i < vl; i++) {                                            \
4383         if (!vm && !vext_elem_mask(v0, i)) {                              \
4384             continue;                                                     \
4385         }                                                                 \
4386         if (i == 0) {                                                     \
4387             *((ETYPE *)vd + H(i)) = s1;                                   \
4388         } else {                                                          \
4389             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));           \
4390         }                                                                 \
4391     }                                                                     \
4392 }
4393 
4394 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
4395 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, uint8_t,  H1)
4396 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, uint16_t, H2)
4397 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, uint32_t, H4)
4398 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, uint64_t, H8)
4399 
4400 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ETYPE, H)                           \
4401 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4402                   CPURISCVState *env, uint32_t desc)                      \
4403 {                                                                         \
4404     uint32_t vm = vext_vm(desc);                                          \
4405     uint32_t vl = env->vl;                                                \
4406     uint32_t i;                                                           \
4407                                                                           \
4408     for (i = 0; i < vl; i++) {                                            \
4409         if (!vm && !vext_elem_mask(v0, i)) {                              \
4410             continue;                                                     \
4411         }                                                                 \
4412         if (i == vl - 1) {                                                \
4413             *((ETYPE *)vd + H(i)) = s1;                                   \
4414         } else {                                                          \
4415             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));           \
4416         }                                                                 \
4417     }                                                                     \
4418 }
4419 
4420 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
4421 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, uint8_t,  H1)
4422 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, uint16_t, H2)
4423 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, uint32_t, H4)
4424 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, uint64_t, H8)
4425 
4426 /* Vector Register Gather Instruction */
4427 #define GEN_VEXT_VRGATHER_VV(NAME, ETYPE, H)                              \
4428 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
4429                   CPURISCVState *env, uint32_t desc)                      \
4430 {                                                                         \
4431     uint32_t vlmax = env_archcpu(env)->cfg.vlen;                          \
4432     uint32_t vm = vext_vm(desc);                                          \
4433     uint32_t vl = env->vl;                                                \
4434     uint64_t index;                                                       \
4435     uint32_t i;                                                           \
4436                                                                           \
4437     for (i = 0; i < vl; i++) {                                            \
4438         if (!vm && !vext_elem_mask(v0, i)) {                              \
4439             continue;                                                     \
4440         }                                                                 \
4441         index = *((ETYPE *)vs1 + H(i));                                   \
4442         if (index >= vlmax) {                                             \
4443             *((ETYPE *)vd + H(i)) = 0;                                    \
4444         } else {                                                          \
4445             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
4446         }                                                                 \
4447     }                                                                     \
4448 }
4449 
4450 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
4451 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t,  H1)
4452 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, H2)
4453 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, H4)
4454 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, H8)
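
/*
 * Gather example with made-up values: with vlmax = 4,
 * vs2 = {10, 20, 30, 40} and indices vs1 = {3, 0, 7, 1}, the helper
 * produces vd = {40, 10, 0, 20}; the out-of-range index 7 selects zero
 * rather than reading past the register group.
 */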
4455 
4456 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                              \
4457 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4458                   CPURISCVState *env, uint32_t desc)                      \
4459 {                                                                         \
4460     uint32_t vlmax = env_archcpu(env)->cfg.vlen;                          \
4461     uint32_t vm = vext_vm(desc);                                          \
4462     uint32_t vl = env->vl;                                                \
4463     uint64_t index = s1;                                                  \
4464     uint32_t i;                                                           \
4465                                                                           \
4466     for (i = 0; i < vl; i++) {                                            \
4467         if (!vm && !vext_elem_mask(v0, i)) {                              \
4468             continue;                                                     \
4469         }                                                                 \
4470         if (index >= vlmax) {                                             \
4471             *((ETYPE *)vd + H(i)) = 0;                                    \
4472         } else {                                                          \
4473             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
4474         }                                                                 \
4475     }                                                                     \
4476 }
4477 
4478 /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
4479 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t,  H1)
4480 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
4481 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
4482 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
4483 
4484 /* Vector Compress Instruction */
4485 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
4486 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
4487                   CPURISCVState *env, uint32_t desc)                      \
4488 {                                                                         \
4489     uint32_t vl = env->vl;                                                \
4490     uint32_t num = 0, i;                                                  \
4491                                                                           \
4492     for (i = 0; i < vl; i++) {                                            \
4493         if (!vext_elem_mask(vs1, i)) {                                    \
4494             continue;                                                     \
4495         }                                                                 \
4496         *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
4497         num++;                                                            \
4498     }                                                                     \
4499 }
4500 
4501 /* Compress into vd elements of vs2 where vs1 is enabled */
4502 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t,  H1)
4503 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
4504 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
4505 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
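
/*
 * Compress example: with vl = 5, vs2 = {10, 20, 30, 40, 50} and selection
 * mask vs1 = {1, 0, 1, 1, 0}, the enabled elements are packed to the front
 * of vd, i.e. vd = {10, 30, 40, ...} with num = 3 elements written and the
 * remaining destination elements left unchanged.  Note that vcompress
 * ignores v0: vs1 itself is the mask operand.
 */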
4506