xref: /openbmc/qemu/target/riscv/vector_helper.c (revision 5c89e9c0)
1 /*
2  * RISC-V Vector Extension Helpers for QEMU.
3  *
4  * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2 or later, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License along with
16  * this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include "qemu/osdep.h"
20 #include "qemu/host-utils.h"
21 #include "qemu/bitops.h"
22 #include "cpu.h"
23 #include "exec/memop.h"
24 #include "exec/exec-all.h"
25 #include "exec/helper-proto.h"
26 #include "fpu/softfloat.h"
27 #include "tcg/tcg-gvec-desc.h"
28 #include "internals.h"
29 #include <math.h>
30 
31 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
32                             target_ulong s2)
33 {
34     int vlmax, vl;
35     RISCVCPU *cpu = env_archcpu(env);
36     uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
37     uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
38     uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
39     bool vill = FIELD_EX64(s2, VTYPE, VILL);
40     target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED);
41 
42     if (lmul & 4) {
43         /* Fractional LMUL. */
44         if (lmul == 4 ||
45             cpu->cfg.elen >> (8 - lmul) < sew) {
46             vill = true;
47         }
48     }
49 
50     if ((sew > cpu->cfg.elen)
51         || vill
52         || (ediv != 0)
53         || (reserved != 0)) {
54         /* only set vill bit. */
55         env->vtype = FIELD_DP64(0, VTYPE, VILL, 1);
56         env->vl = 0;
57         env->vstart = 0;
58         return 0;
59     }
60 
61     vlmax = vext_get_vlmax(cpu, s2);
62     if (s1 <= vlmax) {
63         vl = s1;
64     } else {
65         vl = vlmax;
66     }
67     env->vl = vl;
68     env->vtype = s2;
69     env->vstart = 0;
70     return vl;
71 }
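/*
 * Worked example (illustrative, assuming ELEN = 64): for vlmul = 0b110
 * (LMUL = 1/4) the fractional-LMUL check above requires
 * SEW <= ELEN >> 2 = 16, so requesting SEW = 32 with LMUL = 1/4 sets
 * vill, clears vl, and makes vsetvl return 0.
 */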
72 
73 /*
74  * Note that vector data is stored in host-endian 64-bit chunks,
75  * so addressing units smaller than that need a host-endian fixup.
76  */
77 #ifdef HOST_WORDS_BIGENDIAN
78 #define H1(x)   ((x) ^ 7)
79 #define H1_2(x) ((x) ^ 6)
80 #define H1_4(x) ((x) ^ 4)
81 #define H2(x)   ((x) ^ 3)
82 #define H4(x)   ((x) ^ 1)
83 #define H8(x)   ((x))
84 #else
85 #define H1(x)   (x)
86 #define H1_2(x) (x)
87 #define H1_4(x) (x)
88 #define H2(x)   (x)
89 #define H4(x)   (x)
90 #define H8(x)   (x)
91 #endif
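/*
 * Worked example (illustrative, for a big-endian host): within one host
 * uint64_t chunk, guest byte element 3 is stored at host byte offset
 * H1(3) = 3 ^ 7 = 4, and guest halfword element 1 at halfword slot
 * H2(1) = 1 ^ 3 = 2.  On a little-endian host every H macro is the
 * identity, so element i of size 2^n bytes lives at byte offset i << n.
 */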
92 
93 static inline uint32_t vext_nf(uint32_t desc)
94 {
95     return FIELD_EX32(simd_data(desc), VDATA, NF);
96 }
97 
98 static inline uint32_t vext_vm(uint32_t desc)
99 {
100     return FIELD_EX32(simd_data(desc), VDATA, VM);
101 }
102 
103 /*
104  * Encode LMUL to lmul as follows:
105  *     LMUL    vlmul    lmul
106  *      1       000       0
107  *      2       001       1
108  *      4       010       2
109  *      8       011       3
110  *      -       100       -
111  *     1/8      101      -3
112  *     1/4      110      -2
113  *     1/2      111      -1
114  */
115 static inline int32_t vext_lmul(uint32_t desc)
116 {
117     return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
118 }
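/*
 * Example (illustrative): vlmul = 0b001 decodes to lmul = 1 (LMUL = 2),
 * while vlmul = 0b111 sign-extends to lmul = -1 (LMUL = 1/2); in both
 * cases LMUL = 2^lmul.
 */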
119 
120 /*
121  * Get the maximum number of elements that can be operated on.
122  *
123  * esz: log2 of element size in bytes.
124  */
125 static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
126 {
127     /*
128      * As simd_desc supports at most 2048 bytes, the maximum vlen is 1024 bits,
129      * so vlen in bytes (vlenb) is encoded as maxsz.
130      */
131     uint32_t vlenb = simd_maxsz(desc);
132 
133     /* Return VLMAX */
134     int scale = vext_lmul(desc) - esz;
135     return scale < 0 ? vlenb >> -scale : vlenb << scale;
136 }
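/*
 * Worked example (illustrative, assuming VLEN = 256 so vlenb = 32):
 * for SEW = 32 (esz = 2) and LMUL = 4 (lmul = 2), scale = 0 and
 * VLMAX = 32; for SEW = 8 (esz = 0) and LMUL = 1/2 (lmul = -1),
 * scale = -1 and VLMAX = 32 >> 1 = 16.
 */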
137 
138 /*
139  * This function checks watchpoints before the real load operation.
140  *
141  * In softmmu mode, the TLB API probe_access is enough for the watchpoint check.
142  * In user mode, there is currently no watchpoint support.
143  *
144  * It will trigger an exception if there is no mapping in the TLB
145  * and the page table walk can't fill the TLB entry. The guest software
146  * can then return here after processing the exception, or never return.
147  */
148 static void probe_pages(CPURISCVState *env, target_ulong addr,
149                         target_ulong len, uintptr_t ra,
150                         MMUAccessType access_type)
151 {
152     target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
153     target_ulong curlen = MIN(pagelen, len);
154 
155     probe_access(env, addr, curlen, access_type,
156                  cpu_mmu_index(env, false), ra);
157     if (len > curlen) {
158         addr += curlen;
159         curlen = len - curlen;
160         probe_access(env, addr, curlen, access_type,
161                      cpu_mmu_index(env, false), ra);
162     }
163 }
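/*
 * Worked example (illustrative, assuming 4 KiB target pages): for
 * addr = 0x1ffe and len = 8, pagelen = -(addr | TARGET_PAGE_MASK) = 2,
 * so the first probe covers the two bytes up to the page boundary and
 * the second probe covers the remaining six bytes starting at 0x2000.
 */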
164 
165 static inline void vext_set_elem_mask(void *v0, int index,
166                                       uint8_t value)
167 {
168     int idx = index / 64;
169     int pos = index % 64;
170     uint64_t old = ((uint64_t *)v0)[idx];
171     ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
172 }
173 
174 /*
175  * Earlier designs (pre-0.9) had a varying number of bits
176  * per mask value (MLEN). In the 0.9 design, MLEN=1.
177  * (Section 4.5)
178  */
179 static inline int vext_elem_mask(void *v0, int index)
180 {
181     int idx = index / 64;
182     int pos = index % 64;
183     return (((uint64_t *)v0)[idx] >> pos) & 1;
184 }
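/*
 * Example (illustrative): mask element 70 lives at bit position 6 of
 * the second uint64_t of v0 (idx = 70 / 64 = 1, pos = 70 % 64 = 6).
 */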
185 
186 /* elements operations for load and store */
187 typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
188                                uint32_t idx, void *vd, uintptr_t retaddr);
189 
190 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
191 static void NAME(CPURISCVState *env, abi_ptr addr,         \
192                  uint32_t idx, void *vd, uintptr_t retaddr)\
193 {                                                          \
194     ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
195     *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
196 }
197 
198 GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
199 GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
200 GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
201 GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)
202 
203 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
204 static void NAME(CPURISCVState *env, abi_ptr addr,         \
205                  uint32_t idx, void *vd, uintptr_t retaddr)\
206 {                                                          \
207     ETYPE data = *((ETYPE *)vd + H(idx));                  \
208     cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
209 }
210 
211 GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
212 GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
213 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
214 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
215 
216 /*
217  *** stride: access vector elements from strided memory
218  */
219 static void
220 vext_ldst_stride(void *vd, void *v0, target_ulong base,
221                  target_ulong stride, CPURISCVState *env,
222                  uint32_t desc, uint32_t vm,
223                  vext_ldst_elem_fn *ldst_elem,
224                  uint32_t esz, uintptr_t ra, MMUAccessType access_type)
225 {
226     uint32_t i, k;
227     uint32_t nf = vext_nf(desc);
228     uint32_t max_elems = vext_max_elems(desc, esz);
229 
230     for (i = env->vstart; i < env->vl; i++, env->vstart++) {
231         if (!vm && !vext_elem_mask(v0, i)) {
232             continue;
233         }
234 
235         k = 0;
236         while (k < nf) {
237             target_ulong addr = base + stride * i + (k << esz);
238             ldst_elem(env, addr, i + k * max_elems, vd, ra);
239             k++;
240         }
241     }
242     env->vstart = 0;
243 }
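/*
 * Worked example (illustrative): a strided segment load with nf = 2,
 * 16-bit elements (esz = 1) and stride = 64 fetches field k of element
 * i from base + 64 * i + (k << 1) and writes it to destination element
 * index i + k * max_elems, i.e. the two fields land in consecutive
 * register groups.
 */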
244 
245 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
246 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
247                   target_ulong stride, CPURISCVState *env,              \
248                   uint32_t desc)                                        \
249 {                                                                       \
250     uint32_t vm = vext_vm(desc);                                        \
251     vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
252                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);      \
253 }
254 
255 GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b)
256 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
257 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
258 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)
259 
260 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                       \
261 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
262                   target_ulong stride, CPURISCVState *env,              \
263                   uint32_t desc)                                        \
264 {                                                                       \
265     uint32_t vm = vext_vm(desc);                                        \
266     vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
267                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);     \
268 }
269 
270 GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b)
271 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
272 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
273 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
274 
275 /*
276  *** unit-stride: access elements stored contiguously in memory
277  */
278 
279 /* unmasked unit-stride load and store operations */
280 static void
281 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
282              vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl,
283              uintptr_t ra, MMUAccessType access_type)
284 {
285     uint32_t i, k;
286     uint32_t nf = vext_nf(desc);
287     uint32_t max_elems = vext_max_elems(desc, esz);
288 
289     /* load/store elements from/to guest memory */
290     for (i = env->vstart; i < evl; i++, env->vstart++) {
291         k = 0;
292         while (k < nf) {
293             target_ulong addr = base + ((i * nf + k) << esz);
294             ldst_elem(env, addr, i + k * max_elems, vd, ra);
295             k++;
296         }
297     }
298     env->vstart = 0;
299 }
300 
301 /*
302  * A masked unit-stride load or store is handled as a special case of a
303  * strided operation, with stride = NF * sizeof(ETYPE).
304  */
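/*
 * Example (illustrative): a masked unit-stride segment access with
 * nf = 3 and 16-bit elements is passed to vext_ldst_stride with
 * stride = 3 << 1 = 6 bytes, while the unmasked fast path in
 * vext_ldst_us computes the address directly as
 * base + ((i * nf + k) << esz).
 */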
305 
306 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN)                            \
307 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
308                          CPURISCVState *env, uint32_t desc)             \
309 {                                                                       \
310     uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
311     vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
312                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);      \
313 }                                                                       \
314                                                                         \
315 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
316                   CPURISCVState *env, uint32_t desc)                    \
317 {                                                                       \
318     vext_ldst_us(vd, base, env, desc, LOAD_FN,                          \
319                  ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_LOAD); \
320 }
321 
322 GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b)
323 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
324 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
325 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)
326 
327 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN)                            \
328 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,          \
329                          CPURISCVState *env, uint32_t desc)              \
330 {                                                                        \
331     uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));              \
332     vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,   \
333                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);      \
334 }                                                                        \
335                                                                          \
336 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                 \
337                   CPURISCVState *env, uint32_t desc)                     \
338 {                                                                        \
339     vext_ldst_us(vd, base, env, desc, STORE_FN,                          \
340                  ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_STORE); \
341 }
342 
343 GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b)
344 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
345 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
346 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)
347 
348 /*
349  *** index: access vector elements from indexed memory
350  */
351 typedef target_ulong vext_get_index_addr(target_ulong base,
352         uint32_t idx, void *vs2);
353 
354 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
355 static target_ulong NAME(target_ulong base,            \
356                          uint32_t idx, void *vs2)      \
357 {                                                      \
358     return (base + *((ETYPE *)vs2 + H(idx)));          \
359 }
360 
361 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t,  H1)
362 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
363 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
364 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
365 
366 static inline void
367 vext_ldst_index(void *vd, void *v0, target_ulong base,
368                 void *vs2, CPURISCVState *env, uint32_t desc,
369                 vext_get_index_addr get_index_addr,
370                 vext_ldst_elem_fn *ldst_elem,
371                 uint32_t esz, uintptr_t ra, MMUAccessType access_type)
372 {
373     uint32_t i, k;
374     uint32_t nf = vext_nf(desc);
375     uint32_t vm = vext_vm(desc);
376     uint32_t max_elems = vext_max_elems(desc, esz);
377 
378     /* load/store elements from/to guest memory */
379     for (i = env->vstart; i < env->vl; i++, env->vstart++) {
380         if (!vm && !vext_elem_mask(v0, i)) {
381             continue;
382         }
383 
384         k = 0;
385         while (k < nf) {
386             abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz);
387             ldst_elem(env, addr, i + k * max_elems, vd, ra);
388             k++;
389         }
390     }
391     env->vstart = 0;
392 }
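/*
 * Worked example (illustrative): for vlxei16_32_v, the index vector is
 * read with 16-bit EEW (idx_h) and the data with 32-bit elements, so
 * field k of element i is loaded from
 * get_index_addr(base, i, vs2) + (k << 2) = base + vs2[i] + 4 * k.
 */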
393 
394 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)                  \
395 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                   \
396                   void *vs2, CPURISCVState *env, uint32_t desc)            \
397 {                                                                          \
398     vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,                \
399                     LOAD_FN, ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
400 }
401 
402 GEN_VEXT_LD_INDEX(vlxei8_8_v,   int8_t,  idx_b, lde_b)
403 GEN_VEXT_LD_INDEX(vlxei8_16_v,  int16_t, idx_b, lde_h)
404 GEN_VEXT_LD_INDEX(vlxei8_32_v,  int32_t, idx_b, lde_w)
405 GEN_VEXT_LD_INDEX(vlxei8_64_v,  int64_t, idx_b, lde_d)
406 GEN_VEXT_LD_INDEX(vlxei16_8_v,  int8_t,  idx_h, lde_b)
407 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
408 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
409 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
410 GEN_VEXT_LD_INDEX(vlxei32_8_v,  int8_t,  idx_w, lde_b)
411 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
412 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
413 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
414 GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b)
415 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
416 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
417 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)
418 
419 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)       \
420 void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
421                   void *vs2, CPURISCVState *env, uint32_t desc)  \
422 {                                                                \
423     vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
424                     STORE_FN, ctzl(sizeof(ETYPE)),               \
425                     GETPC(), MMU_DATA_STORE);                    \
426 }
427 
428 GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
429 GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
430 GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
431 GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
432 GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
433 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
434 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
435 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
436 GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
437 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
438 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
439 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
440 GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
441 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
442 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
443 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)
444 
445 /*
446  *** unit-stride fault-only-first load instructions
447  */
448 static inline void
449 vext_ldff(void *vd, void *v0, target_ulong base,
450           CPURISCVState *env, uint32_t desc,
451           vext_ldst_elem_fn *ldst_elem,
452           uint32_t esz, uintptr_t ra)
453 {
454     void *host;
455     uint32_t i, k, vl = 0;
456     uint32_t nf = vext_nf(desc);
457     uint32_t vm = vext_vm(desc);
458     uint32_t max_elems = vext_max_elems(desc, esz);
459     target_ulong addr, offset, remain;
460 
461     /* probe every access */
462     for (i = env->vstart; i < env->vl; i++) {
463         if (!vm && !vext_elem_mask(v0, i)) {
464             continue;
465         }
466         addr = base + i * (nf << esz);
467         if (i == 0) {
468             probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
469         } else {
470             /* if it triggers an exception, no need to check watchpoint */
471             remain = nf << esz;
472             while (remain > 0) {
473                 offset = -(addr | TARGET_PAGE_MASK);
474                 host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
475                                          cpu_mmu_index(env, false));
476                 if (host) {
477 #ifdef CONFIG_USER_ONLY
478                     if (page_check_range(addr, nf << esz, PAGE_READ) < 0) {
479                         vl = i;
480                         goto ProbeSuccess;
481                     }
482 #else
483                     probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
484 #endif
485                 } else {
486                     vl = i;
487                     goto ProbeSuccess;
488                 }
489                 if (remain <= offset) {
490                     break;
491                 }
492                 remain -= offset;
493                 addr += offset;
494             }
495         }
496     }
497 ProbeSuccess:
498     /* load bytes from guest memory */
499     if (vl != 0) {
500         env->vl = vl;
501     }
502     for (i = env->vstart; i < env->vl; i++) {
503         k = 0;
504         if (!vm && !vext_elem_mask(v0, i)) {
505             continue;
506         }
507         while (k < nf) {
508             target_ulong addr = base + ((i * nf + k) << esz);
509             ldst_elem(env, addr, i + k * max_elems, vd, ra);
510             k++;
511         }
512     }
513     env->vstart = 0;
514 }
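/*
 * Worked example (illustrative): for a fault-only-first load with
 * vl = 16, a fault on element 0 is reported as usual, but if only the
 * page backing element 5 is unmapped, the probe loop above shrinks
 * env->vl to 5 and elements 0..4 are loaded without raising a trap.
 */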
515 
516 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
517 void HELPER(NAME)(void *vd, void *v0, target_ulong base,  \
518                   CPURISCVState *env, uint32_t desc)      \
519 {                                                         \
520     vext_ldff(vd, v0, base, env, desc, LOAD_FN,           \
521               ctzl(sizeof(ETYPE)), GETPC());              \
522 }
523 
524 GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b)
525 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
526 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
527 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
528 
529 #define DO_SWAP(N, M) (M)
530 #define DO_AND(N, M)  (N & M)
531 #define DO_XOR(N, M)  (N ^ M)
532 #define DO_OR(N, M)   (N | M)
533 #define DO_ADD(N, M)  (N + M)
534 
535 /* Signed min/max */
536 #define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
537 #define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))
538 
539 /* Unsigned min/max */
540 #define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
541 #define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)
542 
543 /*
544  *** load and store whole register instructions
545  */
546 static void
547 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
548                 vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra,
549                 MMUAccessType access_type)
550 {
551     uint32_t i, k, off, pos;
552     uint32_t nf = vext_nf(desc);
553     uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
554     uint32_t max_elems = vlenb >> esz;
555 
556     k = env->vstart / max_elems;
557     off = env->vstart % max_elems;
558 
559     if (off) {
560         /* load/store the remaining elements of the segment pointed to by vstart */
561         for (pos = off; pos < max_elems; pos++, env->vstart++) {
562             target_ulong addr = base + ((pos + k * max_elems) << esz);
563             ldst_elem(env, addr, pos + k * max_elems, vd, ra);
564         }
565         k++;
566     }
567 
568     /* load/store elements of the remaining segments */
569     for (; k < nf; k++) {
570         for (i = 0; i < max_elems; i++, env->vstart++) {
571             target_ulong addr = base + ((i + k * max_elems) << esz);
572             ldst_elem(env, addr, i + k * max_elems, vd, ra);
573         }
574     }
575 
576     env->vstart = 0;
577 }
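/*
 * Worked example (illustrative, assuming VLEN = 128 so vlenb = 16):
 * vl2re32_v has nf = 2 and esz = 2, so max_elems = 4 and 8 elements
 * are transferred.  Resuming with vstart = 5 gives k = 1 and off = 1,
 * so the copy restarts at element 1 of the second register.
 */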
578 
579 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN)      \
580 void HELPER(NAME)(void *vd, target_ulong base,       \
581                   CPURISCVState *env, uint32_t desc) \
582 {                                                    \
583     vext_ldst_whole(vd, base, env, desc, LOAD_FN,    \
584                     ctzl(sizeof(ETYPE)), GETPC(),    \
585                     MMU_DATA_LOAD);                  \
586 }
587 
588 GEN_VEXT_LD_WHOLE(vl1re8_v,  int8_t,  lde_b)
589 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
590 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
591 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
592 GEN_VEXT_LD_WHOLE(vl2re8_v,  int8_t,  lde_b)
593 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
594 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
595 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
596 GEN_VEXT_LD_WHOLE(vl4re8_v,  int8_t,  lde_b)
597 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
598 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
599 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
600 GEN_VEXT_LD_WHOLE(vl8re8_v,  int8_t,  lde_b)
601 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
602 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
603 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)
604 
605 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN)     \
606 void HELPER(NAME)(void *vd, target_ulong base,       \
607                   CPURISCVState *env, uint32_t desc) \
608 {                                                    \
609     vext_ldst_whole(vd, base, env, desc, STORE_FN,   \
610                     ctzl(sizeof(ETYPE)), GETPC(),    \
611                     MMU_DATA_STORE);                 \
612 }
613 
614 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
615 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
616 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
617 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
618 
619 /*
620  *** Vector Integer Arithmetic Instructions
621  */
622 
623 /* expand macro args before macro */
624 #define RVVCALL(macro, ...)  macro(__VA_ARGS__)
625 
626 /* (TD, T1, T2, TX1, TX2) */
627 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
628 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
629 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
630 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
631 #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
632 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
633 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
634 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
635 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
636 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
637 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
638 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
639 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
640 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
641 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
642 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
643 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
644 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
645 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
646 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
647 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
648 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
649 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
650 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
651 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
652 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
653 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
654 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
655 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
656 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
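/*
 * Example (illustrative): OP_SSS_B makes TD, T1, T2, TX1 and TX2 all
 * int8_t, i.e. an all-8-bit signed operation, while WOP_SSS_B describes
 * a widening operation with 8-bit signed sources promoted to a 16-bit
 * signed destination.
 */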
657 
658 /* operation of two vector elements */
659 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
660 
661 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
662 static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
663 {                                                               \
664     TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
665     TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
666     *((TD *)vd + HD(i)) = OP(s2, s1);                           \
667 }
668 #define DO_SUB(N, M) (N - M)
669 #define DO_RSUB(N, M) (M - N)
670 
671 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
672 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
673 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
674 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
675 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
676 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
677 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
678 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
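/*
 * Example (illustrative):
 * RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
 * expands to roughly:
 *
 *   static void do_vadd_vv_b(void *vd, void *vs1, void *vs2, int i)
 *   {
 *       int8_t s1 = *((int8_t *)vs1 + H1(i));
 *       int8_t s2 = *((int8_t *)vs2 + H1(i));
 *       *((int8_t *)vd + H1(i)) = s2 + s1;
 *   }
 */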
679 
680 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
681                        CPURISCVState *env, uint32_t desc,
682                        uint32_t esz, uint32_t dsz,
683                        opivv2_fn *fn)
684 {
685     uint32_t vm = vext_vm(desc);
686     uint32_t vl = env->vl;
687     uint32_t i;
688 
689     for (i = env->vstart; i < vl; i++) {
690         if (!vm && !vext_elem_mask(v0, i)) {
691             continue;
692         }
693         fn(vd, vs1, vs2, i);
694     }
695     env->vstart = 0;
696 }
697 
698 /* generate the helpers for OPIVV */
699 #define GEN_VEXT_VV(NAME, ESZ, DSZ)                       \
700 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
701                   void *vs2, CPURISCVState *env,          \
702                   uint32_t desc)                          \
703 {                                                         \
704     do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,     \
705                do_##NAME);                                \
706 }
707 
708 GEN_VEXT_VV(vadd_vv_b, 1, 1)
709 GEN_VEXT_VV(vadd_vv_h, 2, 2)
710 GEN_VEXT_VV(vadd_vv_w, 4, 4)
711 GEN_VEXT_VV(vadd_vv_d, 8, 8)
712 GEN_VEXT_VV(vsub_vv_b, 1, 1)
713 GEN_VEXT_VV(vsub_vv_h, 2, 2)
714 GEN_VEXT_VV(vsub_vv_w, 4, 4)
715 GEN_VEXT_VV(vsub_vv_d, 8, 8)
716 
717 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
718 
719 /*
720  * (T1)s1 gives the real operand type.
721  * (TX1)(T1)s1 expands the operand type for widening or narrowing operations.
722  */
723 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
724 static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
725 {                                                                   \
726     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
727     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
728 }
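/*
 * Example (illustrative): for vwadd_vx_b (WOP_SSS_B, so T1 = int8_t and
 * TX1 = int16_t), (TX1)(T1)s1 first truncates the scalar to its low
 * 8 bits and then sign-extends it to 16 bits, so only the low byte of
 * the x register participates and it is treated as signed.
 */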
729 
730 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
731 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
732 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
733 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
734 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
735 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
736 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
737 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
738 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
739 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
740 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
741 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
742 
743 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
744                        CPURISCVState *env, uint32_t desc,
745                        uint32_t esz, uint32_t dsz,
746                        opivx2_fn fn)
747 {
748     uint32_t vm = vext_vm(desc);
749     uint32_t vl = env->vl;
750     uint32_t i;
751 
752     for (i = env->vstart; i < vl; i++) {
753         if (!vm && !vext_elem_mask(v0, i)) {
754             continue;
755         }
756         fn(vd, s1, vs2, i);
757     }
758     env->vstart = 0;
759 }
760 
761 /* generate the helpers for OPIVX */
762 #define GEN_VEXT_VX(NAME, ESZ, DSZ)                       \
763 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
764                   void *vs2, CPURISCVState *env,          \
765                   uint32_t desc)                          \
766 {                                                         \
767     do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ,      \
768                do_##NAME);                                \
769 }
770 
771 GEN_VEXT_VX(vadd_vx_b, 1, 1)
772 GEN_VEXT_VX(vadd_vx_h, 2, 2)
773 GEN_VEXT_VX(vadd_vx_w, 4, 4)
774 GEN_VEXT_VX(vadd_vx_d, 8, 8)
775 GEN_VEXT_VX(vsub_vx_b, 1, 1)
776 GEN_VEXT_VX(vsub_vx_h, 2, 2)
777 GEN_VEXT_VX(vsub_vx_w, 4, 4)
778 GEN_VEXT_VX(vsub_vx_d, 8, 8)
779 GEN_VEXT_VX(vrsub_vx_b, 1, 1)
780 GEN_VEXT_VX(vrsub_vx_h, 2, 2)
781 GEN_VEXT_VX(vrsub_vx_w, 4, 4)
782 GEN_VEXT_VX(vrsub_vx_d, 8, 8)
783 
784 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
785 {
786     intptr_t oprsz = simd_oprsz(desc);
787     intptr_t i;
788 
789     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
790         *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
791     }
792 }
793 
794 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
795 {
796     intptr_t oprsz = simd_oprsz(desc);
797     intptr_t i;
798 
799     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
800         *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
801     }
802 }
803 
804 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
805 {
806     intptr_t oprsz = simd_oprsz(desc);
807     intptr_t i;
808 
809     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
810         *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
811     }
812 }
813 
814 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
815 {
816     intptr_t oprsz = simd_oprsz(desc);
817     intptr_t i;
818 
819     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
820         *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
821     }
822 }
823 
824 /* Vector Widening Integer Add/Subtract */
825 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
826 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
827 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
828 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
829 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
830 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
831 #define WOP_WUUU_B  uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
832 #define WOP_WUUU_H  uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
833 #define WOP_WUUU_W  uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
834 #define WOP_WSSS_B  int16_t, int8_t, int16_t, int16_t, int16_t
835 #define WOP_WSSS_H  int32_t, int16_t, int32_t, int32_t, int32_t
836 #define WOP_WSSS_W  int64_t, int32_t, int64_t, int64_t, int64_t
837 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
838 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
839 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
840 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
841 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
842 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
843 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
844 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
845 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
846 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
847 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
848 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
849 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
850 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
851 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
852 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
853 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
854 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
855 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
856 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
857 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
858 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
859 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
860 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
861 GEN_VEXT_VV(vwaddu_vv_b, 1, 2)
862 GEN_VEXT_VV(vwaddu_vv_h, 2, 4)
863 GEN_VEXT_VV(vwaddu_vv_w, 4, 8)
864 GEN_VEXT_VV(vwsubu_vv_b, 1, 2)
865 GEN_VEXT_VV(vwsubu_vv_h, 2, 4)
866 GEN_VEXT_VV(vwsubu_vv_w, 4, 8)
867 GEN_VEXT_VV(vwadd_vv_b, 1, 2)
868 GEN_VEXT_VV(vwadd_vv_h, 2, 4)
869 GEN_VEXT_VV(vwadd_vv_w, 4, 8)
870 GEN_VEXT_VV(vwsub_vv_b, 1, 2)
871 GEN_VEXT_VV(vwsub_vv_h, 2, 4)
872 GEN_VEXT_VV(vwsub_vv_w, 4, 8)
873 GEN_VEXT_VV(vwaddu_wv_b, 1, 2)
874 GEN_VEXT_VV(vwaddu_wv_h, 2, 4)
875 GEN_VEXT_VV(vwaddu_wv_w, 4, 8)
876 GEN_VEXT_VV(vwsubu_wv_b, 1, 2)
877 GEN_VEXT_VV(vwsubu_wv_h, 2, 4)
878 GEN_VEXT_VV(vwsubu_wv_w, 4, 8)
879 GEN_VEXT_VV(vwadd_wv_b, 1, 2)
880 GEN_VEXT_VV(vwadd_wv_h, 2, 4)
881 GEN_VEXT_VV(vwadd_wv_w, 4, 8)
882 GEN_VEXT_VV(vwsub_wv_b, 1, 2)
883 GEN_VEXT_VV(vwsub_wv_h, 2, 4)
884 GEN_VEXT_VV(vwsub_wv_w, 4, 8)
885 
886 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
887 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
888 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
889 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
890 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
891 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
892 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
893 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
894 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
895 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
896 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
897 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
898 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
899 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
900 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
901 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
902 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
903 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
904 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
905 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
906 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
907 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
908 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
909 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
910 GEN_VEXT_VX(vwaddu_vx_b, 1, 2)
911 GEN_VEXT_VX(vwaddu_vx_h, 2, 4)
912 GEN_VEXT_VX(vwaddu_vx_w, 4, 8)
913 GEN_VEXT_VX(vwsubu_vx_b, 1, 2)
914 GEN_VEXT_VX(vwsubu_vx_h, 2, 4)
915 GEN_VEXT_VX(vwsubu_vx_w, 4, 8)
916 GEN_VEXT_VX(vwadd_vx_b, 1, 2)
917 GEN_VEXT_VX(vwadd_vx_h, 2, 4)
918 GEN_VEXT_VX(vwadd_vx_w, 4, 8)
919 GEN_VEXT_VX(vwsub_vx_b, 1, 2)
920 GEN_VEXT_VX(vwsub_vx_h, 2, 4)
921 GEN_VEXT_VX(vwsub_vx_w, 4, 8)
922 GEN_VEXT_VX(vwaddu_wx_b, 1, 2)
923 GEN_VEXT_VX(vwaddu_wx_h, 2, 4)
924 GEN_VEXT_VX(vwaddu_wx_w, 4, 8)
925 GEN_VEXT_VX(vwsubu_wx_b, 1, 2)
926 GEN_VEXT_VX(vwsubu_wx_h, 2, 4)
927 GEN_VEXT_VX(vwsubu_wx_w, 4, 8)
928 GEN_VEXT_VX(vwadd_wx_b, 1, 2)
929 GEN_VEXT_VX(vwadd_wx_h, 2, 4)
930 GEN_VEXT_VX(vwadd_wx_w, 4, 8)
931 GEN_VEXT_VX(vwsub_wx_b, 1, 2)
932 GEN_VEXT_VX(vwsub_wx_h, 2, 4)
933 GEN_VEXT_VX(vwsub_wx_w, 4, 8)
934 
935 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
936 #define DO_VADC(N, M, C) (N + M + C)
937 #define DO_VSBC(N, M, C) (N - M - C)
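/*
 * Example (illustrative, 8-bit elements): DO_VADC(0xff, 0x01, 1) wraps
 * to 0x01 modulo 256; the corresponding carry-out is not produced here
 * but by the vmadc/vmsbc helpers below.
 */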
938 
939 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP)              \
940 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
941                   CPURISCVState *env, uint32_t desc)          \
942 {                                                             \
943     uint32_t vl = env->vl;                                    \
944     uint32_t i;                                               \
945                                                               \
946     for (i = env->vstart; i < vl; i++) {                      \
947         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
948         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
949         ETYPE carry = vext_elem_mask(v0, i);                  \
950                                                               \
951         *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry);         \
952     }                                                         \
953     env->vstart = 0;                                          \
954 }
955 
956 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t,  H1, DO_VADC)
957 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
958 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
959 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)
960 
961 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t,  H1, DO_VSBC)
962 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
963 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
964 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
965 
966 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP)                         \
967 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
968                   CPURISCVState *env, uint32_t desc)                     \
969 {                                                                        \
970     uint32_t vl = env->vl;                                               \
971     uint32_t i;                                                          \
972                                                                          \
973     for (i = env->vstart; i < vl; i++) {                                 \
974         ETYPE s2 = *((ETYPE *)vs2 + H(i));                               \
975         ETYPE carry = vext_elem_mask(v0, i);                             \
976                                                                          \
977         *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
978     }                                                                    \
979     env->vstart = 0;                                          \
980 }
981 
982 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t,  H1, DO_VADC)
983 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
984 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
985 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)
986 
987 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t,  H1, DO_VSBC)
988 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
989 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
990 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
991 
992 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N :           \
993                           (__typeof(N))(N + M) < N)
994 #define DO_MSBC(N, M, C) (C ? N <= M : N < M)
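/*
 * Worked example (illustrative, 8-bit elements): DO_MADC detects the
 * unsigned carry-out by checking for wraparound.  With N = 200 and
 * M = 55, N + M = 255 so there is no carry without carry-in, but with
 * carry-in the sum wraps to 0 and (uint8_t)(N + M + 1) <= N reports a
 * carry.  DO_MSBC likewise reports a borrow for N = M only when a
 * borrow-in is present.
 */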
995 
996 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
997 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
998                   CPURISCVState *env, uint32_t desc)          \
999 {                                                             \
1000     uint32_t vl = env->vl;                                    \
1001     uint32_t vm = vext_vm(desc);                              \
1002     uint32_t i;                                               \
1003                                                               \
1004     for (i = env->vstart; i < vl; i++) {                      \
1005         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
1006         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
1007         ETYPE carry = !vm && vext_elem_mask(v0, i);           \
1008         vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry));      \
1009     }                                                         \
1010     env->vstart = 0;                                          \
1011 }
1012 
1013 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t,  H1, DO_MADC)
1014 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
1015 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
1016 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)
1017 
1018 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t,  H1, DO_MSBC)
1019 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
1020 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
1021 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
1022 
1023 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)               \
1024 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
1025                   void *vs2, CPURISCVState *env, uint32_t desc) \
1026 {                                                               \
1027     uint32_t vl = env->vl;                                      \
1028     uint32_t vm = vext_vm(desc);                                \
1029     uint32_t i;                                                 \
1030                                                                 \
1031     for (i = env->vstart; i < vl; i++) {                        \
1032         ETYPE s2 = *((ETYPE *)vs2 + H(i));                      \
1033         ETYPE carry = !vm && vext_elem_mask(v0, i);             \
1034         vext_set_elem_mask(vd, i,                               \
1035                 DO_OP(s2, (ETYPE)(target_long)s1, carry));      \
1036     }                                                           \
1037     env->vstart = 0;                                            \
1038 }
1039 
1040 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t,  H1, DO_MADC)
1041 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
1042 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
1043 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)
1044 
1045 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t,  H1, DO_MSBC)
1046 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
1047 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
1048 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
1049 
1050 /* Vector Bitwise Logical Instructions */
1051 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
1052 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
1053 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
1054 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
1055 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
1056 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
1057 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
1058 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
1059 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
1060 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
1061 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
1062 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
1063 GEN_VEXT_VV(vand_vv_b, 1, 1)
1064 GEN_VEXT_VV(vand_vv_h, 2, 2)
1065 GEN_VEXT_VV(vand_vv_w, 4, 4)
1066 GEN_VEXT_VV(vand_vv_d, 8, 8)
1067 GEN_VEXT_VV(vor_vv_b, 1, 1)
1068 GEN_VEXT_VV(vor_vv_h, 2, 2)
1069 GEN_VEXT_VV(vor_vv_w, 4, 4)
1070 GEN_VEXT_VV(vor_vv_d, 8, 8)
1071 GEN_VEXT_VV(vxor_vv_b, 1, 1)
1072 GEN_VEXT_VV(vxor_vv_h, 2, 2)
1073 GEN_VEXT_VV(vxor_vv_w, 4, 4)
1074 GEN_VEXT_VV(vxor_vv_d, 8, 8)
1075 
1076 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
1077 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
1078 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
1079 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
1080 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
1081 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
1082 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
1083 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
1084 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
1085 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
1086 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
1087 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
1088 GEN_VEXT_VX(vand_vx_b, 1, 1)
1089 GEN_VEXT_VX(vand_vx_h, 2, 2)
1090 GEN_VEXT_VX(vand_vx_w, 4, 4)
1091 GEN_VEXT_VX(vand_vx_d, 8, 8)
1092 GEN_VEXT_VX(vor_vx_b, 1, 1)
1093 GEN_VEXT_VX(vor_vx_h, 2, 2)
1094 GEN_VEXT_VX(vor_vx_w, 4, 4)
1095 GEN_VEXT_VX(vor_vx_d, 8, 8)
1096 GEN_VEXT_VX(vxor_vx_b, 1, 1)
1097 GEN_VEXT_VX(vxor_vx_h, 2, 2)
1098 GEN_VEXT_VX(vxor_vx_w, 4, 4)
1099 GEN_VEXT_VX(vxor_vx_d, 8, 8)
1100 
1101 /* Vector Single-Width Bit Shift Instructions */
1102 #define DO_SLL(N, M)  (N << (M))
1103 #define DO_SRL(N, M)  (N >> (M))
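/*
 * Example (illustrative): in the shift helpers below, MASK truncates
 * the shift amount to log2(SEW) bits (e.g. 0x1f for 32-bit elements),
 * and an arithmetic right shift is obtained simply by instantiating
 * DO_SRL with a signed source type, relying on the host compiler's
 * signed right-shift behaviour as QEMU does elsewhere.
 */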
1104 
1105 /* generate the helpers for shift instructions with two vector operands */
1106 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK)             \
1107 void HELPER(NAME)(void *vd, void *v0, void *vs1,                          \
1108                   void *vs2, CPURISCVState *env, uint32_t desc)           \
1109 {                                                                         \
1110     uint32_t vm = vext_vm(desc);                                          \
1111     uint32_t vl = env->vl;                                                \
1112     uint32_t i;                                                           \
1113                                                                           \
1114     for (i = env->vstart; i < vl; i++) {                                  \
1115         if (!vm && !vext_elem_mask(v0, i)) {                              \
1116             continue;                                                     \
1117         }                                                                 \
1118         TS1 s1 = *((TS1 *)vs1 + HS1(i));                                  \
1119         TS2 s2 = *((TS2 *)vs2 + HS2(i));                                  \
1120         *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);                        \
1121     }                                                                     \
1122     env->vstart = 0;                                                      \
1123 }
1124 
1125 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t,  uint8_t, H1, H1, DO_SLL, 0x7)
1126 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
1127 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
1128 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)
1129 
1130 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1131 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1132 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1133 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1134 
1135 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t,  int8_t, H1, H1, DO_SRL, 0x7)
1136 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
1137 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1138 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1139 
1140 /* generate the helpers for shift instructions with one vector and one scalar */
1141 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
1142 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,      \
1143         void *vs2, CPURISCVState *env, uint32_t desc)       \
1144 {                                                           \
1145     uint32_t vm = vext_vm(desc);                            \
1146     uint32_t vl = env->vl;                                  \
1147     uint32_t i;                                             \
1148                                                             \
1149     for (i = env->vstart; i < vl; i++) {                    \
1150         if (!vm && !vext_elem_mask(v0, i)) {                \
1151             continue;                                       \
1152         }                                                   \
1153         TS2 s2 = *((TS2 *)vs2 + HS2(i));                    \
1154         *((TD *)vd + HD(i)) = OP(s2, s1 & MASK);            \
1155     }                                                       \
1156     env->vstart = 0;                                        \
1157 }
1158 
1159 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
1160 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
1161 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
1162 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)
1163 
1164 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1165 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1166 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1167 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1168 
1169 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
1170 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
1171 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1172 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1173 
1174 /* Vector Narrowing Integer Right Shift Instructions */
1175 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t,  uint16_t, H1, H2, DO_SRL, 0xf)
1176 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1177 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1178 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t,  int16_t, H1, H2, DO_SRL, 0xf)
1179 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1180 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1181 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1182 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1183 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1184 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
1185 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1186 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
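/*
 * Example (illustrative): vnsrl_wv_b reads a 16-bit source element
 * (TS2 = uint16_t, HS2 = H2), shifts it right by the low four bits of
 * the corresponding 8-bit shift-amount element, and stores the low
 * 8 bits of the result into the narrower destination element.
 */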
1187 
1188 /* Vector Integer Comparison Instructions */
1189 #define DO_MSEQ(N, M) (N == M)
1190 #define DO_MSNE(N, M) (N != M)
1191 #define DO_MSLT(N, M) (N < M)
1192 #define DO_MSLE(N, M) (N <= M)
1193 #define DO_MSGT(N, M) (N > M)
1194 
1195 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                \
1196 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
1197                   CPURISCVState *env, uint32_t desc)          \
1198 {                                                             \
1199     uint32_t vm = vext_vm(desc);                              \
1200     uint32_t vl = env->vl;                                    \
1201     uint32_t i;                                               \
1202                                                               \
1203     for (i = env->vstart; i < vl; i++) {                      \
1204         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
1205         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
1206         if (!vm && !vext_elem_mask(v0, i)) {                  \
1207             continue;                                         \
1208         }                                                     \
1209         vext_set_elem_mask(vd, i, DO_OP(s2, s1));             \
1210     }                                                         \
1211     env->vstart = 0;                                          \
1212 }
1213 
1214 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
1215 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
1216 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
1217 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
1218 
1219 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t,  H1, DO_MSNE)
1220 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
1221 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
1222 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
1223 
1224 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t,  H1, DO_MSLT)
1225 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
1226 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
1227 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
1228 
1229 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t,  H1, DO_MSLT)
1230 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
1231 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
1232 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
1233 
1234 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t,  H1, DO_MSLE)
1235 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
1236 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
1237 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
1238 
1239 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t,  H1, DO_MSLE)
1240 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
1241 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
1242 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1243 
1244 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP)                      \
1245 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
1246                   CPURISCVState *env, uint32_t desc)                \
1247 {                                                                   \
1248     uint32_t vm = vext_vm(desc);                                    \
1249     uint32_t vl = env->vl;                                          \
1250     uint32_t i;                                                     \
1251                                                                     \
1252     for (i = env->vstart; i < vl; i++) {                            \
1253         ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
1254         if (!vm && !vext_elem_mask(v0, i)) {                        \
1255             continue;                                               \
1256         }                                                           \
1257         vext_set_elem_mask(vd, i,                                   \
1258                 DO_OP(s2, (ETYPE)(target_long)s1));                 \
1259     }                                                               \
1260     env->vstart = 0;                                                \
1261 }
1262 
1263 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t,  H1, DO_MSEQ)
1264 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
1265 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
1266 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
1267 
1268 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t,  H1, DO_MSNE)
1269 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
1270 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
1271 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
1272 
1273 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t,  H1, DO_MSLT)
1274 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
1275 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
1276 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
1277 
1278 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t,  H1, DO_MSLT)
1279 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
1280 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
1281 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
1282 
1283 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t,  H1, DO_MSLE)
1284 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
1285 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
1286 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
1287 
1288 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t,  H1, DO_MSLE)
1289 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
1290 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
1291 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
1292 
1293 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t,  H1, DO_MSGT)
1294 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
1295 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
1296 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
1297 
1298 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t,  H1, DO_MSGT)
1299 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
1300 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
1301 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
1302 
1303 /* Vector Integer Min/Max Instructions */
1304 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1305 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1306 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1307 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1308 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1309 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1310 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1311 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1312 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1313 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1314 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1315 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1316 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1317 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1318 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1319 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
1320 GEN_VEXT_VV(vminu_vv_b, 1, 1)
1321 GEN_VEXT_VV(vminu_vv_h, 2, 2)
1322 GEN_VEXT_VV(vminu_vv_w, 4, 4)
1323 GEN_VEXT_VV(vminu_vv_d, 8, 8)
1324 GEN_VEXT_VV(vmin_vv_b, 1, 1)
1325 GEN_VEXT_VV(vmin_vv_h, 2, 2)
1326 GEN_VEXT_VV(vmin_vv_w, 4, 4)
1327 GEN_VEXT_VV(vmin_vv_d, 8, 8)
1328 GEN_VEXT_VV(vmaxu_vv_b, 1, 1)
1329 GEN_VEXT_VV(vmaxu_vv_h, 2, 2)
1330 GEN_VEXT_VV(vmaxu_vv_w, 4, 4)
1331 GEN_VEXT_VV(vmaxu_vv_d, 8, 8)
1332 GEN_VEXT_VV(vmax_vv_b, 1, 1)
1333 GEN_VEXT_VV(vmax_vv_h, 2, 2)
1334 GEN_VEXT_VV(vmax_vv_w, 4, 4)
1335 GEN_VEXT_VV(vmax_vv_d, 8, 8)
1336 
1337 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1338 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1339 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1340 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1341 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1342 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1343 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1344 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1345 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1346 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1347 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1348 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1349 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1350 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1351 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1352 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
1353 GEN_VEXT_VX(vminu_vx_b, 1, 1)
1354 GEN_VEXT_VX(vminu_vx_h, 2, 2)
1355 GEN_VEXT_VX(vminu_vx_w, 4, 4)
1356 GEN_VEXT_VX(vminu_vx_d, 8, 8)
1357 GEN_VEXT_VX(vmin_vx_b, 1, 1)
1358 GEN_VEXT_VX(vmin_vx_h, 2, 2)
1359 GEN_VEXT_VX(vmin_vx_w, 4, 4)
1360 GEN_VEXT_VX(vmin_vx_d, 8, 8)
1361 GEN_VEXT_VX(vmaxu_vx_b, 1, 1)
1362 GEN_VEXT_VX(vmaxu_vx_h, 2, 2)
1363 GEN_VEXT_VX(vmaxu_vx_w, 4, 4)
1364 GEN_VEXT_VX(vmaxu_vx_d, 8, 8)
1365 GEN_VEXT_VX(vmax_vx_b, 1, 1)
1366 GEN_VEXT_VX(vmax_vx_h, 2, 2)
1367 GEN_VEXT_VX(vmax_vx_w, 4, 4)
1368 GEN_VEXT_VX(vmax_vx_d, 8, 8)
1369 
1370 /* Vector Single-Width Integer Multiply Instructions */
1371 #define DO_MUL(N, M) (N * M)
1372 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1373 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1374 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1375 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
1376 GEN_VEXT_VV(vmul_vv_b, 1, 1)
1377 GEN_VEXT_VV(vmul_vv_h, 2, 2)
1378 GEN_VEXT_VV(vmul_vv_w, 4, 4)
1379 GEN_VEXT_VV(vmul_vv_d, 8, 8)
1380 
1381 static int8_t do_mulh_b(int8_t s2, int8_t s1)
1382 {
1383     return (int16_t)s2 * (int16_t)s1 >> 8;
1384 }
1385 
1386 static int16_t do_mulh_h(int16_t s2, int16_t s1)
1387 {
1388     return (int32_t)s2 * (int32_t)s1 >> 16;
1389 }
1390 
1391 static int32_t do_mulh_w(int32_t s2, int32_t s1)
1392 {
1393     return (int64_t)s2 * (int64_t)s1 >> 32;
1394 }
1395 
1396 static int64_t do_mulh_d(int64_t s2, int64_t s1)
1397 {
1398     uint64_t hi_64, lo_64;
1399 
1400     muls64(&lo_64, &hi_64, s1, s2);
1401     return hi_64;
1402 }
1403 
1404 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1405 {
1406     return (uint16_t)s2 * (uint16_t)s1 >> 8;
1407 }
1408 
1409 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1410 {
1411     return (uint32_t)s2 * (uint32_t)s1 >> 16;
1412 }
1413 
1414 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1415 {
1416     return (uint64_t)s2 * (uint64_t)s1 >> 32;
1417 }
1418 
1419 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1420 {
1421     uint64_t hi_64, lo_64;
1422 
1423     mulu64(&lo_64, &hi_64, s2, s1);
1424     return hi_64;
1425 }
1426 
1427 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1428 {
1429     return (int16_t)s2 * (uint16_t)s1 >> 8;
1430 }
1431 
1432 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1433 {
1434     return (int32_t)s2 * (uint32_t)s1 >> 16;
1435 }
1436 
1437 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1438 {
1439     return (int64_t)s2 * (uint64_t)s1 >> 32;
1440 }
1441 
1442 /*
1443  * Let  A = the operand's 64-bit pattern, read as unsigned,
1444  *      B = unsigned operand,
1445  *      P = mulu64(A, B), the unsigned 128-bit product,
1446  *      SP = signed-by-unsigned product.
1447  *
1448  * When the sign bit of A is set, its signed value is A - 2 ** 64,
1449  * so
1450  *      IF A < 0 (as a signed value)
1451  *          SP = (A - 2 ** 64) * B
1452  *             = A * B - 2 ** 64 * B
1453  *             = P - 2 ** 64 * B
1454  *      ELSE
1455  *          SP = P
1456  *
1457  * Only the high 64 bits differ from P, therefore
1458  *      HI_P -= (A < 0 ? B : 0)
1459  */
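
/*
 * A small 8-bit illustration of the same identity: A = -3 has the
 * unsigned pattern 0xFD and B = 5, so P = 0xFD * 5 = 0x04F1.
 * Subtracting B from the high byte gives 0x04 - 0x05 = 0xFF, and
 * 0xFFF1 is indeed -15 = -3 * 5 in 16-bit two's complement.
 */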
1460 
1461 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1462 {
1463     uint64_t hi_64, lo_64;
1464 
1465     mulu64(&lo_64, &hi_64, s2, s1);
1466 
1467     hi_64 -= s2 < 0 ? s1 : 0;
1468     return hi_64;
1469 }
1470 
1471 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1472 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1473 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1474 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1475 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1476 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1477 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1478 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1479 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1480 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1481 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1482 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
1483 GEN_VEXT_VV(vmulh_vv_b, 1, 1)
1484 GEN_VEXT_VV(vmulh_vv_h, 2, 2)
1485 GEN_VEXT_VV(vmulh_vv_w, 4, 4)
1486 GEN_VEXT_VV(vmulh_vv_d, 8, 8)
1487 GEN_VEXT_VV(vmulhu_vv_b, 1, 1)
1488 GEN_VEXT_VV(vmulhu_vv_h, 2, 2)
1489 GEN_VEXT_VV(vmulhu_vv_w, 4, 4)
1490 GEN_VEXT_VV(vmulhu_vv_d, 8, 8)
1491 GEN_VEXT_VV(vmulhsu_vv_b, 1, 1)
1492 GEN_VEXT_VV(vmulhsu_vv_h, 2, 2)
1493 GEN_VEXT_VV(vmulhsu_vv_w, 4, 4)
1494 GEN_VEXT_VV(vmulhsu_vv_d, 8, 8)
1495 
1496 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1497 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1498 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1499 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1500 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1501 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1502 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1503 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1504 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1505 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1506 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1507 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1508 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1509 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1510 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1511 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
1512 GEN_VEXT_VX(vmul_vx_b, 1, 1)
1513 GEN_VEXT_VX(vmul_vx_h, 2, 2)
1514 GEN_VEXT_VX(vmul_vx_w, 4, 4)
1515 GEN_VEXT_VX(vmul_vx_d, 8, 8)
1516 GEN_VEXT_VX(vmulh_vx_b, 1, 1)
1517 GEN_VEXT_VX(vmulh_vx_h, 2, 2)
1518 GEN_VEXT_VX(vmulh_vx_w, 4, 4)
1519 GEN_VEXT_VX(vmulh_vx_d, 8, 8)
1520 GEN_VEXT_VX(vmulhu_vx_b, 1, 1)
1521 GEN_VEXT_VX(vmulhu_vx_h, 2, 2)
1522 GEN_VEXT_VX(vmulhu_vx_w, 4, 4)
1523 GEN_VEXT_VX(vmulhu_vx_d, 8, 8)
1524 GEN_VEXT_VX(vmulhsu_vx_b, 1, 1)
1525 GEN_VEXT_VX(vmulhsu_vx_h, 2, 2)
1526 GEN_VEXT_VX(vmulhsu_vx_w, 4, 4)
1527 GEN_VEXT_VX(vmulhsu_vx_d, 8, 8)
1528 
1529 /* Vector Integer Divide Instructions */
1530 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1531 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
1532 #define DO_DIV(N, M)  (unlikely(M == 0) ? (__typeof(N))(-1) :\
1533         unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
1534 #define DO_REM(N, M)  (unlikely(M == 0) ? N :\
1535         unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
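
/*
 * These macros follow the RISC-V division corner cases: division by
 * zero yields all ones (-1 for signed), remainder by zero yields the
 * dividend, and signed overflow (the most negative value divided by -1)
 * yields the dividend for DIV and 0 for REM.
 */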
1536 
1537 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1538 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1539 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1540 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1541 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1542 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1543 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1544 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1545 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1546 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1547 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1548 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1549 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1550 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1551 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1552 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
1553 GEN_VEXT_VV(vdivu_vv_b, 1, 1)
1554 GEN_VEXT_VV(vdivu_vv_h, 2, 2)
1555 GEN_VEXT_VV(vdivu_vv_w, 4, 4)
1556 GEN_VEXT_VV(vdivu_vv_d, 8, 8)
1557 GEN_VEXT_VV(vdiv_vv_b, 1, 1)
1558 GEN_VEXT_VV(vdiv_vv_h, 2, 2)
1559 GEN_VEXT_VV(vdiv_vv_w, 4, 4)
1560 GEN_VEXT_VV(vdiv_vv_d, 8, 8)
1561 GEN_VEXT_VV(vremu_vv_b, 1, 1)
1562 GEN_VEXT_VV(vremu_vv_h, 2, 2)
1563 GEN_VEXT_VV(vremu_vv_w, 4, 4)
1564 GEN_VEXT_VV(vremu_vv_d, 8, 8)
1565 GEN_VEXT_VV(vrem_vv_b, 1, 1)
1566 GEN_VEXT_VV(vrem_vv_h, 2, 2)
1567 GEN_VEXT_VV(vrem_vv_w, 4, 4)
1568 GEN_VEXT_VV(vrem_vv_d, 8, 8)
1569 
1570 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1571 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1572 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1573 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1574 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1575 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1576 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1577 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1578 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1579 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1580 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1581 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1582 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1583 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1584 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1585 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
1586 GEN_VEXT_VX(vdivu_vx_b, 1, 1)
1587 GEN_VEXT_VX(vdivu_vx_h, 2, 2)
1588 GEN_VEXT_VX(vdivu_vx_w, 4, 4)
1589 GEN_VEXT_VX(vdivu_vx_d, 8, 8)
1590 GEN_VEXT_VX(vdiv_vx_b, 1, 1)
1591 GEN_VEXT_VX(vdiv_vx_h, 2, 2)
1592 GEN_VEXT_VX(vdiv_vx_w, 4, 4)
1593 GEN_VEXT_VX(vdiv_vx_d, 8, 8)
1594 GEN_VEXT_VX(vremu_vx_b, 1, 1)
1595 GEN_VEXT_VX(vremu_vx_h, 2, 2)
1596 GEN_VEXT_VX(vremu_vx_w, 4, 4)
1597 GEN_VEXT_VX(vremu_vx_d, 8, 8)
1598 GEN_VEXT_VX(vrem_vx_b, 1, 1)
1599 GEN_VEXT_VX(vrem_vx_h, 2, 2)
1600 GEN_VEXT_VX(vrem_vx_w, 4, 4)
1601 GEN_VEXT_VX(vrem_vx_d, 8, 8)
1602 
1603 /* Vector Widening Integer Multiply Instructions */
1604 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1605 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1606 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1607 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1608 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1609 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1610 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1611 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1612 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
1613 GEN_VEXT_VV(vwmul_vv_b, 1, 2)
1614 GEN_VEXT_VV(vwmul_vv_h, 2, 4)
1615 GEN_VEXT_VV(vwmul_vv_w, 4, 8)
1616 GEN_VEXT_VV(vwmulu_vv_b, 1, 2)
1617 GEN_VEXT_VV(vwmulu_vv_h, 2, 4)
1618 GEN_VEXT_VV(vwmulu_vv_w, 4, 8)
1619 GEN_VEXT_VV(vwmulsu_vv_b, 1, 2)
1620 GEN_VEXT_VV(vwmulsu_vv_h, 2, 4)
1621 GEN_VEXT_VV(vwmulsu_vv_w, 4, 8)
1622 
1623 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1624 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1625 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1626 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1627 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1628 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1629 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1630 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1631 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
1632 GEN_VEXT_VX(vwmul_vx_b, 1, 2)
1633 GEN_VEXT_VX(vwmul_vx_h, 2, 4)
1634 GEN_VEXT_VX(vwmul_vx_w, 4, 8)
1635 GEN_VEXT_VX(vwmulu_vx_b, 1, 2)
1636 GEN_VEXT_VX(vwmulu_vx_h, 2, 4)
1637 GEN_VEXT_VX(vwmulu_vx_w, 4, 8)
1638 GEN_VEXT_VX(vwmulsu_vx_b, 1, 2)
1639 GEN_VEXT_VX(vwmulsu_vx_h, 2, 4)
1640 GEN_VEXT_VX(vwmulsu_vx_w, 4, 8)
1641 
1642 /* Vector Single-Width Integer Multiply-Add Instructions */
1643 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
1644 static void do_##NAME(void *vd, void *vs1, void *vs2, int i)       \
1645 {                                                                  \
1646     TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
1647     TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
1648     TD d = *((TD *)vd + HD(i));                                    \
1649     *((TD *)vd + HD(i)) = OP(s2, s1, d);                           \
1650 }
1651 
1652 #define DO_MACC(N, M, D) (M * N + D)
1653 #define DO_NMSAC(N, M, D) (-(M * N) + D)
1654 #define DO_MADD(N, M, D) (M * D + N)
1655 #define DO_NMSUB(N, M, D) (-(M * D) + N)
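
/*
 * In the OPIVV3/OPIVX3 expansions below the operands are passed as
 * OP(s2, s1, d), i.e. N is the vs2 element, M is the vs1 element (or
 * rs1) and D is the destination element, giving
 *     vmacc:  vd = (vs1 * vs2) + vd      vnmsac: vd = -(vs1 * vs2) + vd
 *     vmadd:  vd = (vs1 * vd) + vs2      vnmsub: vd = -(vs1 * vd) + vs2
 */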
1656 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1657 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1658 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1659 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1660 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1661 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1662 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1663 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1664 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1665 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1666 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1667 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1668 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1669 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1670 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1671 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
1672 GEN_VEXT_VV(vmacc_vv_b, 1, 1)
1673 GEN_VEXT_VV(vmacc_vv_h, 2, 2)
1674 GEN_VEXT_VV(vmacc_vv_w, 4, 4)
1675 GEN_VEXT_VV(vmacc_vv_d, 8, 8)
1676 GEN_VEXT_VV(vnmsac_vv_b, 1, 1)
1677 GEN_VEXT_VV(vnmsac_vv_h, 2, 2)
1678 GEN_VEXT_VV(vnmsac_vv_w, 4, 4)
1679 GEN_VEXT_VV(vnmsac_vv_d, 8, 8)
1680 GEN_VEXT_VV(vmadd_vv_b, 1, 1)
1681 GEN_VEXT_VV(vmadd_vv_h, 2, 2)
1682 GEN_VEXT_VV(vmadd_vv_w, 4, 4)
1683 GEN_VEXT_VV(vmadd_vv_d, 8, 8)
1684 GEN_VEXT_VV(vnmsub_vv_b, 1, 1)
1685 GEN_VEXT_VV(vnmsub_vv_h, 2, 2)
1686 GEN_VEXT_VV(vnmsub_vv_w, 4, 4)
1687 GEN_VEXT_VV(vnmsub_vv_d, 8, 8)
1688 
1689 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
1690 static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
1691 {                                                                   \
1692     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
1693     TD d = *((TD *)vd + HD(i));                                     \
1694     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d);                   \
1695 }
1696 
1697 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1698 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1699 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1700 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1701 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1702 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1703 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1704 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1705 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1706 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1707 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1708 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1709 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1710 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1711 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1712 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
1713 GEN_VEXT_VX(vmacc_vx_b, 1, 1)
1714 GEN_VEXT_VX(vmacc_vx_h, 2, 2)
1715 GEN_VEXT_VX(vmacc_vx_w, 4, 4)
1716 GEN_VEXT_VX(vmacc_vx_d, 8, 8)
1717 GEN_VEXT_VX(vnmsac_vx_b, 1, 1)
1718 GEN_VEXT_VX(vnmsac_vx_h, 2, 2)
1719 GEN_VEXT_VX(vnmsac_vx_w, 4, 4)
1720 GEN_VEXT_VX(vnmsac_vx_d, 8, 8)
1721 GEN_VEXT_VX(vmadd_vx_b, 1, 1)
1722 GEN_VEXT_VX(vmadd_vx_h, 2, 2)
1723 GEN_VEXT_VX(vmadd_vx_w, 4, 4)
1724 GEN_VEXT_VX(vmadd_vx_d, 8, 8)
1725 GEN_VEXT_VX(vnmsub_vx_b, 1, 1)
1726 GEN_VEXT_VX(vnmsub_vx_h, 2, 2)
1727 GEN_VEXT_VX(vnmsub_vx_w, 4, 4)
1728 GEN_VEXT_VX(vnmsub_vx_d, 8, 8)
1729 
1730 /* Vector Widening Integer Multiply-Add Instructions */
1731 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1732 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1733 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1734 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1735 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1736 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1737 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1738 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1739 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
1740 GEN_VEXT_VV(vwmaccu_vv_b, 1, 2)
1741 GEN_VEXT_VV(vwmaccu_vv_h, 2, 4)
1742 GEN_VEXT_VV(vwmaccu_vv_w, 4, 8)
1743 GEN_VEXT_VV(vwmacc_vv_b, 1, 2)
1744 GEN_VEXT_VV(vwmacc_vv_h, 2, 4)
1745 GEN_VEXT_VV(vwmacc_vv_w, 4, 8)
1746 GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2)
1747 GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4)
1748 GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8)
1749 
1750 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
1751 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
1752 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
1753 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
1754 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
1755 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
1756 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
1757 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
1758 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
1759 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
1760 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
1761 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
1762 GEN_VEXT_VX(vwmaccu_vx_b, 1, 2)
1763 GEN_VEXT_VX(vwmaccu_vx_h, 2, 4)
1764 GEN_VEXT_VX(vwmaccu_vx_w, 4, 8)
1765 GEN_VEXT_VX(vwmacc_vx_b, 1, 2)
1766 GEN_VEXT_VX(vwmacc_vx_h, 2, 4)
1767 GEN_VEXT_VX(vwmacc_vx_w, 4, 8)
1768 GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2)
1769 GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4)
1770 GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8)
1771 GEN_VEXT_VX(vwmaccus_vx_b, 1, 2)
1772 GEN_VEXT_VX(vwmaccus_vx_h, 2, 4)
1773 GEN_VEXT_VX(vwmaccus_vx_w, 4, 8)
1774 
1775 /* Vector Integer Merge and Move Instructions */
1776 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H)                              \
1777 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env,           \
1778                   uint32_t desc)                                     \
1779 {                                                                    \
1780     uint32_t vl = env->vl;                                           \
1781     uint32_t i;                                                      \
1782                                                                      \
1783     for (i = env->vstart; i < vl; i++) {                             \
1784         ETYPE s1 = *((ETYPE *)vs1 + H(i));                           \
1785         *((ETYPE *)vd + H(i)) = s1;                                  \
1786     }                                                                \
1787     env->vstart = 0;                                                 \
1788 }
1789 
1790 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t,  H1)
1791 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
1792 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
1793 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
1794 
1795 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H)                              \
1796 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env,         \
1797                   uint32_t desc)                                     \
1798 {                                                                    \
1799     uint32_t vl = env->vl;                                           \
1800     uint32_t i;                                                      \
1801                                                                      \
1802     for (i = env->vstart; i < vl; i++) {                             \
1803         *((ETYPE *)vd + H(i)) = (ETYPE)s1;                           \
1804     }                                                                \
1805     env->vstart = 0;                                                 \
1806 }
1807 
1808 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t,  H1)
1809 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
1810 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
1811 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
1812 
1813 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H)                           \
1814 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,          \
1815                   CPURISCVState *env, uint32_t desc)                 \
1816 {                                                                    \
1817     uint32_t vl = env->vl;                                           \
1818     uint32_t i;                                                      \
1819                                                                      \
1820     for (i = env->vstart; i < vl; i++) {                             \
1821         ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1);            \
1822         *((ETYPE *)vd + H(i)) = *(vt + H(i));                        \
1823     }                                                                \
1824     env->vstart = 0;                                                 \
1825 }
1826 
1827 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t,  H1)
1828 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
1829 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
1830 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
1831 
1832 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H)                           \
1833 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,               \
1834                   void *vs2, CPURISCVState *env, uint32_t desc)      \
1835 {                                                                    \
1836     uint32_t vl = env->vl;                                           \
1837     uint32_t i;                                                      \
1838                                                                      \
1839     for (i = env->vstart; i < vl; i++) {                             \
1840         ETYPE s2 = *((ETYPE *)vs2 + H(i));                           \
1841         ETYPE d = (!vext_elem_mask(v0, i) ? s2 :                     \
1842                    (ETYPE)(target_long)s1);                          \
1843         *((ETYPE *)vd + H(i)) = d;                                   \
1844     }                                                                \
1845     env->vstart = 0;                                                 \
1846 }
1847 
1848 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t,  H1)
1849 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
1850 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
1851 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
1852 
1853 /*
1854  *** Vector Fixed-Point Arithmetic Instructions
1855  */
1856 
1857 /* Vector Single-Width Saturating Add and Subtract */
1858 
1859 /*
1860  * Fixed point instructions generally have a rounding mode and may saturate,
1861  * so define common macros for fixed point here.
1862  */
1863 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
1864                           CPURISCVState *env, int vxrm);
1865 
1866 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)     \
1867 static inline void                                                  \
1868 do_##NAME(void *vd, void *vs1, void *vs2, int i,                    \
1869           CPURISCVState *env, int vxrm)                             \
1870 {                                                                   \
1871     TX1 s1 = *((T1 *)vs1 + HS1(i));                                 \
1872     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
1873     *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1);                    \
1874 }
1875 
1876 static inline void
1877 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
1878              CPURISCVState *env,
1879              uint32_t vl, uint32_t vm, int vxrm,
1880              opivv2_rm_fn *fn)
1881 {
1882     for (uint32_t i = env->vstart; i < vl; i++) {
1883         if (!vm && !vext_elem_mask(v0, i)) {
1884             continue;
1885         }
1886         fn(vd, vs1, vs2, i, env, vxrm);
1887     }
1888     env->vstart = 0;
1889 }
1890 
1891 static inline void
1892 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
1893              CPURISCVState *env,
1894              uint32_t desc, uint32_t esz, uint32_t dsz,
1895              opivv2_rm_fn *fn)
1896 {
1897     uint32_t vm = vext_vm(desc);
1898     uint32_t vl = env->vl;
1899 
1900     switch (env->vxrm) {
1901     case 0: /* rnu */
1902         vext_vv_rm_1(vd, v0, vs1, vs2,
1903                      env, vl, vm, 0, fn);
1904         break;
1905     case 1: /* rne */
1906         vext_vv_rm_1(vd, v0, vs1, vs2,
1907                      env, vl, vm, 1, fn);
1908         break;
1909     case 2: /* rdn */
1910         vext_vv_rm_1(vd, v0, vs1, vs2,
1911                      env, vl, vm, 2, fn);
1912         break;
1913     default: /* rod */
1914         vext_vv_rm_1(vd, v0, vs1, vs2,
1915                      env, vl, vm, 3, fn);
1916         break;
1917     }
1918 }
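
/*
 * env->vxrm is decoded once here, outside the element loop, and each
 * case passes the rounding mode down as a constant, presumably so the
 * per-element work does not have to re-read the CSR state.
 */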
1919 
1920 /* generate helpers for fixed point instructions with OPIVV format */
1921 #define GEN_VEXT_VV_RM(NAME, ESZ, DSZ)                          \
1922 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,     \
1923                   CPURISCVState *env, uint32_t desc)            \
1924 {                                                               \
1925     vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,         \
1926                  do_##NAME);                                    \
1927 }
1928 
1929 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
1930 {
1931     uint8_t res = a + b;
1932     if (res < a) {
1933         res = UINT8_MAX;
1934         env->vxsat = 0x1;
1935     }
1936     return res;
1937 }
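
/*
 * For example, saddu8(env, vxrm, 200, 100) wraps around to 44, which is
 * smaller than the first operand, so the result saturates to UINT8_MAX
 * and vxsat is set.
 */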
1938 
1939 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
1940                                uint16_t b)
1941 {
1942     uint16_t res = a + b;
1943     if (res < a) {
1944         res = UINT16_MAX;
1945         env->vxsat = 0x1;
1946     }
1947     return res;
1948 }
1949 
1950 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
1951                                uint32_t b)
1952 {
1953     uint32_t res = a + b;
1954     if (res < a) {
1955         res = UINT32_MAX;
1956         env->vxsat = 0x1;
1957     }
1958     return res;
1959 }
1960 
1961 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
1962                                uint64_t b)
1963 {
1964     uint64_t res = a + b;
1965     if (res < a) {
1966         res = UINT64_MAX;
1967         env->vxsat = 0x1;
1968     }
1969     return res;
1970 }
1971 
1972 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
1973 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
1974 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
1975 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
1976 GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1)
1977 GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2)
1978 GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4)
1979 GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8)
1980 
1981 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
1982                           CPURISCVState *env, int vxrm);
1983 
1984 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)          \
1985 static inline void                                                  \
1986 do_##NAME(void *vd, target_long s1, void *vs2, int i,               \
1987           CPURISCVState *env, int vxrm)                             \
1988 {                                                                   \
1989     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
1990     *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1);           \
1991 }
1992 
1993 static inline void
1994 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
1995              CPURISCVState *env,
1996              uint32_t vl, uint32_t vm, int vxrm,
1997              opivx2_rm_fn *fn)
1998 {
1999     for (uint32_t i = env->vstart; i < vl; i++) {
2000         if (!vm && !vext_elem_mask(v0, i)) {
2001             continue;
2002         }
2003         fn(vd, s1, vs2, i, env, vxrm);
2004     }
2005     env->vstart = 0;
2006 }
2007 
2008 static inline void
2009 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2010              CPURISCVState *env,
2011              uint32_t desc, uint32_t esz, uint32_t dsz,
2012              opivx2_rm_fn *fn)
2013 {
2014     uint32_t vm = vext_vm(desc);
2015     uint32_t vl = env->vl;
2016 
2017     switch (env->vxrm) {
2018     case 0: /* rnu */
2019         vext_vx_rm_1(vd, v0, s1, vs2,
2020                      env, vl, vm, 0, fn);
2021         break;
2022     case 1: /* rne */
2023         vext_vx_rm_1(vd, v0, s1, vs2,
2024                      env, vl, vm, 1, fn);
2025         break;
2026     case 2: /* rdn */
2027         vext_vx_rm_1(vd, v0, s1, vs2,
2028                      env, vl, vm, 2, fn);
2029         break;
2030     default: /* rod */
2031         vext_vx_rm_1(vd, v0, s1, vs2,
2032                      env, vl, vm, 3, fn);
2033         break;
2034     }
2035 }
2036 
2037 /* generate helpers for fixed point instructions with OPIVX format */
2038 #define GEN_VEXT_VX_RM(NAME, ESZ, DSZ)                    \
2039 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
2040         void *vs2, CPURISCVState *env, uint32_t desc)     \
2041 {                                                         \
2042     vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ,    \
2043                  do_##NAME);                              \
2044 }
2045 
2046 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2047 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2048 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2049 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
2050 GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1)
2051 GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2)
2052 GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4)
2053 GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8)
2054 
2055 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2056 {
2057     int8_t res = a + b;
2058     if ((res ^ a) & (res ^ b) & INT8_MIN) {
2059         res = a > 0 ? INT8_MAX : INT8_MIN;
2060         env->vxsat = 0x1;
2061     }
2062     return res;
2063 }
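
/*
 * For example, sadd8(env, vxrm, 100, 100) wraps around to -56; the sign
 * of the result differs from the sign of both operands, so the result
 * saturates to INT8_MAX and vxsat is set.
 */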
2064 
2065 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2066 {
2067     int16_t res = a + b;
2068     if ((res ^ a) & (res ^ b) & INT16_MIN) {
2069         res = a > 0 ? INT16_MAX : INT16_MIN;
2070         env->vxsat = 0x1;
2071     }
2072     return res;
2073 }
2074 
2075 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2076 {
2077     int32_t res = a + b;
2078     if ((res ^ a) & (res ^ b) & INT32_MIN) {
2079         res = a > 0 ? INT32_MAX : INT32_MIN;
2080         env->vxsat = 0x1;
2081     }
2082     return res;
2083 }
2084 
2085 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2086 {
2087     int64_t res = a + b;
2088     if ((res ^ a) & (res ^ b) & INT64_MIN) {
2089         res = a > 0 ? INT64_MAX : INT64_MIN;
2090         env->vxsat = 0x1;
2091     }
2092     return res;
2093 }
2094 
2095 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2096 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2097 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2098 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
2099 GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1)
2100 GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2)
2101 GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4)
2102 GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8)
2103 
2104 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2105 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2106 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2107 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
2108 GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1)
2109 GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2)
2110 GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4)
2111 GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8)
2112 
2113 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2114 {
2115     uint8_t res = a - b;
2116     if (res > a) {
2117         res = 0;
2118         env->vxsat = 0x1;
2119     }
2120     return res;
2121 }
2122 
2123 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2124                                uint16_t b)
2125 {
2126     uint16_t res = a - b;
2127     if (res > a) {
2128         res = 0;
2129         env->vxsat = 0x1;
2130     }
2131     return res;
2132 }
2133 
2134 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2135                                uint32_t b)
2136 {
2137     uint32_t res = a - b;
2138     if (res > a) {
2139         res = 0;
2140         env->vxsat = 0x1;
2141     }
2142     return res;
2143 }
2144 
2145 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2146                                uint64_t b)
2147 {
2148     uint64_t res = a - b;
2149     if (res > a) {
2150         res = 0;
2151         env->vxsat = 0x1;
2152     }
2153     return res;
2154 }
2155 
2156 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2157 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2158 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2159 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
2160 GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1)
2161 GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2)
2162 GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4)
2163 GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8)
2164 
2165 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2166 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2167 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2168 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
2169 GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1)
2170 GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2)
2171 GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4)
2172 GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8)
2173 
2174 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2175 {
2176     int8_t res = a - b;
2177     if ((res ^ a) & (a ^ b) & INT8_MIN) {
2178         res = a >= 0 ? INT8_MAX : INT8_MIN;
2179         env->vxsat = 0x1;
2180     }
2181     return res;
2182 }
2183 
2184 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2185 {
2186     int16_t res = a - b;
2187     if ((res ^ a) & (a ^ b) & INT16_MIN) {
2188         res = a >= 0 ? INT16_MAX : INT16_MIN;
2189         env->vxsat = 0x1;
2190     }
2191     return res;
2192 }
2193 
2194 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2195 {
2196     int32_t res = a - b;
2197     if ((res ^ a) & (a ^ b) & INT32_MIN) {
2198         res = a >= 0 ? INT32_MAX : INT32_MIN;
2199         env->vxsat = 0x1;
2200     }
2201     return res;
2202 }
2203 
2204 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2205 {
2206     int64_t res = a - b;
2207     if ((res ^ a) & (a ^ b) & INT64_MIN) {
2208         res = a >= 0 ? INT64_MAX : INT64_MIN;
2209         env->vxsat = 0x1;
2210     }
2211     return res;
2212 }
2213 
2214 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2215 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2216 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2217 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
2218 GEN_VEXT_VV_RM(vssub_vv_b, 1, 1)
2219 GEN_VEXT_VV_RM(vssub_vv_h, 2, 2)
2220 GEN_VEXT_VV_RM(vssub_vv_w, 4, 4)
2221 GEN_VEXT_VV_RM(vssub_vv_d, 8, 8)
2222 
2223 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2224 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2225 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2226 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
2227 GEN_VEXT_VX_RM(vssub_vx_b, 1, 1)
2228 GEN_VEXT_VX_RM(vssub_vx_h, 2, 2)
2229 GEN_VEXT_VX_RM(vssub_vx_w, 4, 4)
2230 GEN_VEXT_VX_RM(vssub_vx_d, 8, 8)
2231 
2232 /* Vector Single-Width Averaging Add and Subtract */
2233 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2234 {
2235     uint8_t d = extract64(v, shift, 1);
2236     uint8_t d1;
2237     uint64_t D1, D2;
2238 
2239     if (shift == 0 || shift > 64) {
2240         return 0;
2241     }
2242 
2243     d1 = extract64(v, shift - 1, 1);
2244     D1 = extract64(v, 0, shift);
2245     if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2246         return d1;
2247     } else if (vxrm == 1) { /* round-to-nearest-even */
2248         if (shift > 1) {
2249             D2 = extract64(v, 0, shift - 1);
2250             return d1 & ((D2 != 0) | d);
2251         } else {
2252             return d1 & d;
2253         }
2254     } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2255         return !d & (D1 != 0);
2256     }
2257     return 0; /* round-down (truncate) */
2258 }
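
/*
 * For example, with v = 11 (0b1011) and shift = 2 the truncated result
 * is 2 and the exact quotient is 2.75: rnu and rne both return a round
 * bit of 1 (rounding to 3), rdn returns 0 (truncating to 2), and rod
 * returns 1 because discarded bits remain while bit 2 of v is clear,
 * forcing the result to the odd value 3.
 *
 * The callers here mask or limit shift to the element width, so in
 * practice shift stays below 64 and the early return only handles
 * shift == 0.
 */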
2259 
2260 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2261 {
2262     int64_t res = (int64_t)a + b;
2263     uint8_t round = get_round(vxrm, res, 1);
2264 
2265     return (res >> 1) + round;
2266 }
2267 
2268 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2269 {
2270     int64_t res = a + b;
2271     uint8_t round = get_round(vxrm, res, 1);
2272     int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2273 
2274     /* With signed overflow, bit 64 is inverse of bit 63. */
2275     return ((res >> 1) ^ over) + round;
2276 }
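
/*
 * For example, aadd64(env, 0, INT64_MAX, INT64_MAX) wraps to -2; the
 * detected overflow flips the shifted sign bit, so the average comes
 * out as INT64_MAX rather than -1.
 */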
2277 
2278 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2279 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2280 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2281 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
2282 GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1)
2283 GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2)
2284 GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4)
2285 GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8)
2286 
2287 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2288 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2289 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2290 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
2291 GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1)
2292 GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2)
2293 GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4)
2294 GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8)
2295 
2296 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2297                                uint32_t a, uint32_t b)
2298 {
2299     uint64_t res = (uint64_t)a + b;
2300     uint8_t round = get_round(vxrm, res, 1);
2301 
2302     return (res >> 1) + round;
2303 }
2304 
2305 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
2306                                uint64_t a, uint64_t b)
2307 {
2308     uint64_t res = a + b;
2309     uint8_t round = get_round(vxrm, res, 1);
2310     uint64_t over = (uint64_t)(res < a) << 63;
2311 
2312     return ((res >> 1) | over) + round;
2313 }
2314 
2315 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
2316 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
2317 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
2318 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
2319 GEN_VEXT_VV_RM(vaaddu_vv_b, 1, 1)
2320 GEN_VEXT_VV_RM(vaaddu_vv_h, 2, 2)
2321 GEN_VEXT_VV_RM(vaaddu_vv_w, 4, 4)
2322 GEN_VEXT_VV_RM(vaaddu_vv_d, 8, 8)
2323 
2324 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
2325 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
2326 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
2327 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
2328 GEN_VEXT_VX_RM(vaaddu_vx_b, 1, 1)
2329 GEN_VEXT_VX_RM(vaaddu_vx_h, 2, 2)
2330 GEN_VEXT_VX_RM(vaaddu_vx_w, 4, 4)
2331 GEN_VEXT_VX_RM(vaaddu_vx_d, 8, 8)
2332 
2333 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2334 {
2335     int64_t res = (int64_t)a - b;
2336     uint8_t round = get_round(vxrm, res, 1);
2337 
2338     return (res >> 1) + round;
2339 }
2340 
2341 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2342 {
2343     int64_t res = (int64_t)a - b;
2344     uint8_t round = get_round(vxrm, res, 1);
2345     int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2346 
2347     /* With signed overflow, bit 64 is inverse of bit 63. */
2348     return ((res >> 1) ^ over) + round;
2349 }
2350 
2351 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2352 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2353 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2354 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
2355 GEN_VEXT_VV_RM(vasub_vv_b, 1, 1)
2356 GEN_VEXT_VV_RM(vasub_vv_h, 2, 2)
2357 GEN_VEXT_VV_RM(vasub_vv_w, 4, 4)
2358 GEN_VEXT_VV_RM(vasub_vv_d, 8, 8)
2359 
2360 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2361 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2362 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2363 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
2364 GEN_VEXT_VX_RM(vasub_vx_b, 1, 1)
2365 GEN_VEXT_VX_RM(vasub_vx_h, 2, 2)
2366 GEN_VEXT_VX_RM(vasub_vx_w, 4, 4)
2367 GEN_VEXT_VX_RM(vasub_vx_d, 8, 8)
2368 
2369 static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2370                                uint32_t a, uint32_t b)
2371 {
2372     int64_t res = (int64_t)a - b;
2373     uint8_t round = get_round(vxrm, res, 1);
2374 
2375     return (res >> 1) + round;
2376 }
2377 
2378 static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
2379                                uint64_t a, uint64_t b)
2380 {
2381     uint64_t res = (uint64_t)a - b;
2382     uint8_t round = get_round(vxrm, res, 1);
2383     uint64_t over = (uint64_t)(res > a) << 63;
2384 
2385     return ((res >> 1) | over) + round;
2386 }
2387 
2388 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
2389 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
2390 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
2391 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
2392 GEN_VEXT_VV_RM(vasubu_vv_b, 1, 1)
2393 GEN_VEXT_VV_RM(vasubu_vv_h, 2, 2)
2394 GEN_VEXT_VV_RM(vasubu_vv_w, 4, 4)
2395 GEN_VEXT_VV_RM(vasubu_vv_d, 8, 8)
2396 
2397 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
2398 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
2399 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
2400 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
2401 GEN_VEXT_VX_RM(vasubu_vx_b, 1, 1)
2402 GEN_VEXT_VX_RM(vasubu_vx_h, 2, 2)
2403 GEN_VEXT_VX_RM(vasubu_vx_w, 4, 4)
2404 GEN_VEXT_VX_RM(vasubu_vx_d, 8, 8)
2405 
2406 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2407 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2408 {
2409     uint8_t round;
2410     int16_t res;
2411 
2412     res = (int16_t)a * (int16_t)b;
2413     round = get_round(vxrm, res, 7);
2414     res   = (res >> 7) + round;
2415 
2416     if (res > INT8_MAX) {
2417         env->vxsat = 0x1;
2418         return INT8_MAX;
2419     } else if (res < INT8_MIN) {
2420         env->vxsat = 0x1;
2421         return INT8_MIN;
2422     } else {
2423         return res;
2424     }
2425 }
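
/*
 * For example, reading the operands as Q0.7 fixed point,
 * vsmul8(env, 0, 0x40, 0x40) is 0.5 * 0.5: the raw product 0x1000
 * shifted right by 7 gives 0x20, i.e. 0.25.  Only the pattern
 * 0x80 * 0x80, i.e. (-1.0) * (-1.0), exceeds INT8_MAX and saturates
 * to INT8_MAX with vxsat set.
 */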
2426 
2427 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2428 {
2429     uint8_t round;
2430     int32_t res;
2431 
2432     res = (int32_t)a * (int32_t)b;
2433     round = get_round(vxrm, res, 15);
2434     res   = (res >> 15) + round;
2435 
2436     if (res > INT16_MAX) {
2437         env->vxsat = 0x1;
2438         return INT16_MAX;
2439     } else if (res < INT16_MIN) {
2440         env->vxsat = 0x1;
2441         return INT16_MIN;
2442     } else {
2443         return res;
2444     }
2445 }
2446 
2447 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2448 {
2449     uint8_t round;
2450     int64_t res;
2451 
2452     res = (int64_t)a * (int64_t)b;
2453     round = get_round(vxrm, res, 31);
2454     res   = (res >> 31) + round;
2455 
2456     if (res > INT32_MAX) {
2457         env->vxsat = 0x1;
2458         return INT32_MAX;
2459     } else if (res < INT32_MIN) {
2460         env->vxsat = 0x1;
2461         return INT32_MIN;
2462     } else {
2463         return res;
2464     }
2465 }
2466 
2467 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2468 {
2469     uint8_t round;
2470     uint64_t hi_64, lo_64;
2471     int64_t res;
2472 
2473     if (a == INT64_MIN && b == INT64_MIN) {
2474         env->vxsat = 1;
2475         return INT64_MAX;
2476     }
2477 
2478     muls64(&lo_64, &hi_64, a, b);
2479     round = get_round(vxrm, lo_64, 63);
2480     /*
2481      * Cannot overflow, as there are always
2482      * 2 sign bits after multiply.
2483      */
2484     res = (hi_64 << 1) | (lo_64 >> 63);
2485     if (round) {
2486         if (res == INT64_MAX) {
2487             env->vxsat = 1;
2488         } else {
2489             res += 1;
2490         }
2491     }
2492     return res;
2493 }
2494 
2495 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2496 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2497 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2498 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
2499 GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1)
2500 GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2)
2501 GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4)
2502 GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8)
2503 
2504 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2505 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2506 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2507 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
2508 GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1)
2509 GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2)
2510 GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4)
2511 GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8)
2512 
2513 /* Vector Single-Width Scaling Shift Instructions */
2514 static inline uint8_t
2515 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2516 {
2517     uint8_t round, shift = b & 0x7;
2518     uint8_t res;
2519 
2520     round = get_round(vxrm, a, shift);
2521     res   = (a >> shift)  + round;
2522     return res;
2523 }
2524 static inline uint16_t
2525 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2526 {
2527     uint8_t round, shift = b & 0xf;
2528     uint16_t res;
2529 
2530     round = get_round(vxrm, a, shift);
2531     res   = (a >> shift)  + round;
2532     return res;
2533 }
2534 static inline uint32_t
2535 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2536 {
2537     uint8_t round, shift = b & 0x1f;
2538     uint32_t res;
2539 
2540     round = get_round(vxrm, a, shift);
2541     res   = (a >> shift)  + round;
2542     return res;
2543 }
2544 static inline uint64_t
2545 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2546 {
2547     uint8_t round, shift = b & 0x3f;
2548     uint64_t res;
2549 
2550     round = get_round(vxrm, a, shift);
2551     res   = (a >> shift)  + round;
2552     return res;
2553 }
2554 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2555 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2556 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2557 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
2558 GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1)
2559 GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2)
2560 GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4)
2561 GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8)
2562 
2563 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2564 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2565 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2566 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
2567 GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1)
2568 GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2)
2569 GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4)
2570 GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8)
2571 
2572 static inline int8_t
2573 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2574 {
2575     uint8_t round, shift = b & 0x7;
2576     int8_t res;
2577 
2578     round = get_round(vxrm, a, shift);
2579     res   = (a >> shift)  + round;
2580     return res;
2581 }
2582 static inline int16_t
2583 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2584 {
2585     uint8_t round, shift = b & 0xf;
2586     int16_t res;
2587 
2588     round = get_round(vxrm, a, shift);
2589     res   = (a >> shift)  + round;
2590     return res;
2591 }
2592 static inline int32_t
2593 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2594 {
2595     uint8_t round, shift = b & 0x1f;
2596     int32_t res;
2597 
2598     round = get_round(vxrm, a, shift);
2599     res   = (a >> shift)  + round;
2600     return res;
2601 }
2602 static inline int64_t
2603 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2604 {
2605     uint8_t round, shift = b & 0x3f;
2606     int64_t res;
2607 
2608     round = get_round(vxrm, a, shift);
2609     res   = (a >> shift)  + round;
2610     return res;
2611 }
2612 
2613 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2614 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2615 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2616 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
2617 GEN_VEXT_VV_RM(vssra_vv_b, 1, 1)
2618 GEN_VEXT_VV_RM(vssra_vv_h, 2, 2)
2619 GEN_VEXT_VV_RM(vssra_vv_w, 4, 4)
2620 GEN_VEXT_VV_RM(vssra_vv_d, 8, 8)
2621 
2622 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2623 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2624 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2625 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
2626 GEN_VEXT_VX_RM(vssra_vx_b, 1, 1)
2627 GEN_VEXT_VX_RM(vssra_vx_h, 2, 2)
2628 GEN_VEXT_VX_RM(vssra_vx_w, 4, 4)
2629 GEN_VEXT_VX_RM(vssra_vx_d, 8, 8)
2630 
2631 /* Vector Narrowing Fixed-Point Clip Instructions */
2632 static inline int8_t
2633 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2634 {
2635     uint8_t round, shift = b & 0xf;
2636     int16_t res;
2637 
2638     round = get_round(vxrm, a, shift);
2639     res   = (a >> shift)  + round;
2640     if (res > INT8_MAX) {
2641         env->vxsat = 0x1;
2642         return INT8_MAX;
2643     } else if (res < INT8_MIN) {
2644         env->vxsat = 0x1;
2645         return INT8_MIN;
2646     } else {
2647         return res;
2648     }
2649 }
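
/*
 * For example, vnclip8(env, 0, 0x1234, 4) shifts the 16-bit source
 * right by 4, giving 0x123; that does not fit in int8_t, so the result
 * saturates to INT8_MAX and vxsat is set.
 */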
2650 
2651 static inline int16_t
2652 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2653 {
2654     uint8_t round, shift = b & 0x1f;
2655     int32_t res;
2656 
2657     round = get_round(vxrm, a, shift);
2658     res   = (a >> shift)  + round;
2659     if (res > INT16_MAX) {
2660         env->vxsat = 0x1;
2661         return INT16_MAX;
2662     } else if (res < INT16_MIN) {
2663         env->vxsat = 0x1;
2664         return INT16_MIN;
2665     } else {
2666         return res;
2667     }
2668 }
2669 
2670 static inline int32_t
2671 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2672 {
2673     uint8_t round, shift = b & 0x3f;
2674     int64_t res;
2675 
2676     round = get_round(vxrm, a, shift);
2677     res   = (a >> shift)  + round;
2678     if (res > INT32_MAX) {
2679         env->vxsat = 0x1;
2680         return INT32_MAX;
2681     } else if (res < INT32_MIN) {
2682         env->vxsat = 0x1;
2683         return INT32_MIN;
2684     } else {
2685         return res;
2686     }
2687 }
2688 
2689 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
2690 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
2691 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
2692 GEN_VEXT_VV_RM(vnclip_wv_b, 1, 1)
2693 GEN_VEXT_VV_RM(vnclip_wv_h, 2, 2)
2694 GEN_VEXT_VV_RM(vnclip_wv_w, 4, 4)
2695 
2696 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
2697 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
2698 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
2699 GEN_VEXT_VX_RM(vnclip_wx_b, 1, 1)
2700 GEN_VEXT_VX_RM(vnclip_wx_h, 2, 2)
2701 GEN_VEXT_VX_RM(vnclip_wx_w, 4, 4)
2702 
2703 static inline uint8_t
2704 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2705 {
2706     uint8_t round, shift = b & 0xf;
2707     uint16_t res;
2708 
2709     round = get_round(vxrm, a, shift);
2710     res   = (a >> shift)  + round;
2711     if (res > UINT8_MAX) {
2712         env->vxsat = 0x1;
2713         return UINT8_MAX;
2714     } else {
2715         return res;
2716     }
2717 }
2718 
2719 static inline uint16_t
2720 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
2721 {
2722     uint8_t round, shift = b & 0x1f;
2723     uint32_t res;
2724 
2725     round = get_round(vxrm, a, shift);
2726     res   = (a >> shift)  + round;
2727     if (res > UINT16_MAX) {
2728         env->vxsat = 0x1;
2729         return UINT16_MAX;
2730     } else {
2731         return res;
2732     }
2733 }
2734 
2735 static inline uint32_t
2736 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
2737 {
2738     uint8_t round, shift = b & 0x3f;
2739     uint64_t res;
2740 
2741     round = get_round(vxrm, a, shift);
2742     res   = (a >> shift)  + round;
2743     if (res > UINT32_MAX) {
2744         env->vxsat = 0x1;
2745         return UINT32_MAX;
2746     } else {
2747         return res;
2748     }
2749 }
2750 
2751 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
2752 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
2753 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
2754 GEN_VEXT_VV_RM(vnclipu_wv_b, 1, 1)
2755 GEN_VEXT_VV_RM(vnclipu_wv_h, 2, 2)
2756 GEN_VEXT_VV_RM(vnclipu_wv_w, 4, 4)
2757 
2758 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
2759 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
2760 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
2761 GEN_VEXT_VX_RM(vnclipu_wx_b, 1, 1)
2762 GEN_VEXT_VX_RM(vnclipu_wx_h, 2, 2)
2763 GEN_VEXT_VX_RM(vnclipu_wx_w, 4, 4)
2764 
2765 /*
2766  *** Vector Floating-Point Arithmetic Instructions
2767  */
2768 /* Vector Single-Width Floating-Point Add/Subtract Instructions */
2769 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
2770 static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
2771                       CPURISCVState *env)                      \
2772 {                                                              \
2773     TX1 s1 = *((T1 *)vs1 + HS1(i));                            \
2774     TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
2775     *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status);         \
2776 }
2777 
2778 #define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ)                   \
2779 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
2780                   void *vs2, CPURISCVState *env,          \
2781                   uint32_t desc)                          \
2782 {                                                         \
2783     uint32_t vm = vext_vm(desc);                          \
2784     uint32_t vl = env->vl;                                \
2785     uint32_t i;                                           \
2786                                                           \
2787     for (i = env->vstart; i < vl; i++) {                  \
2788         if (!vm && !vext_elem_mask(v0, i)) {              \
2789             continue;                                     \
2790         }                                                 \
2791         do_##NAME(vd, vs1, vs2, i, env);                  \
2792     }                                                     \
2793     env->vstart = 0;                                      \
2794 }
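     /*
      * In the expansion above, elements below env->vstart and elements whose
      * mask bit is clear are skipped, so the corresponding destination
      * elements are left unchanged; env->vstart is cleared once the helper
      * completes.
      */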
2795 
2796 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
2797 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
2798 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
2799 GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2)
2800 GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4)
2801 GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8)
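     /*
      * A sketch for illustration only, assuming OP_UUU_H (defined earlier in
      * this file) supplies uint16_t for all five type parameters: the
      * RVVCALL(OPFVV2, vfadd_vv_h, ...) line above produces roughly
      *
      *     static void do_vfadd_vv_h(void *vd, void *vs1, void *vs2, int i,
      *                               CPURISCVState *env)
      *     {
      *         uint16_t s1 = *((uint16_t *)vs1 + H2(i));
      *         uint16_t s2 = *((uint16_t *)vs2 + H2(i));
      *         *((uint16_t *)vd + H2(i)) = float16_add(s2, s1, &env->fp_status);
      *     }
      *
      * which GEN_VEXT_VV_ENV then wraps in the masked element loop.
      */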
2802 
2803 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)        \
2804 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
2805                       CPURISCVState *env)                      \
2806 {                                                              \
2807     TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
2808     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
2809 }
2810 
2811 #define GEN_VEXT_VF(NAME, ESZ, DSZ)                       \
2812 void HELPER(NAME)(void *vd, void *v0, uint64_t s1,        \
2813                   void *vs2, CPURISCVState *env,          \
2814                   uint32_t desc)                          \
2815 {                                                         \
2816     uint32_t vm = vext_vm(desc);                          \
2817     uint32_t vl = env->vl;                                \
2818     uint32_t i;                                           \
2819                                                           \
2820     for (i = env->vstart; i < vl; i++) {                  \
2821         if (!vm && !vext_elem_mask(v0, i)) {              \
2822             continue;                                     \
2823         }                                                 \
2824         do_##NAME(vd, s1, vs2, i, env);                   \
2825     }                                                     \
2826     env->vstart = 0;                                      \
2827 }
2828 
2829 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
2830 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
2831 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
2832 GEN_VEXT_VF(vfadd_vf_h, 2, 2)
2833 GEN_VEXT_VF(vfadd_vf_w, 4, 4)
2834 GEN_VEXT_VF(vfadd_vf_d, 8, 8)
2835 
2836 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
2837 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
2838 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
2839 GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2)
2840 GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4)
2841 GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8)
2842 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
2843 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
2844 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
2845 GEN_VEXT_VF(vfsub_vf_h, 2, 2)
2846 GEN_VEXT_VF(vfsub_vf_w, 4, 4)
2847 GEN_VEXT_VF(vfsub_vf_d, 8, 8)
2848 
2849 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
2850 {
2851     return float16_sub(b, a, s);
2852 }
2853 
2854 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
2855 {
2856     return float32_sub(b, a, s);
2857 }
2858 
2859 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
2860 {
2861     return float64_sub(b, a, s);
2862 }
2863 
2864 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
2865 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
2866 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
2867 GEN_VEXT_VF(vfrsub_vf_h, 2, 2)
2868 GEN_VEXT_VF(vfrsub_vf_w, 4, 4)
2869 GEN_VEXT_VF(vfrsub_vf_d, 8, 8)
2870 
2871 /* Vector Widening Floating-Point Add/Subtract Instructions */
2872 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
2873 {
2874     return float32_add(float16_to_float32(a, true, s),
2875             float16_to_float32(b, true, s), s);
2876 }
2877 
2878 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
2879 {
2880     return float64_add(float32_to_float64(a, s),
2881             float32_to_float64(b, s), s);
2883 }
2884 
2885 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
2886 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
2887 GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4)
2888 GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8)
2889 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
2890 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
2891 GEN_VEXT_VF(vfwadd_vf_h, 2, 4)
2892 GEN_VEXT_VF(vfwadd_vf_w, 4, 8)
2893 
2894 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
2895 {
2896     return float32_sub(float16_to_float32(a, true, s),
2897             float16_to_float32(b, true, s), s);
2898 }
2899 
2900 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
2901 {
2902     return float64_sub(float32_to_float64(a, s),
2903             float32_to_float64(b, s), s);
2905 }
2906 
2907 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
2908 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
2909 GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4)
2910 GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8)
2911 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
2912 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
2913 GEN_VEXT_VF(vfwsub_vf_h, 2, 4)
2914 GEN_VEXT_VF(vfwsub_vf_w, 4, 8)
2915 
2916 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
2917 {
2918     return float32_add(a, float16_to_float32(b, true, s), s);
2919 }
2920 
2921 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
2922 {
2923     return float64_add(a, float32_to_float64(b, s), s);
2924 }
2925 
2926 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
2927 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
2928 GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4)
2929 GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8)
2930 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
2931 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
2932 GEN_VEXT_VF(vfwadd_wf_h, 2, 4)
2933 GEN_VEXT_VF(vfwadd_wf_w, 4, 8)
2934 
2935 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
2936 {
2937     return float32_sub(a, float16_to_float32(b, true, s), s);
2938 }
2939 
2940 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
2941 {
2942     return float64_sub(a, float32_to_float64(b, s), s);
2943 }
2944 
2945 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
2946 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
2947 GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4)
2948 GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8)
2949 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
2950 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
2951 GEN_VEXT_VF(vfwsub_wf_h, 2, 4)
2952 GEN_VEXT_VF(vfwsub_wf_w, 4, 8)
2953 
2954 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */
2955 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
2956 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
2957 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
2958 GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2)
2959 GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4)
2960 GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8)
2961 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
2962 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
2963 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
2964 GEN_VEXT_VF(vfmul_vf_h, 2, 2)
2965 GEN_VEXT_VF(vfmul_vf_w, 4, 4)
2966 GEN_VEXT_VF(vfmul_vf_d, 8, 8)
2967 
2968 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
2969 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
2970 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
2971 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2)
2972 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4)
2973 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8)
2974 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
2975 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
2976 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
2977 GEN_VEXT_VF(vfdiv_vf_h, 2, 2)
2978 GEN_VEXT_VF(vfdiv_vf_w, 4, 4)
2979 GEN_VEXT_VF(vfdiv_vf_d, 8, 8)
2980 
2981 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
2982 {
2983     return float16_div(b, a, s);
2984 }
2985 
2986 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
2987 {
2988     return float32_div(b, a, s);
2989 }
2990 
2991 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
2992 {
2993     return float64_div(b, a, s);
2994 }
2995 
2996 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
2997 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
2998 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
2999 GEN_VEXT_VF(vfrdiv_vf_h, 2, 2)
3000 GEN_VEXT_VF(vfrdiv_vf_w, 4, 4)
3001 GEN_VEXT_VF(vfrdiv_vf_d, 8, 8)
3002 
3003 /* Vector Widening Floating-Point Multiply */
3004 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3005 {
3006     return float32_mul(float16_to_float32(a, true, s),
3007             float16_to_float32(b, true, s), s);
3008 }
3009 
3010 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3011 {
3012     return float64_mul(float32_to_float64(a, s),
3013             float32_to_float64(b, s), s);
3015 }

3016 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3017 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
3018 GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4)
3019 GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8)
3020 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3021 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
3022 GEN_VEXT_VF(vfwmul_vf_h, 2, 4)
3023 GEN_VEXT_VF(vfwmul_vf_w, 4, 8)
3024 
3025 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
3026 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)       \
3027 static void do_##NAME(void *vd, void *vs1, void *vs2, int i,       \
3028         CPURISCVState *env)                                        \
3029 {                                                                  \
3030     TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
3031     TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
3032     TD d = *((TD *)vd + HD(i));                                    \
3033     *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status);          \
3034 }
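     /*
      * In OPFVV3/OPFVF3 the callback receives (vs2 element, vs1 element or
      * scalar, current vd element), so each fused multiply-add flavour below
      * selects its accumulator purely by how it orders the arguments it
      * passes to float*_muladd: fmacc16() and friends compute vs1 * vs2 + vd.
      */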
3035 
3036 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3037 {
3038     return float16_muladd(a, b, d, 0, s);
3039 }
3040 
3041 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3042 {
3043     return float32_muladd(a, b, d, 0, s);
3044 }
3045 
3046 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3047 {
3048     return float64_muladd(a, b, d, 0, s);
3049 }
3050 
3051 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3052 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3053 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
3054 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2)
3055 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4)
3056 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8)
3057 
3058 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)           \
3059 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i,    \
3060         CPURISCVState *env)                                       \
3061 {                                                                 \
3062     TX2 s2 = *((T2 *)vs2 + HS2(i));                               \
3063     TD d = *((TD *)vd + HD(i));                                   \
3064     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
3065 }
3066 
3067 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3068 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3069 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
3070 GEN_VEXT_VF(vfmacc_vf_h, 2, 2)
3071 GEN_VEXT_VF(vfmacc_vf_w, 4, 4)
3072 GEN_VEXT_VF(vfmacc_vf_d, 8, 8)
3073 
3074 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3075 {
3076     return float16_muladd(a, b, d,
3077             float_muladd_negate_c | float_muladd_negate_product, s);
3078 }
3079 
3080 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3081 {
3082     return float32_muladd(a, b, d,
3083             float_muladd_negate_c | float_muladd_negate_product, s);
3084 }
3085 
3086 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3087 {
3088     return float64_muladd(a, b, d,
3089             float_muladd_negate_c | float_muladd_negate_product, s);
3090 }
3091 
3092 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3093 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3094 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
3095 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2)
3096 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4)
3097 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8)
3098 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3099 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3100 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
3101 GEN_VEXT_VF(vfnmacc_vf_h, 2, 2)
3102 GEN_VEXT_VF(vfnmacc_vf_w, 4, 4)
3103 GEN_VEXT_VF(vfnmacc_vf_d, 8, 8)
3104 
3105 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3106 {
3107     return float16_muladd(a, b, d, float_muladd_negate_c, s);
3108 }
3109 
3110 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3111 {
3112     return float32_muladd(a, b, d, float_muladd_negate_c, s);
3113 }
3114 
3115 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3116 {
3117     return float64_muladd(a, b, d, float_muladd_negate_c, s);
3118 }
3119 
3120 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3121 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3122 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
3123 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2)
3124 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4)
3125 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8)
3126 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3127 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3128 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
3129 GEN_VEXT_VF(vfmsac_vf_h, 2, 2)
3130 GEN_VEXT_VF(vfmsac_vf_w, 4, 4)
3131 GEN_VEXT_VF(vfmsac_vf_d, 8, 8)
3132 
3133 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3134 {
3135     return float16_muladd(a, b, d, float_muladd_negate_product, s);
3136 }
3137 
3138 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3139 {
3140     return float32_muladd(a, b, d, float_muladd_negate_product, s);
3141 }
3142 
3143 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3144 {
3145     return float64_muladd(a, b, d, float_muladd_negate_product, s);
3146 }
3147 
3148 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3149 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3150 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
3151 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2)
3152 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4)
3153 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8)
3154 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3155 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3156 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
3157 GEN_VEXT_VF(vfnmsac_vf_h, 2, 2)
3158 GEN_VEXT_VF(vfnmsac_vf_w, 4, 4)
3159 GEN_VEXT_VF(vfnmsac_vf_d, 8, 8)
3160 
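     /*
      * The *madd/*msub variants below compute +/-(vd * vs1) +/- vs2, i.e. the
      * destination register supplies a multiplicand rather than the addend.
      */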
3161 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3162 {
3163     return float16_muladd(d, b, a, 0, s);
3164 }
3165 
3166 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3167 {
3168     return float32_muladd(d, b, a, 0, s);
3169 }
3170 
3171 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3172 {
3173     return float64_muladd(d, b, a, 0, s);
3174 }
3175 
3176 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3177 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3178 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
3179 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2)
3180 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4)
3181 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8)
3182 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3183 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3184 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
3185 GEN_VEXT_VF(vfmadd_vf_h, 2, 2)
3186 GEN_VEXT_VF(vfmadd_vf_w, 4, 4)
3187 GEN_VEXT_VF(vfmadd_vf_d, 8, 8)
3188 
3189 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3190 {
3191     return float16_muladd(d, b, a,
3192             float_muladd_negate_c | float_muladd_negate_product, s);
3193 }
3194 
3195 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3196 {
3197     return float32_muladd(d, b, a,
3198             float_muladd_negate_c | float_muladd_negate_product, s);
3199 }
3200 
3201 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3202 {
3203     return float64_muladd(d, b, a,
3204             float_muladd_negate_c | float_muladd_negate_product, s);
3205 }
3206 
3207 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3208 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3209 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
3210 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2)
3211 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4)
3212 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8)
3213 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3214 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3215 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
3216 GEN_VEXT_VF(vfnmadd_vf_h, 2, 2)
3217 GEN_VEXT_VF(vfnmadd_vf_w, 4, 4)
3218 GEN_VEXT_VF(vfnmadd_vf_d, 8, 8)
3219 
3220 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3221 {
3222     return float16_muladd(d, b, a, float_muladd_negate_c, s);
3223 }
3224 
3225 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3226 {
3227     return float32_muladd(d, b, a, float_muladd_negate_c, s);
3228 }
3229 
3230 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3231 {
3232     return float64_muladd(d, b, a, float_muladd_negate_c, s);
3233 }
3234 
3235 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3236 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3237 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
3238 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2)
3239 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4)
3240 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8)
3241 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3242 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3243 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
3244 GEN_VEXT_VF(vfmsub_vf_h, 2, 2)
3245 GEN_VEXT_VF(vfmsub_vf_w, 4, 4)
3246 GEN_VEXT_VF(vfmsub_vf_d, 8, 8)
3247 
3248 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3249 {
3250     return float16_muladd(d, b, a, float_muladd_negate_product, s);
3251 }
3252 
3253 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3254 {
3255     return float32_muladd(d, b, a, float_muladd_negate_product, s);
3256 }
3257 
3258 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3259 {
3260     return float64_muladd(d, b, a, float_muladd_negate_product, s);
3261 }
3262 
3263 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3264 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3265 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
3266 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2)
3267 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4)
3268 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8)
3269 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3270 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3271 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
3272 GEN_VEXT_VF(vfnmsub_vf_h, 2, 2)
3273 GEN_VEXT_VF(vfnmsub_vf_w, 4, 4)
3274 GEN_VEXT_VF(vfnmsub_vf_d, 8, 8)
3275 
3276 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3277 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3278 {
3279     return float32_muladd(float16_to_float32(a, true, s),
3280                         float16_to_float32(b, true, s), d, 0, s);
3281 }
3282 
3283 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3284 {
3285     return float64_muladd(float32_to_float64(a, s),
3286                         float32_to_float64(b, s), d, 0, s);
3287 }
3288 
3289 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3290 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
3291 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4)
3292 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8)
3293 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3294 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
3295 GEN_VEXT_VF(vfwmacc_vf_h, 2, 4)
3296 GEN_VEXT_VF(vfwmacc_vf_w, 4, 8)
3297 
3298 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3299 {
3300     return float32_muladd(float16_to_float32(a, true, s),
3301                         float16_to_float32(b, true, s), d,
3302                         float_muladd_negate_c | float_muladd_negate_product, s);
3303 }
3304 
3305 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3306 {
3307     return float64_muladd(float32_to_float64(a, s),
3308                         float32_to_float64(b, s), d,
3309                         float_muladd_negate_c | float_muladd_negate_product, s);
3310 }
3311 
3312 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3313 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
3314 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4)
3315 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8)
3316 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3317 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
3318 GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4)
3319 GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8)
3320 
3321 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3322 {
3323     return float32_muladd(float16_to_float32(a, true, s),
3324                         float16_to_float32(b, true, s), d,
3325                         float_muladd_negate_c, s);
3326 }
3327 
3328 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3329 {
3330     return float64_muladd(float32_to_float64(a, s),
3331                         float32_to_float64(b, s), d,
3332                         float_muladd_negate_c, s);
3333 }
3334 
3335 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3336 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
3337 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4)
3338 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8)
3339 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3340 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
3341 GEN_VEXT_VF(vfwmsac_vf_h, 2, 4)
3342 GEN_VEXT_VF(vfwmsac_vf_w, 4, 8)
3343 
3344 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3345 {
3346     return float32_muladd(float16_to_float32(a, true, s),
3347                         float16_to_float32(b, true, s), d,
3348                         float_muladd_negate_product, s);
3349 }
3350 
3351 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3352 {
3353     return float64_muladd(float32_to_float64(a, s),
3354                         float32_to_float64(b, s), d,
3355                         float_muladd_negate_product, s);
3356 }
3357 
3358 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3359 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
3360 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4)
3361 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8)
3362 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3363 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
3364 GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4)
3365 GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8)
3366 
3367 /* Vector Floating-Point Square-Root Instruction */
3368 /* (TD, T2, TX2) */
3369 #define OP_UU_H uint16_t, uint16_t, uint16_t
3370 #define OP_UU_W uint32_t, uint32_t, uint32_t
3371 #define OP_UU_D uint64_t, uint64_t, uint64_t
3372 
3373 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP)        \
3374 static void do_##NAME(void *vd, void *vs2, int i,      \
3375         CPURISCVState *env)                            \
3376 {                                                      \
3377     TX2 s2 = *((T2 *)vs2 + HS2(i));                    \
3378     *((TD *)vd + HD(i)) = OP(s2, &env->fp_status);     \
3379 }
3380 
3381 #define GEN_VEXT_V_ENV(NAME, ESZ, DSZ)                 \
3382 void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
3383         CPURISCVState *env, uint32_t desc)             \
3384 {                                                      \
3385     uint32_t vm = vext_vm(desc);                       \
3386     uint32_t vl = env->vl;                             \
3387     uint32_t i;                                        \
3388                                                        \
3389     if (vl == 0) {                                     \
3390         return;                                        \
3391     }                                                  \
3392     for (i = env->vstart; i < vl; i++) {               \
3393         if (!vm && !vext_elem_mask(v0, i)) {           \
3394             continue;                                  \
3395         }                                              \
3396         do_##NAME(vd, vs2, i, env);                    \
3397     }                                                  \
3398     env->vstart = 0;                                   \
3399 }
3400 
3401 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3402 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3403 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
3404 GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2)
3405 GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4)
3406 GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8)
3407 
3408 /*
3409  * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
3410  *
3411  * Adapted from riscv-v-spec recip.c:
3412  * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3413  */
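     /*
      * The 7-bit estimate below is assembled from a table lookup indexed by
      * the low exponent bit and the top six fraction bits; with
      * B = 2^(exp_size - 1) - 1 the exponent bias, the output exponent is
      * (3 * B - 1 - exp) / 2, which is what the out_exp expression computes
      * via the unsigned wrap-around of ~exp.
      */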
3414 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
3415 {
3416     uint64_t sign = extract64(f, frac_size + exp_size, 1);
3417     uint64_t exp = extract64(f, frac_size, exp_size);
3418     uint64_t frac = extract64(f, 0, frac_size);
3419 
3420     const uint8_t lookup_table[] = {
3421         52, 51, 50, 48, 47, 46, 44, 43,
3422         42, 41, 40, 39, 38, 36, 35, 34,
3423         33, 32, 31, 30, 30, 29, 28, 27,
3424         26, 25, 24, 23, 23, 22, 21, 20,
3425         19, 19, 18, 17, 16, 16, 15, 14,
3426         14, 13, 12, 12, 11, 10, 10, 9,
3427         9, 8, 7, 7, 6, 6, 5, 4,
3428         4, 3, 3, 2, 2, 1, 1, 0,
3429         127, 125, 123, 121, 119, 118, 116, 114,
3430         113, 111, 109, 108, 106, 105, 103, 102,
3431         100, 99, 97, 96, 95, 93, 92, 91,
3432         90, 88, 87, 86, 85, 84, 83, 82,
3433         80, 79, 78, 77, 76, 75, 74, 73,
3434         72, 71, 70, 70, 69, 68, 67, 66,
3435         65, 64, 63, 63, 62, 61, 60, 59,
3436         59, 58, 57, 56, 56, 55, 54, 53
3437     };
3438     const int precision = 7;
3439 
3440     if (exp == 0 && frac != 0) { /* subnormal */
3441         /* Normalize the subnormal. */
3442         while (extract64(frac, frac_size - 1, 1) == 0) {
3443             exp--;
3444             frac <<= 1;
3445         }
3446 
3447         frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3448     }
3449 
3450     int idx = ((exp & 1) << (precision - 1)) |
3451                 (frac >> (frac_size - precision + 1));
3452     uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3453                             (frac_size - precision);
3454     uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;
3455 
3456     uint64_t val = 0;
3457     val = deposit64(val, 0, frac_size, out_frac);
3458     val = deposit64(val, frac_size, exp_size, out_exp);
3459     val = deposit64(val, frac_size + exp_size, 1, sign);
3460     return val;
3461 }
3462 
3463 static float16 frsqrt7_h(float16 f, float_status *s)
3464 {
3465     int exp_size = 5, frac_size = 10;
3466     bool sign = float16_is_neg(f);
3467 
3468     /*
3469      * frsqrt7(sNaN) = canonical NaN
3470      * frsqrt7(-inf) = canonical NaN
3471      * frsqrt7(-normal) = canonical NaN
3472      * frsqrt7(-subnormal) = canonical NaN
3473      */
3474     if (float16_is_signaling_nan(f, s) ||
3475             (float16_is_infinity(f) && sign) ||
3476             (float16_is_normal(f) && sign) ||
3477             (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
3478         s->float_exception_flags |= float_flag_invalid;
3479         return float16_default_nan(s);
3480     }
3481 
3482     /* frsqrt7(qNaN) = canonical NaN */
3483     if (float16_is_quiet_nan(f, s)) {
3484         return float16_default_nan(s);
3485     }
3486 
3487     /* frsqrt7(+-0) = +-inf */
3488     if (float16_is_zero(f)) {
3489         s->float_exception_flags |= float_flag_divbyzero;
3490         return float16_set_sign(float16_infinity, sign);
3491     }
3492 
3493     /* frsqrt7(+inf) = +0 */
3494     if (float16_is_infinity(f) && !sign) {
3495         return float16_set_sign(float16_zero, sign);
3496     }
3497 
3498     /* +normal, +subnormal */
3499     uint64_t val = frsqrt7(f, exp_size, frac_size);
3500     return make_float16(val);
3501 }
3502 
3503 static float32 frsqrt7_s(float32 f, float_status *s)
3504 {
3505     int exp_size = 8, frac_size = 23;
3506     bool sign = float32_is_neg(f);
3507 
3508     /*
3509      * frsqrt7(sNaN) = canonical NaN
3510      * frsqrt7(-inf) = canonical NaN
3511      * frsqrt7(-normal) = canonical NaN
3512      * frsqrt7(-subnormal) = canonical NaN
3513      */
3514     if (float32_is_signaling_nan(f, s) ||
3515             (float32_is_infinity(f) && sign) ||
3516             (float32_is_normal(f) && sign) ||
3517             (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
3518         s->float_exception_flags |= float_flag_invalid;
3519         return float32_default_nan(s);
3520     }
3521 
3522     /* frsqrt7(qNaN) = canonical NaN */
3523     if (float32_is_quiet_nan(f, s)) {
3524         return float32_default_nan(s);
3525     }
3526 
3527     /* frsqrt7(+-0) = +-inf */
3528     if (float32_is_zero(f)) {
3529         s->float_exception_flags |= float_flag_divbyzero;
3530         return float32_set_sign(float32_infinity, sign);
3531     }
3532 
3533     /* frsqrt7(+inf) = +0 */
3534     if (float32_is_infinity(f) && !sign) {
3535         return float32_set_sign(float32_zero, sign);
3536     }
3537 
3538     /* +normal, +subnormal */
3539     uint64_t val = frsqrt7(f, exp_size, frac_size);
3540     return make_float32(val);
3541 }
3542 
3543 static float64 frsqrt7_d(float64 f, float_status *s)
3544 {
3545     int exp_size = 11, frac_size = 52;
3546     bool sign = float64_is_neg(f);
3547 
3548     /*
3549      * frsqrt7(sNaN) = canonical NaN
3550      * frsqrt7(-inf) = canonical NaN
3551      * frsqrt7(-normal) = canonical NaN
3552      * frsqrt7(-subnormal) = canonical NaN
3553      */
3554     if (float64_is_signaling_nan(f, s) ||
3555             (float64_is_infinity(f) && sign) ||
3556             (float64_is_normal(f) && sign) ||
3557             (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
3558         s->float_exception_flags |= float_flag_invalid;
3559         return float64_default_nan(s);
3560     }
3561 
3562     /* frsqrt7(qNaN) = canonical NaN */
3563     if (float64_is_quiet_nan(f, s)) {
3564         return float64_default_nan(s);
3565     }
3566 
3567     /* frsqrt7(+-0) = +-inf */
3568     if (float64_is_zero(f)) {
3569         s->float_exception_flags |= float_flag_divbyzero;
3570         return float64_set_sign(float64_infinity, sign);
3571     }
3572 
3573     /* frsqrt7(+inf) = +0 */
3574     if (float64_is_infinity(f) && !sign) {
3575         return float64_set_sign(float64_zero, sign);
3576     }
3577 
3578     /* +normal, +subnormal */
3579     uint64_t val = frsqrt7(f, exp_size, frac_size);
3580     return make_float64(val);
3581 }
3582 
3583 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
3584 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
3585 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
3586 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2, 2)
3587 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4, 4)
3588 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8, 8)
3589 
3590 /*
3591  * Vector Floating-Point Reciprocal Estimate Instruction
3592  *
3593  * Adapted from riscv-v-spec recip.c:
3594  * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3595  */
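     /*
      * As with frsqrt7() above, the estimate is a table lookup, here indexed
      * by the top seven fraction bits, with output exponent 2 * B - 1 - exp
      * for bias B; very small subnormal inputs, whose reciprocal overflows,
      * are rounded to either the largest finite value or infinity depending
      * on the current rounding mode.
      */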
3596 static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
3597                       float_status *s)
3598 {
3599     uint64_t sign = extract64(f, frac_size + exp_size, 1);
3600     uint64_t exp = extract64(f, frac_size, exp_size);
3601     uint64_t frac = extract64(f, 0, frac_size);
3602 
3603     const uint8_t lookup_table[] = {
3604         127, 125, 123, 121, 119, 117, 116, 114,
3605         112, 110, 109, 107, 105, 104, 102, 100,
3606         99, 97, 96, 94, 93, 91, 90, 88,
3607         87, 85, 84, 83, 81, 80, 79, 77,
3608         76, 75, 74, 72, 71, 70, 69, 68,
3609         66, 65, 64, 63, 62, 61, 60, 59,
3610         58, 57, 56, 55, 54, 53, 52, 51,
3611         50, 49, 48, 47, 46, 45, 44, 43,
3612         42, 41, 40, 40, 39, 38, 37, 36,
3613         35, 35, 34, 33, 32, 31, 31, 30,
3614         29, 28, 28, 27, 26, 25, 25, 24,
3615         23, 23, 22, 21, 21, 20, 19, 19,
3616         18, 17, 17, 16, 15, 15, 14, 14,
3617         13, 12, 12, 11, 11, 10, 9, 9,
3618         8, 8, 7, 7, 6, 5, 5, 4,
3619         4, 3, 3, 2, 2, 1, 1, 0
3620     };
3621     const int precision = 7;
3622 
3623     if (exp == 0 && frac != 0) { /* subnormal */
3624         /* Normalize the subnormal. */
3625         while (extract64(frac, frac_size - 1, 1) == 0) {
3626             exp--;
3627             frac <<= 1;
3628         }
3629 
3630         frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3631 
3632         if (exp != 0 && exp != UINT64_MAX) {
3633             /*
3634              * Overflow to infinity or to the largest finite value of the
3635              * same sign, depending on sign and rounding mode.
3636              */
3637             s->float_exception_flags |= (float_flag_inexact |
3638                                          float_flag_overflow);
3639 
3640             if ((s->float_rounding_mode == float_round_to_zero) ||
3641                 ((s->float_rounding_mode == float_round_down) && !sign) ||
3642                 ((s->float_rounding_mode == float_round_up) && sign)) {
3643                 /* Return the largest finite value of the same sign. */
3644                 return (sign << (exp_size + frac_size)) |
3645                     (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
3646             } else {
3647                 /* Return +-inf. */
3648                 return (sign << (exp_size + frac_size)) |
3649                     MAKE_64BIT_MASK(frac_size, exp_size);
3650             }
3651         }
3652     }
3653 
3654     int idx = frac >> (frac_size - precision);
3655     uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3656                             (frac_size - precision);
3657     uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;
3658 
3659     if (out_exp == 0 || out_exp == UINT64_MAX) {
3660         /*
3661          * The result is subnormal, but the underflow exception is not
3662          * raised because there is no additional loss of precision.
3663          */
3664         out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
3665         if (out_exp == UINT64_MAX) {
3666             out_frac >>= 1;
3667             out_exp = 0;
3668         }
3669     }
3670 
3671     uint64_t val = 0;
3672     val = deposit64(val, 0, frac_size, out_frac);
3673     val = deposit64(val, frac_size, exp_size, out_exp);
3674     val = deposit64(val, frac_size + exp_size, 1, sign);
3675     return val;
3676 }
3677 
3678 static float16 frec7_h(float16 f, float_status *s)
3679 {
3680     int exp_size = 5, frac_size = 10;
3681     bool sign = float16_is_neg(f);
3682 
3683     /* frec7(+-inf) = +-0 */
3684     if (float16_is_infinity(f)) {
3685         return float16_set_sign(float16_zero, sign);
3686     }
3687 
3688     /* frec7(+-0) = +-inf */
3689     if (float16_is_zero(f)) {
3690         s->float_exception_flags |= float_flag_divbyzero;
3691         return float16_set_sign(float16_infinity, sign);
3692     }
3693 
3694     /* frec7(sNaN) = canonical NaN */
3695     if (float16_is_signaling_nan(f, s)) {
3696         s->float_exception_flags |= float_flag_invalid;
3697         return float16_default_nan(s);
3698     }
3699 
3700     /* frec7(qNaN) = canonical NaN */
3701     if (float16_is_quiet_nan(f, s)) {
3702         return float16_default_nan(s);
3703     }
3704 
3705     /* +-normal, +-subnormal */
3706     uint64_t val = frec7(f, exp_size, frac_size, s);
3707     return make_float16(val);
3708 }
3709 
3710 static float32 frec7_s(float32 f, float_status *s)
3711 {
3712     int exp_size = 8, frac_size = 23;
3713     bool sign = float32_is_neg(f);
3714 
3715     /* frec7(+-inf) = +-0 */
3716     if (float32_is_infinity(f)) {
3717         return float32_set_sign(float32_zero, sign);
3718     }
3719 
3720     /* frec7(+-0) = +-inf */
3721     if (float32_is_zero(f)) {
3722         s->float_exception_flags |= float_flag_divbyzero;
3723         return float32_set_sign(float32_infinity, sign);
3724     }
3725 
3726     /* frec7(sNaN) = canonical NaN */
3727     if (float32_is_signaling_nan(f, s)) {
3728         s->float_exception_flags |= float_flag_invalid;
3729         return float32_default_nan(s);
3730     }
3731 
3732     /* frec7(qNaN) = canonical NaN */
3733     if (float32_is_quiet_nan(f, s)) {
3734         return float32_default_nan(s);
3735     }
3736 
3737     /* +-normal, +-subnormal */
3738     uint64_t val = frec7(f, exp_size, frac_size, s);
3739     return make_float32(val);
3740 }
3741 
3742 static float64 frec7_d(float64 f, float_status *s)
3743 {
3744     int exp_size = 11, frac_size = 52;
3745     bool sign = float64_is_neg(f);
3746 
3747     /* frec7(+-inf) = +-0 */
3748     if (float64_is_infinity(f)) {
3749         return float64_set_sign(float64_zero, sign);
3750     }
3751 
3752     /* frec7(+-0) = +-inf */
3753     if (float64_is_zero(f)) {
3754         s->float_exception_flags |= float_flag_divbyzero;
3755         return float64_set_sign(float64_infinity, sign);
3756     }
3757 
3758     /* frec7(sNaN) = canonical NaN */
3759     if (float64_is_signaling_nan(f, s)) {
3760         s->float_exception_flags |= float_flag_invalid;
3761         return float64_default_nan(s);
3762     }
3763 
3764     /* frec7(qNaN) = canonical NaN */
3765     if (float64_is_quiet_nan(f, s)) {
3766         return float64_default_nan(s);
3767     }
3768 
3769     /* +-normal, +-subnormal */
3770     uint64_t val = frec7(f, exp_size, frac_size, s);
3771     return make_float64(val);
3772 }
3773 
3774 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
3775 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
3776 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
3777 GEN_VEXT_V_ENV(vfrec7_v_h, 2, 2)
3778 GEN_VEXT_V_ENV(vfrec7_v_w, 4, 4)
3779 GEN_VEXT_V_ENV(vfrec7_v_d, 8, 8)
3780 
3781 /* Vector Floating-Point MIN/MAX Instructions */
3782 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
3783 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
3784 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
3785 GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2)
3786 GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4)
3787 GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8)
3788 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
3789 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
3790 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
3791 GEN_VEXT_VF(vfmin_vf_h, 2, 2)
3792 GEN_VEXT_VF(vfmin_vf_w, 4, 4)
3793 GEN_VEXT_VF(vfmin_vf_d, 8, 8)
3794 
3795 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
3796 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
3797 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
3798 GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2)
3799 GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4)
3800 GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8)
3801 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
3802 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
3803 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
3804 GEN_VEXT_VF(vfmax_vf_h, 2, 2)
3805 GEN_VEXT_VF(vfmax_vf_w, 4, 4)
3806 GEN_VEXT_VF(vfmax_vf_d, 8, 8)
3807 
3808 /* Vector Floating-Point Sign-Injection Instructions */
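     /*
      * With the OPFVV2/OPFVF2 argument order used below, a is the vs2 element
      * and b is the vs1 element (or the scalar), so each helper keeps the
      * magnitude bits of vs2 and derives only the sign bit from vs1/rs1
      * (copied, negated, or xor-ed with vs2's sign).
      */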
3809 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
3810 {
3811     return deposit64(b, 0, 15, a);
3812 }
3813 
3814 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
3815 {
3816     return deposit64(b, 0, 31, a);
3817 }
3818 
3819 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
3820 {
3821     return deposit64(b, 0, 63, a);
3822 }
3823 
3824 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
3825 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
3826 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
3827 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2)
3828 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4)
3829 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8)
3830 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
3831 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
3832 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
3833 GEN_VEXT_VF(vfsgnj_vf_h, 2, 2)
3834 GEN_VEXT_VF(vfsgnj_vf_w, 4, 4)
3835 GEN_VEXT_VF(vfsgnj_vf_d, 8, 8)
3836 
3837 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
3838 {
3839     return deposit64(~b, 0, 15, a);
3840 }
3841 
3842 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
3843 {
3844     return deposit64(~b, 0, 31, a);
3845 }
3846 
3847 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
3848 {
3849     return deposit64(~b, 0, 63, a);
3850 }
3851 
3852 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
3853 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
3854 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
3855 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2)
3856 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4)
3857 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8)
3858 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
3859 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
3860 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
3861 GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2)
3862 GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4)
3863 GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8)
3864 
3865 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
3866 {
3867     return deposit64(b ^ a, 0, 15, a);
3868 }
3869 
3870 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
3871 {
3872     return deposit64(b ^ a, 0, 31, a);
3873 }
3874 
3875 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
3876 {
3877     return deposit64(b ^ a, 0, 63, a);
3878 }
3879 
3880 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
3881 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
3882 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
3883 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2)
3884 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4)
3885 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8)
3886 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
3887 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
3888 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
3889 GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2)
3890 GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4)
3891 GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8)
3892 
3893 /* Vector Floating-Point Compare Instructions */
3894 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP)            \
3895 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
3896                   CPURISCVState *env, uint32_t desc)          \
3897 {                                                             \
3898     uint32_t vm = vext_vm(desc);                              \
3899     uint32_t vl = env->vl;                                    \
3900     uint32_t i;                                               \
3901                                                               \
3902     for (i = env->vstart; i < vl; i++) {                      \
3903         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
3904         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
3905         if (!vm && !vext_elem_mask(v0, i)) {                  \
3906             continue;                                         \
3907         }                                                     \
3908         vext_set_elem_mask(vd, i,                             \
3909                            DO_OP(s2, s1, &env->fp_status));   \
3910     }                                                         \
3911     env->vstart = 0;                                          \
3912 }
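     /*
      * The compare helpers produce one mask bit per element; for inactive
      * elements (masked off or below vstart) the loop above never calls
      * vext_set_elem_mask(), leaving the old mask bit in place.  The eq/ne
      * comparisons below use the quiet softfloat predicates, while lt/le/gt/ge
      * use the signaling ones, so ordered compares against NaN raise the
      * invalid flag.
      */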
3913 
3914 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
3915 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
3916 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
3917 
3918 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP)                      \
3919 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,       \
3920                   CPURISCVState *env, uint32_t desc)                \
3921 {                                                                   \
3922     uint32_t vm = vext_vm(desc);                                    \
3923     uint32_t vl = env->vl;                                          \
3924     uint32_t i;                                                     \
3925                                                                     \
3926     for (i = env->vstart; i < vl; i++) {                            \
3927         ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
3928         if (!vm && !vext_elem_mask(v0, i)) {                        \
3929             continue;                                               \
3930         }                                                           \
3931         vext_set_elem_mask(vd, i,                                   \
3932                            DO_OP(s2, (ETYPE)s1, &env->fp_status));  \
3933     }                                                               \
3934     env->vstart = 0;                                                \
3935 }
3936 
3937 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
3938 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
3939 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
3940 
3941 static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
3942 {
3943     FloatRelation compare = float16_compare_quiet(a, b, s);
3944     return compare != float_relation_equal;
3945 }
3946 
3947 static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
3948 {
3949     FloatRelation compare = float32_compare_quiet(a, b, s);
3950     return compare != float_relation_equal;
3951 }
3952 
3953 static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
3954 {
3955     FloatRelation compare = float64_compare_quiet(a, b, s);
3956     return compare != float_relation_equal;
3957 }
3958 
3959 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
3960 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
3961 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
3962 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
3963 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
3964 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
3965 
3966 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
3967 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
3968 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
3969 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
3970 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
3971 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
3972 
3973 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
3974 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
3975 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
3976 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
3977 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
3978 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
3979 
3980 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
3981 {
3982     FloatRelation compare = float16_compare(a, b, s);
3983     return compare == float_relation_greater;
3984 }
3985 
3986 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
3987 {
3988     FloatRelation compare = float32_compare(a, b, s);
3989     return compare == float_relation_greater;
3990 }
3991 
3992 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
3993 {
3994     FloatRelation compare = float64_compare(a, b, s);
3995     return compare == float_relation_greater;
3996 }
3997 
3998 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
3999 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
4000 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4001 
4002 static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4003 {
4004     FloatRelation compare = float16_compare(a, b, s);
4005     return compare == float_relation_greater ||
4006            compare == float_relation_equal;
4007 }
4008 
4009 static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
4010 {
4011     FloatRelation compare = float32_compare(a, b, s);
4012     return compare == float_relation_greater ||
4013            compare == float_relation_equal;
4014 }
4015 
4016 static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
4017 {
4018     FloatRelation compare = float64_compare(a, b, s);
4019     return compare == float_relation_greater ||
4020            compare == float_relation_equal;
4021 }
4022 
4023 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
4024 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
4025 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
4026 
4027 /* Vector Floating-Point Classify Instruction */
4028 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP)         \
4029 static void do_##NAME(void *vd, void *vs2, int i)      \
4030 {                                                      \
4031     TX2 s2 = *((T2 *)vs2 + HS2(i));                    \
4032     *((TD *)vd + HD(i)) = OP(s2);                      \
4033 }
4034 
4035 #define GEN_VEXT_V(NAME, ESZ, DSZ)                     \
4036 void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
4037                   CPURISCVState *env, uint32_t desc)   \
4038 {                                                      \
4039     uint32_t vm = vext_vm(desc);                       \
4040     uint32_t vl = env->vl;                             \
4041     uint32_t i;                                        \
4042                                                        \
4043     for (i = env->vstart; i < vl; i++) {               \
4044         if (!vm && !vext_elem_mask(v0, i)) {           \
4045             continue;                                  \
4046         }                                              \
4047         do_##NAME(vd, vs2, i);                         \
4048     }                                                  \
4049     env->vstart = 0;                                   \
4050 }
4051 
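     /*
      * fclass_h/s/d return the standard RISC-V fclass bit mask:
      *   bit 0: -inf       bit 1: negative normal   bit 2: negative subnormal
      *   bit 3: -0         bit 4: +0                bit 5: positive subnormal
      *   bit 6: +normal    bit 7: +inf              bit 8: sNaN
      *   bit 9: qNaN
      * Exactly one bit is set for any input.
      */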
4052 target_ulong fclass_h(uint64_t frs1)
4053 {
4054     float16 f = frs1;
4055     bool sign = float16_is_neg(f);
4056 
4057     if (float16_is_infinity(f)) {
4058         return sign ? 1 << 0 : 1 << 7;
4059     } else if (float16_is_zero(f)) {
4060         return sign ? 1 << 3 : 1 << 4;
4061     } else if (float16_is_zero_or_denormal(f)) {
4062         return sign ? 1 << 2 : 1 << 5;
4063     } else if (float16_is_any_nan(f)) {
4064         float_status s = { }; /* for snan_bit_is_one */
4065         return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4066     } else {
4067         return sign ? 1 << 1 : 1 << 6;
4068     }
4069 }
4070 
4071 target_ulong fclass_s(uint64_t frs1)
4072 {
4073     float32 f = frs1;
4074     bool sign = float32_is_neg(f);
4075 
4076     if (float32_is_infinity(f)) {
4077         return sign ? 1 << 0 : 1 << 7;
4078     } else if (float32_is_zero(f)) {
4079         return sign ? 1 << 3 : 1 << 4;
4080     } else if (float32_is_zero_or_denormal(f)) {
4081         return sign ? 1 << 2 : 1 << 5;
4082     } else if (float32_is_any_nan(f)) {
4083         float_status s = { }; /* for snan_bit_is_one */
4084         return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4085     } else {
4086         return sign ? 1 << 1 : 1 << 6;
4087     }
4088 }
4089 
4090 target_ulong fclass_d(uint64_t frs1)
4091 {
4092     float64 f = frs1;
4093     bool sign = float64_is_neg(f);
4094 
4095     if (float64_is_infinity(f)) {
4096         return sign ? 1 << 0 : 1 << 7;
4097     } else if (float64_is_zero(f)) {
4098         return sign ? 1 << 3 : 1 << 4;
4099     } else if (float64_is_zero_or_denormal(f)) {
4100         return sign ? 1 << 2 : 1 << 5;
4101     } else if (float64_is_any_nan(f)) {
4102         float_status s = { }; /* for snan_bit_is_one */
4103         return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4104     } else {
4105         return sign ? 1 << 1 : 1 << 6;
4106     }
4107 }
4108 
4109 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
4110 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
4111 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
4112 GEN_VEXT_V(vfclass_v_h, 2, 2)
4113 GEN_VEXT_V(vfclass_v_w, 4, 4)
4114 GEN_VEXT_V(vfclass_v_d, 8, 8)
4115 
4116 /* Vector Floating-Point Merge Instruction */
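/* vfmerge.vfm vd, vs2, rs1, v0 # vd[i] = v0.mask[i] ? f[rs1] : vs2[i] */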
4117 #define GEN_VFMERGE_VF(NAME, ETYPE, H)                        \
4118 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4119                   CPURISCVState *env, uint32_t desc)          \
4120 {                                                             \
4121     uint32_t vm = vext_vm(desc);                              \
4122     uint32_t vl = env->vl;                                    \
4123     uint32_t i;                                               \
4124                                                               \
4125     for (i = env->vstart; i < vl; i++) {                      \
4126         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
4127         *((ETYPE *)vd + H(i))                                 \
4128           = (!vm && !vext_elem_mask(v0, i) ? s2 : s1);        \
4129     }                                                         \
4130     env->vstart = 0;                                          \
4131 }
4132 
4133 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
4134 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
4135 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
4136 
4137 /* Single-Width Floating-Point/Integer Type-Convert Instructions */
4138 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
4139 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
4140 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
4141 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
4142 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2)
4143 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4)
4144 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8)
4145 
4146 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
4147 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
4148 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
4149 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
4150 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2)
4151 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4)
4152 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8)
4153 
4154 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
4155 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
4156 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
4157 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
4158 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2)
4159 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4)
4160 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8)
4161 
4162 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
4163 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
4164 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
4165 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
4166 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2)
4167 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4)
4168 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8)
4169 
4170 /* Widening Floating-Point/Integer Type-Convert Instructions */
4171 /* (TD, T2, TX2) */
4172 #define WOP_UU_B uint16_t, uint8_t,  uint8_t
4173 #define WOP_UU_H uint32_t, uint16_t, uint16_t
4174 #define WOP_UU_W uint64_t, uint32_t, uint32_t
4175 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. */
4176 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
4177 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
4178 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4)
4179 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8)
4180 
4181 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
4182 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
4183 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
4184 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4)
4185 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8)
4186 
4187 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */
4188 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
4189 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
4190 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
4191 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 1, 2)
4192 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4)
4193 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8)
4194 
4195 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
4196 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
4197 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
4198 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
4199 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 1, 2)
4200 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4)
4201 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8)
4202 
4203 /*
4204  * vfwcvt.f.f.v vd, vs2, vm
4205  * Convert single-width float to double-width float.
4206  */
4207 static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4208 {
4209     return float16_to_float32(a, true, s);
4210 }
4211 
4212 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
4213 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
4214 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4)
4215 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8)
4216 
4217 /* Narrowing Floating-Point/Integer Type-Convert Instructions */
4218 /* (TD, T2, TX2) */
4219 #define NOP_UU_B uint8_t,  uint16_t, uint32_t
4220 #define NOP_UU_H uint16_t, uint32_t, uint32_t
4221 #define NOP_UU_W uint32_t, uint64_t, uint64_t
4222 /* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
4223 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
4224 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
4225 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
4226 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1, 1)
4227 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2, 2)
4228 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4, 4)
4229 
4230 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
4231 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
4232 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
4233 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
4234 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1, 1)
4235 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2, 2)
4236 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4, 4)
4237 
4238 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */
4239 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
4240 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
4241 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2, 2)
4242 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4, 4)
4243 
4244 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
4245 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
4246 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
4247 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2, 2)
4248 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4, 4)
4249 
4250 /* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float. */
4251 static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4252 {
4253     return float32_to_float16(a, true, s);
4254 }
4255 
4256 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
4257 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
4258 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2, 2)
4259 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4, 4)
4260 
4261 /*
4262  *** Vector Reduction Operations
4263  */
4264 /* Vector Single-Width Integer Reduction Instructions */
4265 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP)          \
4266 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
4267         void *vs2, CPURISCVState *env, uint32_t desc)     \
4268 {                                                         \
4269     uint32_t vm = vext_vm(desc);                          \
4270     uint32_t vl = env->vl;                                \
4271     uint32_t i;                                           \
4272     TD s1 =  *((TD *)vs1 + HD(0));                        \
4273                                                           \
4274     for (i = env->vstart; i < vl; i++) {                  \
4275         TS2 s2 = *((TS2 *)vs2 + HS2(i));                  \
4276         if (!vm && !vext_elem_mask(v0, i)) {              \
4277             continue;                                     \
4278         }                                                 \
4279         s1 = OP(s1, (TD)s2);                              \
4280     }                                                     \
4281     *((TD *)vd + HD(0)) = s1;                             \
4282     env->vstart = 0;                                      \
4283 }
4284 
4285 /* vd[0] = sum(vs1[0], vs2[*]) */
4286 GEN_VEXT_RED(vredsum_vs_b, int8_t,  int8_t,  H1, H1, DO_ADD)
4287 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
4288 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
4289 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)
4290 
4291 /* vd[0] = maxu(vs1[0], vs2[*]) */
4292 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t,  uint8_t,  H1, H1, DO_MAX)
4293 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
4294 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
4295 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)
4296 
4297 /* vd[0] = max(vs1[0], vs2[*]) */
4298 GEN_VEXT_RED(vredmax_vs_b, int8_t,  int8_t,  H1, H1, DO_MAX)
4299 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
4300 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
4301 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)
4302 
4303 /* vd[0] = minu(vs1[0], vs2[*]) */
4304 GEN_VEXT_RED(vredminu_vs_b, uint8_t,  uint8_t,  H1, H1, DO_MIN)
4305 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
4306 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
4307 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)
4308 
4309 /* vd[0] = min(vs1[0], vs2[*]) */
4310 GEN_VEXT_RED(vredmin_vs_b, int8_t,  int8_t,  H1, H1, DO_MIN)
4311 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
4312 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
4313 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)
4314 
4315 /* vd[0] = and(vs1[0], vs2[*]) */
4316 GEN_VEXT_RED(vredand_vs_b, int8_t,  int8_t,  H1, H1, DO_AND)
4317 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
4318 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
4319 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)
4320 
4321 /* vd[0] = or(vs1[0], vs2[*]) */
4322 GEN_VEXT_RED(vredor_vs_b, int8_t,  int8_t,  H1, H1, DO_OR)
4323 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
4324 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
4325 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)
4326 
4327 /* vd[0] = xor(vs1[0], vs2[*]) */
4328 GEN_VEXT_RED(vredxor_vs_b, int8_t,  int8_t,  H1, H1, DO_XOR)
4329 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
4330 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
4331 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)
4332 
4333 /* Vector Widening Integer Reduction Instructions */
4334 /* signed sum reduction into double-width accumulator */
4335 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t,  H2, H1, DO_ADD)
4336 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
4337 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)
4338 
4339 /* Unsigned sum reduction into double-width accumulator */
4340 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t,  H2, H1, DO_ADD)
4341 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
4342 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
4343 
4344 /* Vector Single-Width Floating-Point Reduction Instructions */
4345 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP)          \
4346 void HELPER(NAME)(void *vd, void *v0, void *vs1,           \
4347                   void *vs2, CPURISCVState *env,           \
4348                   uint32_t desc)                           \
4349 {                                                          \
4350     uint32_t vm = vext_vm(desc);                           \
4351     uint32_t vl = env->vl;                                 \
4352     uint32_t i;                                            \
4353     TD s1 =  *((TD *)vs1 + HD(0));                         \
4354                                                            \
4355     for (i = env->vstart; i < vl; i++) {                   \
4356         TS2 s2 = *((TS2 *)vs2 + HS2(i));                   \
4357         if (!vm && !vext_elem_mask(v0, i)) {               \
4358             continue;                                      \
4359         }                                                  \
4360         s1 = OP(s1, (TD)s2, &env->fp_status);              \
4361     }                                                      \
4362     *((TD *)vd + HD(0)) = s1;                              \
4363     env->vstart = 0;                                       \
4364 }
4365 
4366 /* Unordered sum */
4367 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4368 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4369 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
4370 
4371 /* Maximum value */
4372 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number)
4373 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number)
4374 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number)
4375 
4376 /* Minimum value */
4377 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number)
4378 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number)
4379 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number)
4380 
4381 /* Vector Widening Floating-Point Reduction Instructions */
4382 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
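/*
 * "Unordered" means the architecture allows any association of the
 * partial sums; this helper simply promotes each active SEW element and
 * accumulates in element order, which is one permitted ordering.
 */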
4383 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
4384                             void *vs2, CPURISCVState *env, uint32_t desc)
4385 {
4386     uint32_t vm = vext_vm(desc);
4387     uint32_t vl = env->vl;
4388     uint32_t i;
4389     uint32_t s1 =  *((uint32_t *)vs1 + H4(0));
4390 
4391     for (i = env->vstart; i < vl; i++) {
4392         uint16_t s2 = *((uint16_t *)vs2 + H2(i));
4393         if (!vm && !vext_elem_mask(v0, i)) {
4394             continue;
4395         }
4396         s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status),
4397                          &env->fp_status);
4398     }
4399     *((uint32_t *)vd + H4(0)) = s1;
4400     env->vstart = 0;
4401 }
4402 
4403 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
4404                             void *vs2, CPURISCVState *env, uint32_t desc)
4405 {
4406     uint32_t vm = vext_vm(desc);
4407     uint32_t vl = env->vl;
4408     uint32_t i;
4409     uint64_t s1 =  *((uint64_t *)vs1);
4410 
4411     for (i = env->vstart; i < vl; i++) {
4412         uint32_t s2 = *((uint32_t *)vs2 + H4(i));
4413         if (!vm && !vext_elem_mask(v0, i)) {
4414             continue;
4415         }
4416         s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status),
4417                          &env->fp_status);
4418     }
4419     *((uint64_t *)vd) = s1;
4420     env->vstart = 0;
4421 }
4422 
4423 /*
4424  *** Vector Mask Operations
4425  */
4426 /* Vector Mask-Register Logical Instructions */
4427 #define GEN_VEXT_MASK_VV(NAME, OP)                        \
4428 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
4429                   void *vs2, CPURISCVState *env,          \
4430                   uint32_t desc)                          \
4431 {                                                         \
4432     uint32_t vl = env->vl;                                \
4433     uint32_t i;                                           \
4434     int a, b;                                             \
4435                                                           \
4436     for (i = env->vstart; i < vl; i++) {                  \
4437         a = vext_elem_mask(vs1, i);                       \
4438         b = vext_elem_mask(vs2, i);                       \
4439         vext_set_elem_mask(vd, i, OP(b, a));              \
4440     }                                                     \
4441     env->vstart = 0;                                      \
4442 }
4443 
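/*
 * The mask operands below are single bits (0 or 1) as returned by
 * vext_elem_mask(), so logical negation (!) yields the complemented
 * mask bit directly.
 */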
4444 #define DO_NAND(N, M)  (!(N & M))
4445 #define DO_ANDNOT(N, M)  (N & !M)
4446 #define DO_NOR(N, M)  (!(N | M))
4447 #define DO_ORNOT(N, M)  (N | !M)
4448 #define DO_XNOR(N, M)  (!(N ^ M))
4449 
4450 GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
4451 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
4452 GEN_VEXT_MASK_VV(vmandnot_mm, DO_ANDNOT)
4453 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
4454 GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
4455 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
4456 GEN_VEXT_MASK_VV(vmornot_mm, DO_ORNOT)
4457 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
4458 
4459 /* Vector count population in mask vcpop */
4460 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
4461                              uint32_t desc)
4462 {
4463     target_ulong cnt = 0;
4464     uint32_t vm = vext_vm(desc);
4465     uint32_t vl = env->vl;
4466     int i;
4467 
4468     for (i = env->vstart; i < vl; i++) {
4469         if (vm || vext_elem_mask(v0, i)) {
4470             if (vext_elem_mask(vs2, i)) {
4471                 cnt++;
4472             }
4473         }
4474     }
4475     env->vstart = 0;
4476     return cnt;
4477 }
4478 
4479 /* vfirst find-first-set mask bit */
4480 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4481                               uint32_t desc)
4482 {
4483     uint32_t vm = vext_vm(desc);
4484     uint32_t vl = env->vl;
4485     int i;
4486 
4487     for (i = env->vstart; i < vl; i++) {
4488         if (vm || vext_elem_mask(v0, i)) {
4489             if (vext_elem_mask(vs2, i)) {
4490                 return i;
4491             }
4492         }
4493     }
4494     env->vstart = 0;
4495     return -1LL;
4496 }
4497 
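/*
 * vmsbf/vmsif/vmsof set mask bits relative to the first set bit of vs2
 * among active elements, e.g. for vs2 = 0 0 1 0 1 (all active):
 *   BEFORE_FIRST  (vmsbf): 1 1 0 0 0
 *   INCLUDE_FIRST (vmsif): 1 1 1 0 0
 *   ONLY_FIRST    (vmsof): 0 0 1 0 0
 */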
4498 enum set_mask_type {
4499     ONLY_FIRST = 1,
4500     INCLUDE_FIRST,
4501     BEFORE_FIRST,
4502 };
4503 
4504 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
4505                    uint32_t desc, enum set_mask_type type)
4506 {
4507     uint32_t vm = vext_vm(desc);
4508     uint32_t vl = env->vl;
4509     int i;
4510     bool first_mask_bit = false;
4511 
4512     for (i = env->vstart; i < vl; i++) {
4513         if (!vm && !vext_elem_mask(v0, i)) {
4514             continue;
4515         }
4516         /* write a zero to all following active elements */
4517         if (first_mask_bit) {
4518             vext_set_elem_mask(vd, i, 0);
4519             continue;
4520         }
4521         if (vext_elem_mask(vs2, i)) {
4522             first_mask_bit = true;
4523             if (type == BEFORE_FIRST) {
4524                 vext_set_elem_mask(vd, i, 0);
4525             } else {
4526                 vext_set_elem_mask(vd, i, 1);
4527             }
4528         } else {
4529             if (type == ONLY_FIRST) {
4530                 vext_set_elem_mask(vd, i, 0);
4531             } else {
4532                 vext_set_elem_mask(vd, i, 1);
4533             }
4534         }
4535     }
4536     env->vstart = 0;
4537 }
4538 
4539 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4540                      uint32_t desc)
4541 {
4542     vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
4543 }
4544 
4545 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4546                      uint32_t desc)
4547 {
4548     vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
4549 }
4550 
4551 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4552                      uint32_t desc)
4553 {
4554     vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
4555 }
4556 
4557 /* Vector Iota Instruction */
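/*
 * viota.m writes to each active element the count of set bits of vs2 at
 * lower-numbered active positions, e.g. for vs2 = 1 0 0 1 1 (all active)
 * the result is vd = 0 1 1 1 2.
 */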
4558 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H)                                  \
4559 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env,      \
4560                   uint32_t desc)                                          \
4561 {                                                                         \
4562     uint32_t vm = vext_vm(desc);                                          \
4563     uint32_t vl = env->vl;                                                \
4564     uint32_t sum = 0;                                                     \
4565     int i;                                                                \
4566                                                                           \
4567     for (i = env->vstart; i < vl; i++) {                                  \
4568         if (!vm && !vext_elem_mask(v0, i)) {                              \
4569             continue;                                                     \
4570         }                                                                 \
4571         *((ETYPE *)vd + H(i)) = sum;                                      \
4572         if (vext_elem_mask(vs2, i)) {                                     \
4573             sum++;                                                        \
4574         }                                                                 \
4575     }                                                                     \
4576     env->vstart = 0;                                                      \
4577 }
4578 
4579 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t,  H1)
4580 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
4581 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
4582 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
4583 
4584 /* Vector Element Index Instruction */
4585 #define GEN_VEXT_VID_V(NAME, ETYPE, H)                                    \
4586 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc)  \
4587 {                                                                         \
4588     uint32_t vm = vext_vm(desc);                                          \
4589     uint32_t vl = env->vl;                                                \
4590     int i;                                                                \
4591                                                                           \
4592     for (i = env->vstart; i < vl; i++) {                                  \
4593         if (!vm && !vext_elem_mask(v0, i)) {                              \
4594             continue;                                                     \
4595         }                                                                 \
4596         *((ETYPE *)vd + H(i)) = i;                                        \
4597     }                                                                     \
4598     env->vstart = 0;                                                      \
4599 }
4600 
4601 GEN_VEXT_VID_V(vid_v_b, uint8_t,  H1)
4602 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
4603 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
4604 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
4605 
4606 /*
4607  *** Vector Permutation Instructions
4608  */
4609 
4610 /* Vector Slide Instructions */
4611 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                              \
4612 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4613                   CPURISCVState *env, uint32_t desc)                      \
4614 {                                                                         \
4615     uint32_t vm = vext_vm(desc);                                          \
4616     uint32_t vl = env->vl;                                                \
4617     target_ulong offset = s1, i_min, i;                                   \
4618                                                                           \
4619     i_min = MAX(env->vstart, offset);                                     \
4620     for (i = i_min; i < vl; i++) {                                        \
4621         if (!vm && !vext_elem_mask(v0, i)) {                              \
4622             continue;                                                     \
4623         }                                                                 \
4624         *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
4625     }                                                                     \
    env->vstart = 0;                                                      \
4626 }
4627 
4628 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
4629 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t,  H1)
4630 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
4631 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
4632 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
4633 
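/*
 * For vslidedown, source elements at or beyond VLMAX read as zero:
 * active destination elements below i_max copy vs2[i + rs1], while the
 * remaining active elements up to vl are written with zero.
 */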
4634 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                            \
4635 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4636                   CPURISCVState *env, uint32_t desc)                      \
4637 {                                                                         \
4638     uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
4639     uint32_t vm = vext_vm(desc);                                          \
4640     uint32_t vl = env->vl;                                                \
4641     target_ulong i_max, i;                                                \
4642                                                                           \
4643     i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart);       \
4644     for (i = env->vstart; i < i_max; ++i) {                               \
4645         if (vm || vext_elem_mask(v0, i)) {                                \
4646             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));          \
4647         }                                                                 \
4648     }                                                                     \
4649                                                                           \
4650     for (i = i_max; i < vl; ++i) {                                        \
4651         if (vm || vext_elem_mask(v0, i)) {                                \
4652             *((ETYPE *)vd + H(i)) = 0;                                    \
4653         }                                                                 \
4654     }                                                                     \
4655                                                                           \
4656     env->vstart = 0;                                                      \
4657 }
4658 
4659 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
4660 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t,  H1)
4661 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
4662 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
4663 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
4664 
4665 #define GEN_VEXT_VSLIDE1UP(ESZ, H)                                          \
4666 static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
4667                      CPURISCVState *env, uint32_t desc)                     \
4668 {                                                                           \
4669     typedef uint##ESZ##_t ETYPE;                                            \
4670     uint32_t vm = vext_vm(desc);                                            \
4671     uint32_t vl = env->vl;                                                  \
4672     uint32_t i;                                                             \
4673                                                                             \
4674     for (i = env->vstart; i < vl; i++) {                                    \
4675         if (!vm && !vext_elem_mask(v0, i)) {                                \
4676             continue;                                                       \
4677         }                                                                   \
4678         if (i == 0) {                                                       \
4679             *((ETYPE *)vd + H(i)) = s1;                                     \
4680         } else {                                                            \
4681             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));             \
4682         }                                                                   \
4683     }                                                                       \
4684     env->vstart = 0;                                                        \
4685 }
4686 
4687 GEN_VEXT_VSLIDE1UP(8,  H1)
4688 GEN_VEXT_VSLIDE1UP(16, H2)
4689 GEN_VEXT_VSLIDE1UP(32, H4)
4690 GEN_VEXT_VSLIDE1UP(64, H8)
4691 
4692 #define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ)                          \
4693 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4694                   CPURISCVState *env, uint32_t desc)              \
4695 {                                                                 \
4696     vslide1up_##ESZ(vd, v0, s1, vs2, env, desc);                  \
4697 }
4698 
4699 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
4700 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
4701 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
4702 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
4703 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
4704 
4705 #define GEN_VEXT_VSLIDE1DOWN(ESZ, H)                                          \
4706 static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
4707                        CPURISCVState *env, uint32_t desc)                     \
4708 {                                                                             \
4709     typedef uint##ESZ##_t ETYPE;                                              \
4710     uint32_t vm = vext_vm(desc);                                              \
4711     uint32_t vl = env->vl;                                                    \
4712     uint32_t i;                                                               \
4713                                                                               \
4714     for (i = env->vstart; i < vl; i++) {                                      \
4715         if (!vm && !vext_elem_mask(v0, i)) {                                  \
4716             continue;                                                         \
4717         }                                                                     \
4718         if (i == vl - 1) {                                                    \
4719             *((ETYPE *)vd + H(i)) = s1;                                       \
4720         } else {                                                              \
4721             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));               \
4722         }                                                                     \
4723     }                                                                         \
4724     env->vstart = 0;                                                          \
4725 }
4726 
4727 GEN_VEXT_VSLIDE1DOWN(8,  H1)
4728 GEN_VEXT_VSLIDE1DOWN(16, H2)
4729 GEN_VEXT_VSLIDE1DOWN(32, H4)
4730 GEN_VEXT_VSLIDE1DOWN(64, H8)
4731 
4732 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ)                        \
4733 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4734                   CPURISCVState *env, uint32_t desc)              \
4735 {                                                                 \
4736     vslide1down_##ESZ(vd, v0, s1, vs2, env, desc);                \
4737 }
4738 
4739 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
4740 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
4741 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
4742 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
4743 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
4744 
4745 /* Vector Floating-Point Slide Instructions */
4746 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ)                     \
4747 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4748                   CPURISCVState *env, uint32_t desc)          \
4749 {                                                             \
4750     vslide1up_##ESZ(vd, v0, s1, vs2, env, desc);              \
4751 }
4752 
4753 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
4754 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
4755 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
4756 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)
4757 
4758 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ)                   \
4759 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4760                   CPURISCVState *env, uint32_t desc)          \
4761 {                                                             \
4762     vslide1down_##ESZ(vd, v0, s1, vs2, env, desc);            \
4763 }
4764 
4765 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
4766 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
4767 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
4768 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
4769 
4770 /* Vector Register Gather Instruction */
4771 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2)                    \
4772 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
4773                   CPURISCVState *env, uint32_t desc)                      \
4774 {                                                                         \
4775     uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2)));             \
4776     uint32_t vm = vext_vm(desc);                                          \
4777     uint32_t vl = env->vl;                                                \
4778     uint64_t index;                                                       \
4779     uint32_t i;                                                           \
4780                                                                           \
4781     for (i = env->vstart; i < vl; i++) {                                  \
4782         if (!vm && !vext_elem_mask(v0, i)) {                              \
4783             continue;                                                     \
4784         }                                                                 \
4785         index = *((TS1 *)vs1 + HS1(i));                                   \
4786         if (index >= vlmax) {                                             \
4787             *((TS2 *)vd + HS2(i)) = 0;                                    \
4788         } else {                                                          \
4789             *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index));           \
4790         }                                                                 \
4791     }                                                                     \
4792     env->vstart = 0;                                                      \
4793 }
4794 
4795 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
4796 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t,  uint8_t,  H1, H1)
4797 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
4798 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
4799 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)
4800 
4801 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t,  H2, H1)
4802 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
4803 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
4804 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
4805 
4806 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                              \
4807 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4808                   CPURISCVState *env, uint32_t desc)                      \
4809 {                                                                         \
4810     uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
4811     uint32_t vm = vext_vm(desc);                                          \
4812     uint32_t vl = env->vl;                                                \
4813     uint64_t index = s1;                                                  \
4814     uint32_t i;                                                           \
4815                                                                           \
4816     for (i = env->vstart; i < vl; i++) {                                  \
4817         if (!vm && !vext_elem_mask(v0, i)) {                              \
4818             continue;                                                     \
4819         }                                                                 \
4820         if (index >= vlmax) {                                             \
4821             *((ETYPE *)vd + H(i)) = 0;                                    \
4822         } else {                                                          \
4823             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
4824         }                                                                 \
4825     }                                                                     \
4826     env->vstart = 0;                                                      \
4827 }
4828 
4829 /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
4830 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t,  H1)
4831 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
4832 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
4833 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
4834 
4835 /* Vector Compress Instruction */
4836 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
4837 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
4838                   CPURISCVState *env, uint32_t desc)                      \
4839 {                                                                         \
4840     uint32_t vl = env->vl;                                                \
4841     uint32_t num = 0, i;                                                  \
4842                                                                           \
4843     for (i = env->vstart; i < vl; i++) {                                  \
4844         if (!vext_elem_mask(vs1, i)) {                                    \
4845             continue;                                                     \
4846         }                                                                 \
4847         *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
4848         num++;                                                            \
4849     }                                                                     \
4850     env->vstart = 0;                                                      \
4851 }
4852 
4853 /* Compress into vd elements of vs2 where vs1 is enabled */
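/*
 * e.g. with vs1 mask = 1 0 1 1 and vs2 = {a, b, c, d}, the packed result
 * is vd = {a, c, d}; elements of vd past the packed count are not
 * written by this helper.
 */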
4854 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t,  H1)
4855 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
4856 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
4857 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
4858 
4859 /* Vector Whole Register Move */
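/*
 * vmv<nr>r.v copies whole register groups regardless of vl; the byte
 * count comes from simd_maxsz(desc) (presumably set to nr * VLEN / 8 by
 * the translator), so the LEN parameter below is unused.
 */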
4860 #define GEN_VEXT_VMV_WHOLE(NAME, LEN)                      \
4861 void HELPER(NAME)(void *vd, void *vs2, CPURISCVState *env, \
4862                   uint32_t desc)                           \
4863 {                                                          \
4864     /* EEW = 8 */                                          \
4865     uint32_t maxsz = simd_maxsz(desc);                     \
4866     uint32_t i = env->vstart;                              \
4867                                                            \
4868     memcpy((uint8_t *)vd + H1(i),                          \
4869            (uint8_t *)vs2 + H1(i),                         \
4870            maxsz - env->vstart);                           \
4871                                                            \
4872     env->vstart = 0;                                       \
4873 }
4874 
4875 GEN_VEXT_VMV_WHOLE(vmv1r_v, 1)
4876 GEN_VEXT_VMV_WHOLE(vmv2r_v, 2)
4877 GEN_VEXT_VMV_WHOLE(vmv4r_v, 4)
4878 GEN_VEXT_VMV_WHOLE(vmv8r_v, 8)
4879 
4880 /* Vector Integer Extension */
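/*
 * vzext.vf<n>/vsext.vf<n> zero- or sign-extend each source element of
 * width SEW/<n> up to SEW, e.g. vzext_vf4_w widens uint8_t source
 * elements to uint32_t destinations.
 */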
4881 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)            \
4882 void HELPER(NAME)(void *vd, void *v0, void *vs2,                 \
4883                   CPURISCVState *env, uint32_t desc)             \
4884 {                                                                \
4885     uint32_t vl = env->vl;                                       \
4886     uint32_t vm = vext_vm(desc);                                 \
4887     uint32_t i;                                                  \
4888                                                                  \
4889     for (i = env->vstart; i < vl; i++) {                         \
4890         if (!vm && !vext_elem_mask(v0, i)) {                     \
4891             continue;                                            \
4892         }                                                        \
4893         *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));       \
4894     }                                                            \
4895     env->vstart = 0;                                             \
4896 }
4897 
4898 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t,  H2, H1)
4899 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
4900 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
4901 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t,  H4, H1)
4902 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
4903 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t,  H8, H1)
4904 
4905 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t,  H2, H1)
4906 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
4907 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
4908 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t,  H4, H1)
4909 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
4910 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t,  H8, H1)
4911