xref: /openbmc/qemu/target/riscv/vector_helper.c (revision 05a248715cef192336a594afed812871a52efc1f)
1 /*
2  * RISC-V Vector Extension Helpers for QEMU.
3  *
4  * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2 or later, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License along with
16  * this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include "qemu/osdep.h"
20 #include "qemu/host-utils.h"
21 #include "qemu/bitops.h"
22 #include "cpu.h"
23 #include "exec/memop.h"
24 #include "exec/exec-all.h"
25 #include "exec/helper-proto.h"
26 #include "fpu/softfloat.h"
27 #include "tcg/tcg-gvec-desc.h"
28 #include "internals.h"
29 #include <math.h>
30 
31 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
32                             target_ulong s2)
33 {
34     int vlmax, vl;
35     RISCVCPU *cpu = env_archcpu(env);
36     uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
37     uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
38     uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
39     bool vill = FIELD_EX64(s2, VTYPE, VILL);
40     target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED);
41 
42     if (lmul & 4) {
43         /* Fractional LMUL. */
44         if (lmul == 4 ||
45             cpu->cfg.elen >> (8 - lmul) < sew) {
46             vill = true;
47         }
48     }
49 
50     if ((sew > cpu->cfg.elen)
51         || vill
52         || (ediv != 0)
53         || (reserved != 0)) {
54         /* only set vill bit. */
55         env->vtype = FIELD_DP64(0, VTYPE, VILL, 1);
56         env->vl = 0;
57         env->vstart = 0;
58         return 0;
59     }
60 
61     vlmax = vext_get_vlmax(cpu, s2);
62     if (s1 <= vlmax) {
63         vl = s1;
64     } else {
65         vl = vlmax;
66     }
67     env->vl = vl;
68     env->vtype = s2;
69     env->vstart = 0;
70     return vl;
71 }
72 
/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that need a host-endian fixup.
 */
#ifndef HOST_WORDS_BIGENDIAN
/* Little-endian host: element indices are used unchanged. */
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#else
/* Big-endian host: flip the element index within each 64-bit chunk. */
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#endif
92 
93 static inline uint32_t vext_nf(uint32_t desc)
94 {
95     return FIELD_EX32(simd_data(desc), VDATA, NF);
96 }
97 
98 static inline uint32_t vext_vm(uint32_t desc)
99 {
100     return FIELD_EX32(simd_data(desc), VDATA, VM);
101 }
102 
103 /*
104  * Encode LMUL to lmul as following:
105  *     LMUL    vlmul    lmul
106  *      1       000       0
107  *      2       001       1
108  *      4       010       2
109  *      8       011       3
110  *      -       100       -
111  *     1/8      101      -3
112  *     1/4      110      -2
113  *     1/2      111      -1
114  */
115 static inline int32_t vext_lmul(uint32_t desc)
116 {
117     return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
118 }
119 
/*
 * Get the maximum number of elements that can be operated on.
 *
 * esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
{
    /*
     * simd_desc supports at most 2048 bytes, so the max vlen is 1024 bits;
     * vlen in bytes (vlenb) is what is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);
    /* log2 of VLMAX / vlenb: log2(LMUL) minus log2(element bytes). */
    int scale = vext_lmul(desc) - esz;

    /* Return VLMAX */
    if (scale < 0) {
        return vlenb >> -scale;
    }
    return vlenb << scale;
}
137 
/*
 * This function checks watchpoints before the real load operation.
 *
 * In softmmu mode, the TLB API probe_access is enough for the watchpoint
 * check. In user mode, there is no watchpoint support now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry. The guest
 * software can then return here after processing the exception, or
 * never return.
 */
148 static void probe_pages(CPURISCVState *env, target_ulong addr,
149                         target_ulong len, uintptr_t ra,
150                         MMUAccessType access_type)
151 {
152     target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
153     target_ulong curlen = MIN(pagelen, len);
154 
155     probe_access(env, addr, curlen, access_type,
156                  cpu_mmu_index(env, false), ra);
157     if (len > curlen) {
158         addr += curlen;
159         curlen = len - curlen;
160         probe_access(env, addr, curlen, access_type,
161                      cpu_mmu_index(env, false), ra);
162     }
163 }
164 
/* Write the low bit of value into mask bit `index` of the register at v0. */
static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    /* The mask is addressed as 64-bit words holding one bit per element. */
    uint64_t *word = (uint64_t *)v0 + index / 64;

    *word = deposit64(*word, index % 64, 1, value);
}
173 
174 /*
175  * Earlier designs (pre-0.9) had a varying number of bits
176  * per mask value (MLEN). In the 0.9 design, MLEN=1.
177  * (Section 4.5)
178  */
static inline int vext_elem_mask(void *v0, int index)
{
    uint64_t *words = v0;
    uint64_t word = words[index / 64];

    /* One mask bit per element: pick bit (index % 64) of the word. */
    return (word >> (index % 64)) & 1;
}
185 
186 /* elements operations for load and store */
187 typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
188                                uint32_t idx, void *vd, uintptr_t retaddr);
189 
190 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
191 static void NAME(CPURISCVState *env, abi_ptr addr,         \
192                  uint32_t idx, void *vd, uintptr_t retaddr)\
193 {                                                          \
194     ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
195     *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
196 }                                                          \
197 
198 GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
199 GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
200 GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
201 GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)
202 
203 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
204 static void NAME(CPURISCVState *env, abi_ptr addr,         \
205                  uint32_t idx, void *vd, uintptr_t retaddr)\
206 {                                                          \
207     ETYPE data = *((ETYPE *)vd + H(idx));                  \
208     cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
209 }
210 
211 GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
212 GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
213 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
214 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
215 
216 /*
217  *** stride: access vector element from strided memory
218  */
219 static void
220 vext_ldst_stride(void *vd, void *v0, target_ulong base,
221                  target_ulong stride, CPURISCVState *env,
222                  uint32_t desc, uint32_t vm,
223                  vext_ldst_elem_fn *ldst_elem,
224                  uint32_t esz, uintptr_t ra, MMUAccessType access_type)
225 {
226     uint32_t i, k;
227     uint32_t nf = vext_nf(desc);
228     uint32_t max_elems = vext_max_elems(desc, esz);
229 
230     for (i = env->vstart; i < env->vl; i++, env->vstart++) {
231         if (!vm && !vext_elem_mask(v0, i)) {
232             continue;
233         }
234 
235         k = 0;
236         while (k < nf) {
237             target_ulong addr = base + stride * i + (k << esz);
238             ldst_elem(env, addr, i + k * max_elems, vd, ra);
239             k++;
240         }
241     }
242     env->vstart = 0;
243 }
244 
245 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
246 void HELPER(NAME)(void *vd, void * v0, target_ulong base,               \
247                   target_ulong stride, CPURISCVState *env,              \
248                   uint32_t desc)                                        \
249 {                                                                       \
250     uint32_t vm = vext_vm(desc);                                        \
251     vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
252                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);      \
253 }
254 
255 GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b)
256 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
257 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
258 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)
259 
260 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                       \
261 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
262                   target_ulong stride, CPURISCVState *env,              \
263                   uint32_t desc)                                        \
264 {                                                                       \
265     uint32_t vm = vext_vm(desc);                                        \
266     vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
267                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);     \
268 }
269 
270 GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b)
271 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
272 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
273 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
274 
275 /*
276  *** unit-stride: access elements stored contiguously in memory
277  */
278 
/* unmasked unit-stride load and store operations */
280 static void
281 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
282              vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl,
283              uintptr_t ra, MMUAccessType access_type)
284 {
285     uint32_t i, k;
286     uint32_t nf = vext_nf(desc);
287     uint32_t max_elems = vext_max_elems(desc, esz);
288 
289     /* load bytes from guest memory */
290     for (i = env->vstart; i < evl; i++, env->vstart++) {
291         k = 0;
292         while (k < nf) {
293             target_ulong addr = base + ((i * nf + k) << esz);
294             ldst_elem(env, addr, i + k * max_elems, vd, ra);
295             k++;
296         }
297     }
298     env->vstart = 0;
299 }
300 
/*
 * A masked unit-stride load or store is handled as a special case of the
 * strided operation, with stride = NF * sizeof (ETYPE).
 */
305 
306 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN)                            \
307 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
308                          CPURISCVState *env, uint32_t desc)             \
309 {                                                                       \
310     uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
311     vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
312                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);      \
313 }                                                                       \
314                                                                         \
315 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
316                   CPURISCVState *env, uint32_t desc)                    \
317 {                                                                       \
318     vext_ldst_us(vd, base, env, desc, LOAD_FN,                          \
319                  ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_LOAD); \
320 }
321 
322 GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b)
323 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
324 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
325 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)
326 
327 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN)                            \
328 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,          \
329                          CPURISCVState *env, uint32_t desc)              \
330 {                                                                        \
331     uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));              \
332     vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,   \
333                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);      \
334 }                                                                        \
335                                                                          \
336 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                 \
337                   CPURISCVState *env, uint32_t desc)                     \
338 {                                                                        \
339     vext_ldst_us(vd, base, env, desc, STORE_FN,                          \
340                  ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_STORE); \
341 }
342 
343 GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b)
344 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
345 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
346 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)
347 
348 /*
349  *** unit stride mask load and store, EEW = 1
350  */
351 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
352                     CPURISCVState *env, uint32_t desc)
353 {
354     /* evl = ceil(vl/8) */
355     uint8_t evl = (env->vl + 7) >> 3;
356     vext_ldst_us(vd, base, env, desc, lde_b,
357                  0, evl, GETPC(), MMU_DATA_LOAD);
358 }
359 
360 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
361                     CPURISCVState *env, uint32_t desc)
362 {
363     /* evl = ceil(vl/8) */
364     uint8_t evl = (env->vl + 7) >> 3;
365     vext_ldst_us(vd, base, env, desc, ste_b,
366                  0, evl, GETPC(), MMU_DATA_STORE);
367 }
368 
369 /*
370  *** index: access vector element from indexed memory
371  */
372 typedef target_ulong vext_get_index_addr(target_ulong base,
373         uint32_t idx, void *vs2);
374 
375 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
376 static target_ulong NAME(target_ulong base,            \
377                          uint32_t idx, void *vs2)      \
378 {                                                      \
379     return (base + *((ETYPE *)vs2 + H(idx)));          \
380 }
381 
382 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t,  H1)
383 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
384 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
385 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
386 
387 static inline void
388 vext_ldst_index(void *vd, void *v0, target_ulong base,
389                 void *vs2, CPURISCVState *env, uint32_t desc,
390                 vext_get_index_addr get_index_addr,
391                 vext_ldst_elem_fn *ldst_elem,
392                 uint32_t esz, uintptr_t ra, MMUAccessType access_type)
393 {
394     uint32_t i, k;
395     uint32_t nf = vext_nf(desc);
396     uint32_t vm = vext_vm(desc);
397     uint32_t max_elems = vext_max_elems(desc, esz);
398 
399     /* load bytes from guest memory */
400     for (i = env->vstart; i < env->vl; i++, env->vstart++) {
401         if (!vm && !vext_elem_mask(v0, i)) {
402             continue;
403         }
404 
405         k = 0;
406         while (k < nf) {
407             abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz);
408             ldst_elem(env, addr, i + k * max_elems, vd, ra);
409             k++;
410         }
411     }
412     env->vstart = 0;
413 }
414 
415 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)                  \
416 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                   \
417                   void *vs2, CPURISCVState *env, uint32_t desc)            \
418 {                                                                          \
419     vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,                \
420                     LOAD_FN, ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
421 }
422 
423 GEN_VEXT_LD_INDEX(vlxei8_8_v,   int8_t,  idx_b, lde_b)
424 GEN_VEXT_LD_INDEX(vlxei8_16_v,  int16_t, idx_b, lde_h)
425 GEN_VEXT_LD_INDEX(vlxei8_32_v,  int32_t, idx_b, lde_w)
426 GEN_VEXT_LD_INDEX(vlxei8_64_v,  int64_t, idx_b, lde_d)
427 GEN_VEXT_LD_INDEX(vlxei16_8_v,  int8_t,  idx_h, lde_b)
428 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
429 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
430 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
431 GEN_VEXT_LD_INDEX(vlxei32_8_v,  int8_t,  idx_w, lde_b)
432 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
433 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
434 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
435 GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b)
436 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
437 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
438 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)
439 
440 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)       \
441 void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
442                   void *vs2, CPURISCVState *env, uint32_t desc)  \
443 {                                                                \
444     vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
445                     STORE_FN, ctzl(sizeof(ETYPE)),               \
446                     GETPC(), MMU_DATA_STORE);                    \
447 }
448 
449 GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
450 GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
451 GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
452 GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
453 GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
454 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
455 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
456 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
457 GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
458 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
459 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
460 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
461 GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
462 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
463 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
464 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)
465 
/*
 *** unit-stride fault-only-first load instructions
 */
/*
 * Unit-stride fault-only-first load.
 *
 * Pass 1 probes the nf-field segment of every active element in
 * [vstart, vl).  Element 0 is probed with probe_pages().  For any later
 * element, a page that tlb_vaddr_to_host() cannot resolve (or, in user
 * mode, that fails page_check_range()) does not go through probe_pages();
 * instead the element index is recorded and env->vl is shrunk to it.
 *
 * Pass 2 loads the active elements of [vstart, vl) with the (possibly
 * reduced) vl and then clears vstart.
 *
 * vd:        destination vector register
 * v0:        mask register, consulted when vm is clear
 * base:      guest address of element 0
 * ldst_elem: per-element load callback
 * esz:       log2 of the element size in bytes
 * ra:        host return address handed to the memory accessors
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t esz, uintptr_t ra)
{
    void *host;
    /* vl == 0 doubles as "no truncation"; it is only ever set to i != 0. */
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);
    target_ulong addr, offset, remain;

    /* probe every access*/
    for (i = env->vstart; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        /* Start of the nf-field segment belonging to element i. */
        addr = base + i * (nf << esz);
        if (i == 0) {
            probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf << esz;
            /* Walk the segment one page at a time. */
            while (remain > 0) {
                /* Bytes from addr to the end of its page. */
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
                                         cpu_mmu_index(env, false));
                if (host) {
                    /*
                     * NOTE(review): both variants below use the full
                     * segment length (nf << esz) on every pass of this
                     * loop, even after addr has moved to the next page --
                     * confirm whether `offset`/`remain` was intended.
                     */
#ifdef CONFIG_USER_ONLY
                    if (page_check_range(addr, nf << esz, PAGE_READ) < 0) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
#endif
                } else {
                    /* Page not resolvable: stop before element i. */
                    vl = i;
                    goto ProbeSuccess;
                }
                /* The rest of the segment fits in the current page. */
                if (remain <=  offset) {
                    break;
                }
                remain -= offset;
                addr += offset;
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    /* Unlike the other load loops here, vstart is not advanced per element. */
    for (i = env->vstart; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << esz);
            ldst_elem(env, addr, i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}
536 
537 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
538 void HELPER(NAME)(void *vd, void *v0, target_ulong base,  \
539                   CPURISCVState *env, uint32_t desc)      \
540 {                                                         \
541     vext_ldff(vd, v0, base, env, desc, LOAD_FN,           \
542               ctzl(sizeof(ETYPE)), GETPC());              \
543 }
544 
545 GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b)
546 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
547 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
548 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
549 
/*
 * Element-wise binary operators, used as the OP argument of the helper
 * generator macros.  N and M are the two operands.  Every argument is
 * parenthesized so that expression arguments (e.g. casts or compound
 * expressions) keep their intended precedence.
 */
#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  ((N) & (M))
#define DO_XOR(N, M)  ((N) ^ (M))
#define DO_OR(N, M)   ((N) | (M))
#define DO_ADD(N, M)  ((N) + (M))

/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))

/* Unsigned min/max; UMTYPE must be defined at the point of use. */
#define DO_MAXU(N, M) DO_MAX((UMTYPE)(N), (UMTYPE)(M))
#define DO_MINU(N, M) DO_MIN((UMTYPE)(N), (UMTYPE)(M))
563 
564 /*
565  *** load and store whole register instructions
566  */
567 static void
568 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
569                 vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra,
570                 MMUAccessType access_type)
571 {
572     uint32_t i, k, off, pos;
573     uint32_t nf = vext_nf(desc);
574     uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
575     uint32_t max_elems = vlenb >> esz;
576 
577     k = env->vstart / max_elems;
578     off = env->vstart % max_elems;
579 
580     if (off) {
581         /* load/store rest of elements of current segment pointed by vstart */
582         for (pos = off; pos < max_elems; pos++, env->vstart++) {
583             target_ulong addr = base + ((pos + k * max_elems) << esz);
584             ldst_elem(env, addr, pos + k * max_elems, vd, ra);
585         }
586         k++;
587     }
588 
589     /* load/store elements for rest of segments */
590     for (; k < nf; k++) {
591         for (i = 0; i < max_elems; i++, env->vstart++) {
592             target_ulong addr = base + ((i + k * max_elems) << esz);
593             ldst_elem(env, addr, i + k * max_elems, vd, ra);
594         }
595     }
596 
597     env->vstart = 0;
598 }
599 
600 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN)      \
601 void HELPER(NAME)(void *vd, target_ulong base,       \
602                   CPURISCVState *env, uint32_t desc) \
603 {                                                    \
604     vext_ldst_whole(vd, base, env, desc, LOAD_FN,    \
605                     ctzl(sizeof(ETYPE)), GETPC(),    \
606                     MMU_DATA_LOAD);                  \
607 }
608 
609 GEN_VEXT_LD_WHOLE(vl1re8_v,  int8_t,  lde_b)
610 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
611 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
612 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
613 GEN_VEXT_LD_WHOLE(vl2re8_v,  int8_t,  lde_b)
614 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
615 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
616 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
617 GEN_VEXT_LD_WHOLE(vl4re8_v,  int8_t,  lde_b)
618 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
619 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
620 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
621 GEN_VEXT_LD_WHOLE(vl8re8_v,  int8_t,  lde_b)
622 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
623 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
624 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)
625 
626 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN)     \
627 void HELPER(NAME)(void *vd, target_ulong base,       \
628                   CPURISCVState *env, uint32_t desc) \
629 {                                                    \
630     vext_ldst_whole(vd, base, env, desc, STORE_FN,   \
631                     ctzl(sizeof(ETYPE)), GETPC(),    \
632                     MMU_DATA_STORE);                 \
633 }
634 
635 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
636 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
637 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
638 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
639 
640 /*
641  *** Vector Integer Arithmetic Instructions
642  */
643 
/*
 * Forward the arguments to `macro`.  Going through this extra level lets
 * arguments that are themselves macros (such as the OP_* type lists below)
 * be expanded into their comma-separated contents before `macro` itself is
 * expanded.
 */
#define RVVCALL(macro, ...)  macro(__VA_ARGS__)

/*
 * Type lists in the order (TD, T1, T2, TX1, TX2), as consumed by the
 * OPIVV2/OPIVX2 generators:
 *   TD  - type stored into the destination element
 *   T1  - type of the first source operand (vs1 element or scalar)
 *   T2  - type read from vs2
 *   TX1 - type the first operand is converted to before the operation
 *   TX2 - type the second operand is converted to before the operation
 *
 * Naming: the three letters give the signedness (S/U) of TD, T1 and T2;
 * the suffix (B/H/W/D) is the element width of T1.  OP_ lists use one
 * width throughout, WOP_ lists have a destination twice as wide as the
 * sources, and NOP_ lists have a T2 source twice as wide as the
 * destination.
 */
#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
678 
/* Operation on element i of two source vectors, written to element i of vd. */
typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);

/*
 * Define do_NAME(): read element i of vs1 (as T1) and of vs2 (as T2),
 * convert them to TX1/TX2, and store OP(s2, s1) into element i of vd
 * (as TD).  HD/HS1/HS2 are the host-endian index fixups for each register.
 */
#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
{                                                               \
    T1 *src1 = vs1;                                             \
    T2 *src2 = vs2;                                             \
    TD *dst = vd;                                               \
    TX1 s1 = src1[HS1(i)];                                      \
    TX2 s2 = src2[HS2(i)];                                      \
                                                                \
    dst[HD(i)] = OP(s2, s1);                                    \
}
/*
 * Subtraction and reverse subtraction of two operands.  Arguments are
 * parenthesized so that expression arguments keep their own grouping
 * (e.g. DO_SUB(a, b - c) is a - (b - c), not a - b - c).
 */
#define DO_SUB(N, M) ((N) - (M))
#define DO_RSUB(N, M) ((M) - (N))
691 
/*
 * Per-element bodies do_vadd_vv_{b,h,w,d} and do_vsub_vv_{b,h,w,d}.
 * OPIVV2 applies OP(s2, s1), so the vsub variants compute vs2 - vs1.
 */
RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
700 
701 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
702                        CPURISCVState *env, uint32_t desc,
703                        uint32_t esz, uint32_t dsz,
704                        opivv2_fn *fn)
705 {
706     uint32_t vm = vext_vm(desc);
707     uint32_t vl = env->vl;
708     uint32_t i;
709 
710     for (i = env->vstart; i < vl; i++) {
711         if (!vm && !vext_elem_mask(v0, i)) {
712             continue;
713         }
714         fn(vd, vs1, vs2, i);
715     }
716     env->vstart = 0;
717 }
718 
719 /* generate the helpers for OPIVV */
720 #define GEN_VEXT_VV(NAME, ESZ, DSZ)                       \
721 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
722                   void *vs2, CPURISCVState *env,          \
723                   uint32_t desc)                          \
724 {                                                         \
725     do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,     \
726                do_##NAME);                                \
727 }
728 
729 GEN_VEXT_VV(vadd_vv_b, 1, 1)
730 GEN_VEXT_VV(vadd_vv_h, 2, 2)
731 GEN_VEXT_VV(vadd_vv_w, 4, 4)
732 GEN_VEXT_VV(vadd_vv_d, 8, 8)
733 GEN_VEXT_VV(vsub_vv_b, 1, 1)
734 GEN_VEXT_VV(vsub_vv_h, 2, 2)
735 GEN_VEXT_VV(vsub_vv_w, 4, 4)
736 GEN_VEXT_VV(vsub_vv_d, 8, 8)
737 
738 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
739 
740 /*
741  * (T1)s1 gives the real operator type.
742  * (TX1)(T1)s1 expands the operator type of widen or narrow operations.
743  */
744 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
745 static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
746 {                                                                   \
747     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
748     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
749 }
750 
751 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
752 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
753 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
754 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
755 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
756 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
757 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
758 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
759 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
760 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
761 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
762 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
763 
764 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
765                        CPURISCVState *env, uint32_t desc,
766                        uint32_t esz, uint32_t dsz,
767                        opivx2_fn fn)
768 {
769     uint32_t vm = vext_vm(desc);
770     uint32_t vl = env->vl;
771     uint32_t i;
772 
773     for (i = env->vstart; i < vl; i++) {
774         if (!vm && !vext_elem_mask(v0, i)) {
775             continue;
776         }
777         fn(vd, s1, vs2, i);
778     }
779     env->vstart = 0;
780 }
781 
/*
 * Generate the helper entry point for a vector-scalar instruction: it
 * forwards its arguments, the two size constants and the matching
 * do_<NAME> worker to do_vext_vx().
 */
#define GEN_VEXT_VX(NAME, ESZ, DSZ)                                  \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,    \
                  CPURISCVState *env, uint32_t desc)                 \
{                                                                    \
    const uint32_t esz = (ESZ);                                      \
    const uint32_t dsz = (DSZ);                                      \
                                                                     \
    do_vext_vx(vd, v0, s1, vs2, env, desc, esz, dsz, do_##NAME);     \
}
791 
/*
 * Helper entry points for vadd.vx / vsub.vx / vrsub.vx.  The two numbers
 * are the ESZ and DSZ arguments of GEN_VEXT_VX; here they are equal and
 * follow the b/h/w/d suffix (1/2/4/8).
 */
GEN_VEXT_VX(vadd_vx_b, 1, 1)
GEN_VEXT_VX(vadd_vx_h, 2, 2)
GEN_VEXT_VX(vadd_vx_w, 4, 4)
GEN_VEXT_VX(vadd_vx_d, 8, 8)
GEN_VEXT_VX(vsub_vx_b, 1, 1)
GEN_VEXT_VX(vsub_vx_h, 2, 2)
GEN_VEXT_VX(vsub_vx_w, 4, 4)
GEN_VEXT_VX(vsub_vx_d, 8, 8)
GEN_VEXT_VX(vrsub_vx_b, 1, 1)
GEN_VEXT_VX(vrsub_vx_h, 2, 2)
GEN_VEXT_VX(vrsub_vx_w, 4, 4)
GEN_VEXT_VX(vrsub_vx_d, 8, 8)
804 
805 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
806 {
807     intptr_t oprsz = simd_oprsz(desc);
808     intptr_t i;
809 
810     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
811         *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
812     }
813 }
814 
815 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
816 {
817     intptr_t oprsz = simd_oprsz(desc);
818     intptr_t i;
819 
820     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
821         *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
822     }
823 }
824 
825 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
826 {
827     intptr_t oprsz = simd_oprsz(desc);
828     intptr_t i;
829 
830     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
831         *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
832     }
833 }
834 
835 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
836 {
837     intptr_t oprsz = simd_oprsz(desc);
838     intptr_t i;
839 
840     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
841         *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
842     }
843 }
844 
/* Vector Widening Integer Add/Subtract */
/*
 * Type tuples for the widening forms.  When handed to OPIVX2 the five types
 * fill TD, T1, T2, TX1, TX2 in that order (OPIVV2 presumably uses the same
 * layout - confirm against its definition).  In every tuple the destination
 * type is twice as wide as T1; in the WOP_W* tuples the third type (T2) is
 * already the wide type as well.
 */
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_WUUU_B  uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
#define WOP_WUUU_H  uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
#define WOP_WUUU_W  uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
#define WOP_WSSS_B  int16_t, int8_t, int16_t, int16_t, int16_t
#define WOP_WSSS_H  int32_t, int16_t, int32_t, int32_t, int32_t
#define WOP_WSSS_W  int64_t, int32_t, int64_t, int64_t, int64_t
/*
 * Widening add/subtract with two vector operands (.vv) and with an
 * already-wide vs2 (.wv): first the per-element workers, then the helper
 * entry points.
 * NOTE(review): OPIVV2 and GEN_VEXT_VV are defined outside this section;
 * the H arguments presumably index destination, vs1 and vs2 - confirm.
 */
RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
GEN_VEXT_VV(vwaddu_vv_b, 1, 2)
GEN_VEXT_VV(vwaddu_vv_h, 2, 4)
GEN_VEXT_VV(vwaddu_vv_w, 4, 8)
GEN_VEXT_VV(vwsubu_vv_b, 1, 2)
GEN_VEXT_VV(vwsubu_vv_h, 2, 4)
GEN_VEXT_VV(vwsubu_vv_w, 4, 8)
GEN_VEXT_VV(vwadd_vv_b, 1, 2)
GEN_VEXT_VV(vwadd_vv_h, 2, 4)
GEN_VEXT_VV(vwadd_vv_w, 4, 8)
GEN_VEXT_VV(vwsub_vv_b, 1, 2)
GEN_VEXT_VV(vwsub_vv_h, 2, 4)
GEN_VEXT_VV(vwsub_vv_w, 4, 8)
GEN_VEXT_VV(vwaddu_wv_b, 1, 2)
GEN_VEXT_VV(vwaddu_wv_h, 2, 4)
GEN_VEXT_VV(vwaddu_wv_w, 4, 8)
GEN_VEXT_VV(vwsubu_wv_b, 1, 2)
GEN_VEXT_VV(vwsubu_wv_h, 2, 4)
GEN_VEXT_VV(vwsubu_wv_w, 4, 8)
GEN_VEXT_VV(vwadd_wv_b, 1, 2)
GEN_VEXT_VV(vwadd_wv_h, 2, 4)
GEN_VEXT_VV(vwadd_wv_w, 4, 8)
GEN_VEXT_VV(vwsub_wv_b, 1, 2)
GEN_VEXT_VV(vwsub_wv_h, 2, 4)
GEN_VEXT_VV(vwsub_wv_w, 4, 8)
906 
/*
 * Widening add/subtract with a scalar operand (.vx) and with an
 * already-wide vs2 (.wx): per-element workers built from OPIVX2, followed
 * by their GEN_VEXT_VX helper entry points (ESZ is half of DSZ here).
 */
RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
GEN_VEXT_VX(vwaddu_vx_b, 1, 2)
GEN_VEXT_VX(vwaddu_vx_h, 2, 4)
GEN_VEXT_VX(vwaddu_vx_w, 4, 8)
GEN_VEXT_VX(vwsubu_vx_b, 1, 2)
GEN_VEXT_VX(vwsubu_vx_h, 2, 4)
GEN_VEXT_VX(vwsubu_vx_w, 4, 8)
GEN_VEXT_VX(vwadd_vx_b, 1, 2)
GEN_VEXT_VX(vwadd_vx_h, 2, 4)
GEN_VEXT_VX(vwadd_vx_w, 4, 8)
GEN_VEXT_VX(vwsub_vx_b, 1, 2)
GEN_VEXT_VX(vwsub_vx_h, 2, 4)
GEN_VEXT_VX(vwsub_vx_w, 4, 8)
GEN_VEXT_VX(vwaddu_wx_b, 1, 2)
GEN_VEXT_VX(vwaddu_wx_h, 2, 4)
GEN_VEXT_VX(vwaddu_wx_w, 4, 8)
GEN_VEXT_VX(vwsubu_wx_b, 1, 2)
GEN_VEXT_VX(vwsubu_wx_h, 2, 4)
GEN_VEXT_VX(vwsubu_wx_w, 4, 8)
GEN_VEXT_VX(vwadd_wx_b, 1, 2)
GEN_VEXT_VX(vwadd_wx_h, 2, 4)
GEN_VEXT_VX(vwadd_wx_w, 4, 8)
GEN_VEXT_VX(vwsub_wx_b, 1, 2)
GEN_VEXT_VX(vwsub_wx_h, 2, 4)
GEN_VEXT_VX(vwsub_wx_w, 4, 8)
955 
/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
/*
 * N and M are the two operands and C the carry/borrow input.  Every
 * argument is parenthesized so that an expression passed as an argument
 * keeps its own grouping after expansion.
 */
#define DO_VADC(N, M, C) ((N) + (M) + (C))
#define DO_VSBC(N, M, C) ((N) - (M) - (C))
959 
/*
 * Generate a vector-vector add-with-carry / subtract-with-borrow helper.
 * For each index from env->vstart up to env->vl the result element is
 * DO_OP(vs2[i], vs1[i], vext_elem_mask(v0, i)); env->vstart is cleared at
 * the end.
 */
#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP)              \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    const uint32_t vl = env->vl;                              \
    uint32_t idx = env->vstart;                               \
                                                              \
    while (idx < vl) {                                        \
        ETYPE opnd1 = ((ETYPE *)vs1)[H(idx)];                 \
        ETYPE opnd2 = ((ETYPE *)vs2)[H(idx)];                 \
        ETYPE cin = vext_elem_mask(v0, idx);                  \
                                                              \
        ((ETYPE *)vd)[H(idx)] = DO_OP(opnd2, opnd1, cin);     \
        idx++;                                                \
    }                                                         \
    env->vstart = 0;                                          \
}
976 
/* vadc.vvm and vsbc.vvm helpers on unsigned elements, one per b/h/w/d. */
GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
986 
/*
 * Generate a vector-scalar add-with-carry / subtract-with-borrow helper.
 * The scalar s1 is converted through target_long to ETYPE; for each index
 * from env->vstart up to env->vl the result element is
 * DO_OP(vs2[i], scalar, vext_elem_mask(v0, i)).  env->vstart is cleared at
 * the end.
 */
#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP)                         \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
                  CPURISCVState *env, uint32_t desc)                     \
{                                                                        \
    const uint32_t vl = env->vl;                                         \
    const ETYPE scalar = (ETYPE)(target_long)s1;                         \
    uint32_t idx = env->vstart;                                          \
                                                                         \
    while (idx < vl) {                                                   \
        ETYPE opnd2 = ((ETYPE *)vs2)[H(idx)];                            \
        ETYPE cin = vext_elem_mask(v0, idx);                             \
                                                                         \
        ((ETYPE *)vd)[H(idx)] = DO_OP(opnd2, scalar, cin);               \
        idx++;                                                           \
    }                                                                    \
    env->vstart = 0;                                                     \
}
1002 
/* vadc.vxm and vsbc.vxm helpers on unsigned elements, one per b/h/w/d. */
GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
1012 
/*
 * Carry-out / borrow-out predicates for unsigned operands N and M with
 * carry/borrow input C.
 *
 * DO_MADC: the sum, truncated back to the type of N, wrapped around if it
 * is smaller than N (or not larger than N when a carry of 1 was added).
 * DO_MSBC: N - M - C borrows when N < M (or N <= M when C is set).
 *
 * Arguments are parenthesized so expression arguments keep their grouping;
 * note that N and M are evaluated more than once.
 */
#define DO_MADC(N, M, C) ((C) ? (__typeof(N))((N) + (M) + 1) <= (N) :   \
                                (__typeof(N))((N) + (M)) < (N))
#define DO_MSBC(N, M, C) ((C) ? (N) <= (M) : (N) < (M))
1016 
/*
 * Generate a vector-vector carry-out / borrow-out helper.  For each index
 * from env->vstart up to env->vl, bit i of vd is set to
 * DO_OP(vs2[i], vs1[i], carry), where carry is the v0 mask bit when
 * vext_vm(desc) is zero and 0 otherwise.  env->vstart is cleared at the end.
 */
#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    const uint32_t vl = env->vl;                              \
    const uint32_t vm = vext_vm(desc);                        \
    uint32_t idx = env->vstart;                               \
                                                              \
    while (idx < vl) {                                        \
        ETYPE opnd1 = ((ETYPE *)vs1)[H(idx)];                 \
        ETYPE opnd2 = ((ETYPE *)vs2)[H(idx)];                 \
        ETYPE cin = 0;                                        \
                                                              \
        if (!vm) {                                            \
            cin = (vext_elem_mask(v0, idx) != 0);             \
        }                                                     \
        vext_set_elem_mask(vd, idx, DO_OP(opnd2, opnd1, cin));\
        idx++;                                                \
    }                                                         \
    env->vstart = 0;                                          \
}
1033 
/* vmadc.vvm and vmsbc.vvm helpers on unsigned elements, one per b/h/w/d. */
GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
1043 
/*
 * Generate a vector-scalar carry-out / borrow-out helper.  The scalar s1 is
 * converted through target_long to ETYPE; for each index from env->vstart
 * up to env->vl, bit i of vd is set to DO_OP(vs2[i], scalar, carry), where
 * carry is the v0 mask bit when vext_vm(desc) is zero and 0 otherwise.
 * env->vstart is cleared at the end.
 */
#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{                                                               \
    const uint32_t vl = env->vl;                                \
    const uint32_t vm = vext_vm(desc);                          \
    const ETYPE scalar = (ETYPE)(target_long)s1;                \
    uint32_t idx = env->vstart;                                 \
                                                                \
    while (idx < vl) {                                          \
        ETYPE opnd2 = ((ETYPE *)vs2)[H(idx)];                   \
        ETYPE cin = 0;                                          \
                                                                \
        if (!vm) {                                              \
            cin = (vext_elem_mask(v0, idx) != 0);               \
        }                                                       \
        vext_set_elem_mask(vd, idx, DO_OP(opnd2, scalar, cin)); \
        idx++;                                                  \
    }                                                           \
    env->vstart = 0;                                            \
}
1060 
/* vmadc.vxm and vmsbc.vxm helpers on unsigned elements, one per b/h/w/d. */
GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
1070 
/* Vector Bitwise Logical Instructions */
/*
 * and/or/xor with two vector operands: per-element workers followed by the
 * helper entry points, one per element suffix b/h/w/d.
 * NOTE(review): OPIVV2, GEN_VEXT_VV and DO_AND/DO_OR/DO_XOR are defined
 * outside this section.
 */
RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
GEN_VEXT_VV(vand_vv_b, 1, 1)
GEN_VEXT_VV(vand_vv_h, 2, 2)
GEN_VEXT_VV(vand_vv_w, 4, 4)
GEN_VEXT_VV(vand_vv_d, 8, 8)
GEN_VEXT_VV(vor_vv_b, 1, 1)
GEN_VEXT_VV(vor_vv_h, 2, 2)
GEN_VEXT_VV(vor_vv_w, 4, 4)
GEN_VEXT_VV(vor_vv_d, 8, 8)
GEN_VEXT_VV(vxor_vv_b, 1, 1)
GEN_VEXT_VV(vxor_vv_h, 2, 2)
GEN_VEXT_VV(vxor_vv_w, 4, 4)
GEN_VEXT_VV(vxor_vv_d, 8, 8)
1096 
/*
 * and/or/xor with a scalar operand: per-element workers built from OPIVX2,
 * followed by their GEN_VEXT_VX helper entry points.
 */
RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
GEN_VEXT_VX(vand_vx_b, 1, 1)
GEN_VEXT_VX(vand_vx_h, 2, 2)
GEN_VEXT_VX(vand_vx_w, 4, 4)
GEN_VEXT_VX(vand_vx_d, 8, 8)
GEN_VEXT_VX(vor_vx_b, 1, 1)
GEN_VEXT_VX(vor_vx_h, 2, 2)
GEN_VEXT_VX(vor_vx_w, 4, 4)
GEN_VEXT_VX(vor_vx_d, 8, 8)
GEN_VEXT_VX(vxor_vx_b, 1, 1)
GEN_VEXT_VX(vxor_vx_h, 2, 2)
GEN_VEXT_VX(vxor_vx_w, 4, 4)
GEN_VEXT_VX(vxor_vx_d, 8, 8)
1121 
/* Vector Single-Width Bit Shift Instructions */
/*
 * Shift N left/right by M.  Whether DO_SRL is a logical or an arithmetic
 * shift follows from the (promoted) type of N.  Both arguments are
 * parenthesized so expression arguments keep their grouping.
 */
#define DO_SLL(N, M)  ((N) << (M))
#define DO_SRL(N, M)  ((N) >> (M))
1125 
/*
 * Generate a shift helper with two vector operands.  For each index from
 * env->vstart up to env->vl that is not skipped (indices are skipped when
 * vext_vm(desc) is zero and the v0 mask bit is zero), the vs2 element (TS2)
 * is shifted by the vs1 element (TS1) ANDed with MASK and stored to vd as
 * TS1.  env->vstart is cleared at the end.
 */
#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK)             \
void HELPER(NAME)(void *vd, void *v0, void *vs1,                          \
                  void *vs2, CPURISCVState *env, uint32_t desc)           \
{                                                                         \
    const uint32_t vm = vext_vm(desc);                                    \
    const uint32_t vl = env->vl;                                          \
    uint32_t idx = env->vstart;                                           \
                                                                          \
    while (idx < vl) {                                                    \
        if (vm || vext_elem_mask(v0, idx)) {                              \
            TS1 amount = ((TS1 *)vs1)[HS1(idx)];                          \
            TS2 value = ((TS2 *)vs2)[HS2(idx)];                           \
                                                                          \
            ((TS1 *)vd)[HS1(idx)] = OP(value, amount & MASK);             \
        }                                                                 \
        idx++;                                                            \
    }                                                                     \
    env->vstart = 0;                                                      \
}
1145 
/*
 * vsll.vv / vsrl.vv / vsra.vv helpers.  The last argument is the mask
 * applied to the shift amount (element width in bits minus one).  vsra
 * reads vs2 as a signed type, so DO_SRL shifts a signed value there.
 */
GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t,  uint8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t,  int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1160 
/*
 * Generate a shift helper with one vector and one scalar operand.  The
 * shift amount is s1 ANDed with MASK.  For each index from env->vstart up
 * to env->vl that is not skipped (indices are skipped when vext_vm(desc) is
 * zero and the v0 mask bit is zero), the vs2 element (TS2) is shifted and
 * stored to vd as TD.  env->vstart is cleared at the end.
 */
#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,      \
        void *vs2, CPURISCVState *env, uint32_t desc)       \
{                                                           \
    const uint32_t vm = vext_vm(desc);                      \
    const uint32_t vl = env->vl;                            \
    const target_ulong amount = s1 & MASK;                  \
    uint32_t idx = env->vstart;                             \
                                                            \
    while (idx < vl) {                                      \
        if (vm || vext_elem_mask(v0, idx)) {                \
            TS2 value = ((TS2 *)vs2)[HS2(idx)];             \
                                                            \
            ((TD *)vd)[HD(idx)] = OP(value, amount);        \
        }                                                   \
        idx++;                                              \
    }                                                       \
    env->vstart = 0;                                        \
}
1179 
1180 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
1181 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
1182 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
1183 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)
1184 
1185 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1186 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1187 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1188 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1189 
1190 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
1191 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
1192 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1193 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1194 
/* Vector Narrowing Integer Right Shift Instructions */
/*
 * The vs2 element type is twice as wide as the destination type, and the
 * shift-amount mask is the source width in bits minus one.  The vnsra
 * variants read vs2 as a signed type.
 */
GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t,  uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t,  int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1208 
/* Vector Integer Comparison Instructions */
/*
 * Element comparison predicates; signedness comes from the operand types.
 * Arguments are parenthesized so expression arguments keep their grouping.
 */
#define DO_MSEQ(N, M) ((N) == (M))
#define DO_MSNE(N, M) ((N) != (M))
#define DO_MSLT(N, M) ((N) < (M))
#define DO_MSLE(N, M) ((N) <= (M))
#define DO_MSGT(N, M) ((N) > (M))
1215 
/*
 * Generate a vector-vector compare helper.  For each index from env->vstart
 * up to env->vl that is not skipped (indices are skipped when vext_vm(desc)
 * is zero and the v0 mask bit is zero), bit i of vd is set to
 * DO_OP(vs2[i], vs1[i]).  env->vstart is cleared at the end.
 */
#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    const uint32_t vm = vext_vm(desc);                        \
    const uint32_t vl = env->vl;                              \
    uint32_t idx = env->vstart;                               \
                                                              \
    while (idx < vl) {                                        \
        ETYPE rhs = ((ETYPE *)vs1)[H(idx)];                   \
        ETYPE lhs = ((ETYPE *)vs2)[H(idx)];                   \
                                                              \
        if (vm || vext_elem_mask(v0, idx)) {                  \
            vext_set_elem_mask(vd, idx, DO_OP(lhs, rhs));     \
        }                                                     \
        idx++;                                                \
    }                                                         \
    env->vstart = 0;                                          \
}
1234 
/*
 * Vector-vector compare helpers.  The unsigned variants (and eq/ne) use
 * unsigned element types, the signed variants use signed element types, so
 * the same DO_MSLT / DO_MSLE macro serves both.
 */
GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t,  H1, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1264 
/*
 * Generate a vector-scalar compare helper.  The scalar s1 is converted
 * through target_long to ETYPE.  For each index from env->vstart up to
 * env->vl that is not skipped (indices are skipped when vext_vm(desc) is
 * zero and the v0 mask bit is zero), bit i of vd is set to
 * DO_OP(vs2[i], scalar).  env->vstart is cleared at the end.
 */
#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP)                      \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)                \
{                                                                   \
    const uint32_t vm = vext_vm(desc);                              \
    const uint32_t vl = env->vl;                                    \
    const ETYPE scalar = (ETYPE)(target_long)s1;                    \
    uint32_t idx = env->vstart;                                     \
                                                                    \
    while (idx < vl) {                                              \
        ETYPE lhs = ((ETYPE *)vs2)[H(idx)];                         \
                                                                    \
        if (vm || vext_elem_mask(v0, idx)) {                        \
            vext_set_elem_mask(vd, idx, DO_OP(lhs, scalar));        \
        }                                                           \
        idx++;                                                      \
    }                                                               \
    env->vstart = 0;                                                \
}
1283 
/*
 * Vector-scalar compare helpers.  Signedness of each comparison comes from
 * the element type passed in; the greater-than forms (vmsgtu/vmsgt) are
 * instantiated only for this vector-scalar macro in this section.
 */
GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t,  H1, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t,  H1, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t,  H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)

GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t,  H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
1323 
/* Vector Integer Min/Max Instructions */
/*
 * Unsigned and signed min/max with two vector operands: per-element
 * workers followed by the helper entry points.
 * NOTE(review): OPIVV2, GEN_VEXT_VV, DO_MIN/DO_MAX and the OP_UUU_* and
 * OP_SSS_* tuples are defined outside this section; signedness presumably
 * comes from the tuple chosen.
 */
RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
GEN_VEXT_VV(vminu_vv_b, 1, 1)
GEN_VEXT_VV(vminu_vv_h, 2, 2)
GEN_VEXT_VV(vminu_vv_w, 4, 4)
GEN_VEXT_VV(vminu_vv_d, 8, 8)
GEN_VEXT_VV(vmin_vv_b, 1, 1)
GEN_VEXT_VV(vmin_vv_h, 2, 2)
GEN_VEXT_VV(vmin_vv_w, 4, 4)
GEN_VEXT_VV(vmin_vv_d, 8, 8)
GEN_VEXT_VV(vmaxu_vv_b, 1, 1)
GEN_VEXT_VV(vmaxu_vv_h, 2, 2)
GEN_VEXT_VV(vmaxu_vv_w, 4, 4)
GEN_VEXT_VV(vmaxu_vv_d, 8, 8)
GEN_VEXT_VV(vmax_vv_b, 1, 1)
GEN_VEXT_VV(vmax_vv_h, 2, 2)
GEN_VEXT_VV(vmax_vv_w, 4, 4)
GEN_VEXT_VV(vmax_vv_d, 8, 8)
1357 
/*
 * Unsigned and signed min/max with a scalar operand: per-element workers
 * built from OPIVX2, followed by their GEN_VEXT_VX helper entry points.
 */
RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
GEN_VEXT_VX(vminu_vx_b, 1, 1)
GEN_VEXT_VX(vminu_vx_h, 2, 2)
GEN_VEXT_VX(vminu_vx_w, 4, 4)
GEN_VEXT_VX(vminu_vx_d, 8, 8)
GEN_VEXT_VX(vmin_vx_b, 1, 1)
GEN_VEXT_VX(vmin_vx_h, 2, 2)
GEN_VEXT_VX(vmin_vx_w, 4, 4)
GEN_VEXT_VX(vmin_vx_d, 8, 8)
GEN_VEXT_VX(vmaxu_vx_b, 1, 1)
GEN_VEXT_VX(vmaxu_vx_h, 2, 2)
GEN_VEXT_VX(vmaxu_vx_w, 4, 4)
GEN_VEXT_VX(vmaxu_vx_d, 8, 8)
GEN_VEXT_VX(vmax_vx_b, 1, 1)
GEN_VEXT_VX(vmax_vx_h, 2, 2)
GEN_VEXT_VX(vmax_vx_w, 4, 4)
GEN_VEXT_VX(vmax_vx_d, 8, 8)
1390 
/* Vector Single-Width Integer Multiply Instructions */
/*
 * Product of N and M in their (promoted) common type.  Arguments are
 * parenthesized so expression arguments keep their grouping.
 */
#define DO_MUL(N, M) ((N) * (M))
/*
 * vmul.vv: per-element workers using DO_MUL, then the helper entry points.
 * NOTE(review): OPIVV2 and GEN_VEXT_VV are defined outside this section.
 */
RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
GEN_VEXT_VV(vmul_vv_b, 1, 1)
GEN_VEXT_VV(vmul_vv_h, 2, 2)
GEN_VEXT_VV(vmul_vv_w, 4, 4)
GEN_VEXT_VV(vmul_vv_d, 8, 8)
1401 
/* Upper 8 bits of the 16-bit signed product s2 * s1. */
static int8_t do_mulh_b(int8_t s2, int8_t s1)
{
    /* The product of two int8_t values always fits in int16_t. */
    int16_t product = (int16_t)s2 * (int16_t)s1;

    return product >> 8;
}
1406 
/* Upper 16 bits of the 32-bit signed product s2 * s1. */
static int16_t do_mulh_h(int16_t s2, int16_t s1)
{
    /* The product of two int16_t values always fits in int32_t. */
    int32_t product = (int32_t)s2 * (int32_t)s1;

    return product >> 16;
}
1411 
/* Upper 32 bits of the 64-bit signed product s2 * s1. */
static int32_t do_mulh_w(int32_t s2, int32_t s1)
{
    /* The product of two int32_t values always fits in int64_t. */
    int64_t product = (int64_t)s2 * (int64_t)s1;

    return product >> 32;
}
1416 
/*
 * High word produced by muls64() for s1 and s2; the low word is computed
 * but discarded.
 */
static int64_t do_mulh_d(int64_t s2, int64_t s1)
{
    uint64_t low, high;

    muls64(&low, &high, s1, s2);
    return high;
}
1424 
/* Upper 8 bits of the 16-bit unsigned product s2 * s1. */
static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
{
    /* The product of two uint8_t values always fits in uint16_t. */
    uint16_t product = (uint16_t)s2 * (uint16_t)s1;

    return product >> 8;
}
1429 
/* Upper 16 bits of the 32-bit unsigned product s2 * s1. */
static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
{
    uint32_t product = (uint32_t)s2 * (uint32_t)s1;

    return product >> 16;
}
1434 
/* High 32 bits of the unsigned 32x32-bit product s2 * s1. */
static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
{
    uint64_t prod = (uint64_t)s2 * (uint64_t)s1;

    return prod >> 32;
}
1439 
/*
 * High 64 bits of the unsigned 64x64-bit product, taken from the high
 * word that mulu64() writes.
 */
static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
{
    uint64_t low, high;

    mulu64(&low, &high, s2, s1);

    return high;
}
1447 
/* High 8 bits of the product of signed s2 and unsigned s1. */
static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
{
    /* The product lies in [-128 * 255, 127 * 255], which fits in 16 bits. */
    int16_t prod = (int16_t)s2 * (uint16_t)s1;

    return prod >> 8;
}
1452 
/* High 16 bits of the product of signed s2 and unsigned s1. */
static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
{
    /*
     * Mixing int32_t with uint32_t makes the multiplication unsigned
     * (modulo 2 ** 32); spell that conversion out explicitly.
     */
    uint32_t prod = (uint32_t)(int32_t)s2 * (uint32_t)s1;

    return prod >> 16;
}
1457 
/* High 32 bits of the product of signed s2 and unsigned s1. */
static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
{
    /*
     * Mixing int64_t with uint64_t makes the multiplication unsigned
     * (modulo 2 ** 64); spell that conversion out explicitly.
     */
    uint64_t prod = (uint64_t)(int64_t)s2 * (uint64_t)s1;

    return prod >> 32;
}
1462 
/*
 * Let  A  = signed operand,
 *      B  = unsigned operand,
 *      P  = mulu64(A, B), the unsigned product, in which A is
 *           reinterpreted as an unsigned 64-bit value,
 *      SP = A * B, the signed product we want.
 *
 * IF A < 0
 *      the unsigned reinterpretation of A is 2 ** 64 + A, so
 *          P  = (2 ** 64 + A) * B
 *             = A * B + 2 ** 64 * B
 *          SP = P - 2 ** 64 * B
 * ELSE
 *          SP = P
 * THEN
 *      HI_P -= (A < 0 ? B : 0)
 */
1481 
/*
 * High 64 bits of the product of signed s2 and unsigned s1.
 *
 * The product is formed with mulu64(), i.e. with s2 converted to
 * unsigned; when s2 is negative, s1 is subtracted from the high word
 * to correct for that.
 */
static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
{
    uint64_t low, high;

    mulu64(&low, &high, s2, s1);

    if (s2 < 0) {
        high -= s1;
    }
    return high;
}
1491 
/*
 * Per-element high-half multiplies, vector-vector form: signed (vmulh),
 * unsigned (vmulhu) and signed vs2 by unsigned vs1 (vmulhsu), each
 * built on the matching do_mulh*_{b,h,w,d} function.
 */
RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
/* Helper entry points for the vector-vector high-half multiplies. */
GEN_VEXT_VV(vmulh_vv_b, 1, 1)
GEN_VEXT_VV(vmulh_vv_h, 2, 2)
GEN_VEXT_VV(vmulh_vv_w, 4, 4)
GEN_VEXT_VV(vmulh_vv_d, 8, 8)
GEN_VEXT_VV(vmulhu_vv_b, 1, 1)
GEN_VEXT_VV(vmulhu_vv_h, 2, 2)
GEN_VEXT_VV(vmulhu_vv_w, 4, 4)
GEN_VEXT_VV(vmulhu_vv_d, 8, 8)
GEN_VEXT_VV(vmulhsu_vv_b, 1, 1)
GEN_VEXT_VV(vmulhsu_vv_h, 2, 2)
GEN_VEXT_VV(vmulhsu_vv_w, 4, 4)
GEN_VEXT_VV(vmulhsu_vv_d, 8, 8)
1516 
/*
 * Vector-scalar forms of the multiply and high-half multiply
 * operations, reusing DO_MUL and the do_mulh* functions.
 */
RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
/* Helper entry points for the vector-scalar multiplies. */
GEN_VEXT_VX(vmul_vx_b, 1, 1)
GEN_VEXT_VX(vmul_vx_h, 2, 2)
GEN_VEXT_VX(vmul_vx_w, 4, 4)
GEN_VEXT_VX(vmul_vx_d, 8, 8)
GEN_VEXT_VX(vmulh_vx_b, 1, 1)
GEN_VEXT_VX(vmulh_vx_h, 2, 2)
GEN_VEXT_VX(vmulh_vx_w, 4, 4)
GEN_VEXT_VX(vmulh_vx_d, 8, 8)
GEN_VEXT_VX(vmulhu_vx_b, 1, 1)
GEN_VEXT_VX(vmulhu_vx_h, 2, 2)
GEN_VEXT_VX(vmulhu_vx_w, 4, 4)
GEN_VEXT_VX(vmulhu_vx_d, 8, 8)
GEN_VEXT_VX(vmulhsu_vx_b, 1, 1)
GEN_VEXT_VX(vmulhsu_vx_h, 2, 2)
GEN_VEXT_VX(vmulhsu_vx_w, 4, 4)
GEN_VEXT_VX(vmulhsu_vx_d, 8, 8)
1549 
/* Vector Integer Divide Instructions */
/*
 * Per-element divide/remainder with the special cases handled inline:
 *   - divisor M == 0: quotient is all ones (-1 in N's type), remainder
 *     is the dividend N;
 *   - signed only, N == -N and M == -1: quotient is N, remainder is 0.
 *     NOTE(review): N == -N is meant to catch the most negative value
 *     (it also holds for 0, where the results are the same); this relies
 *     on wrapping negation -- confirm the build guarantees it.
 *
 * Every use of N and M is parenthesized so that expression arguments
 * are evaluated as a unit.
 */
#define DO_DIVU(N, M) (unlikely((M) == 0) ? (__typeof(N))(-1) : (N) / (M))
#define DO_REMU(N, M) (unlikely((M) == 0) ? (N) : (N) % (M))
#define DO_DIV(N, M)  (unlikely((M) == 0) ? (__typeof(N))(-1) :\
        unlikely(((N) == -(N)) && ((M) == (__typeof(N))(-1))) ? \
        (N) : (N) / (M))
#define DO_REM(N, M)  (unlikely((M) == 0) ? (N) :\
        unlikely(((N) == -(N)) && ((M) == (__typeof(N))(-1))) ? \
        0 : (N) % (M))
1557 
/*
 * Per-element divide and remainder, vector-vector form: unsigned
 * variants use DO_DIVU/DO_REMU, signed variants use DO_DIV/DO_REM.
 */
RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
/* Helper entry points for the vector-vector divide/remainder ops. */
GEN_VEXT_VV(vdivu_vv_b, 1, 1)
GEN_VEXT_VV(vdivu_vv_h, 2, 2)
GEN_VEXT_VV(vdivu_vv_w, 4, 4)
GEN_VEXT_VV(vdivu_vv_d, 8, 8)
GEN_VEXT_VV(vdiv_vv_b, 1, 1)
GEN_VEXT_VV(vdiv_vv_h, 2, 2)
GEN_VEXT_VV(vdiv_vv_w, 4, 4)
GEN_VEXT_VV(vdiv_vv_d, 8, 8)
GEN_VEXT_VV(vremu_vv_b, 1, 1)
GEN_VEXT_VV(vremu_vv_h, 2, 2)
GEN_VEXT_VV(vremu_vv_w, 4, 4)
GEN_VEXT_VV(vremu_vv_d, 8, 8)
GEN_VEXT_VV(vrem_vv_b, 1, 1)
GEN_VEXT_VV(vrem_vv_h, 2, 2)
GEN_VEXT_VV(vrem_vv_w, 4, 4)
GEN_VEXT_VV(vrem_vv_d, 8, 8)
1590 
/* Vector-scalar forms of the divide and remainder operations. */
RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
/* Helper entry points for the vector-scalar divide/remainder ops. */
GEN_VEXT_VX(vdivu_vx_b, 1, 1)
GEN_VEXT_VX(vdivu_vx_h, 2, 2)
GEN_VEXT_VX(vdivu_vx_w, 4, 4)
GEN_VEXT_VX(vdivu_vx_d, 8, 8)
GEN_VEXT_VX(vdiv_vx_b, 1, 1)
GEN_VEXT_VX(vdiv_vx_h, 2, 2)
GEN_VEXT_VX(vdiv_vx_w, 4, 4)
GEN_VEXT_VX(vdiv_vx_d, 8, 8)
GEN_VEXT_VX(vremu_vx_b, 1, 1)
GEN_VEXT_VX(vremu_vx_h, 2, 2)
GEN_VEXT_VX(vremu_vx_w, 4, 4)
GEN_VEXT_VX(vremu_vx_d, 8, 8)
GEN_VEXT_VX(vrem_vx_b, 1, 1)
GEN_VEXT_VX(vrem_vx_h, 2, 2)
GEN_VEXT_VX(vrem_vx_w, 4, 4)
GEN_VEXT_VX(vrem_vx_d, 8, 8)
1623 
/* Vector Widening Integer Multiply Instructions */
/*
 * Vector-vector widening multiplies reuse DO_MUL with the WOP_* type
 * lists; the destination index macro is one size up from the sources
 * (H2 for byte sources, H4 for halfword, H8 for word).
 */
RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
/* Helper entry points; the second size argument is twice the first. */
GEN_VEXT_VV(vwmul_vv_b, 1, 2)
GEN_VEXT_VV(vwmul_vv_h, 2, 4)
GEN_VEXT_VV(vwmul_vv_w, 4, 8)
GEN_VEXT_VV(vwmulu_vv_b, 1, 2)
GEN_VEXT_VV(vwmulu_vv_h, 2, 4)
GEN_VEXT_VV(vwmulu_vv_w, 4, 8)
GEN_VEXT_VV(vwmulsu_vv_b, 1, 2)
GEN_VEXT_VV(vwmulsu_vv_h, 2, 4)
GEN_VEXT_VV(vwmulsu_vv_w, 4, 8)
1643 
/* Vector-scalar forms of the widening multiplies. */
RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
/* Helper entry points; the second size argument is twice the first. */
GEN_VEXT_VX(vwmul_vx_b, 1, 2)
GEN_VEXT_VX(vwmul_vx_h, 2, 4)
GEN_VEXT_VX(vwmul_vx_w, 4, 8)
GEN_VEXT_VX(vwmulu_vx_b, 1, 2)
GEN_VEXT_VX(vwmulu_vx_h, 2, 4)
GEN_VEXT_VX(vwmulu_vx_w, 4, 8)
GEN_VEXT_VX(vwmulsu_vx_b, 1, 2)
GEN_VEXT_VX(vwmulsu_vx_h, 2, 4)
GEN_VEXT_VX(vwmulsu_vx_w, 4, 8)
1662 
/* Vector Single-Width Integer Multiply-Add Instructions */
/*
 * OPIVV3 defines do_NAME(), the per-element body of a three-operand
 * vector-vector operation.  For element i it loads vs1 (read as T1,
 * converted to TX1), vs2 (read as T2, converted to TX2) and the current
 * destination value (read as TD), then stores OP(s2, s1, d) back into
 * the destination.  HD/HS1/HS2 map i to the index used within vd, vs1
 * and vs2 respectively.
 */
#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i)       \
{                                                                  \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
    TD d = *((TD *)vd + HD(i));                                    \
    *((TD *)vd + HD(i)) = OP(s2, s1, d);                           \
}
1672 
/*
 * Multiply-add kernels.  As invoked by OPIVV3/OPIVX3, N is the vs2
 * element, M is the vs1 element (or scalar) and D is the current
 * destination element:
 *   DO_MACC  :   M * N  + D
 *   DO_NMSAC : -(M * N) + D
 *   DO_MADD  :   M * D  + N
 *   DO_NMSUB : -(M * D) + N
 * Arguments are parenthesized so that expression arguments are
 * evaluated as a unit.
 */
#define DO_MACC(N, M, D) ((M) * (N) + (D))
#define DO_NMSAC(N, M, D) (-((M) * (N)) + (D))
#define DO_MADD(N, M, D) ((M) * (D) + (N))
#define DO_NMSUB(N, M, D) (-((M) * (D)) + (N))
/*
 * Vector-vector multiply-add family: vmacc/vnmsac/vmadd/vnmsub, each
 * defined for all four element widths via OPIVV3.
 */
RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
/* Helper entry points for the vector-vector multiply-add ops. */
GEN_VEXT_VV(vmacc_vv_b, 1, 1)
GEN_VEXT_VV(vmacc_vv_h, 2, 2)
GEN_VEXT_VV(vmacc_vv_w, 4, 4)
GEN_VEXT_VV(vmacc_vv_d, 8, 8)
GEN_VEXT_VV(vnmsac_vv_b, 1, 1)
GEN_VEXT_VV(vnmsac_vv_h, 2, 2)
GEN_VEXT_VV(vnmsac_vv_w, 4, 4)
GEN_VEXT_VV(vnmsac_vv_d, 8, 8)
GEN_VEXT_VV(vmadd_vv_b, 1, 1)
GEN_VEXT_VV(vmadd_vv_h, 2, 2)
GEN_VEXT_VV(vmadd_vv_w, 4, 4)
GEN_VEXT_VV(vmadd_vv_d, 8, 8)
GEN_VEXT_VV(vnmsub_vv_b, 1, 1)
GEN_VEXT_VV(vnmsub_vv_h, 2, 2)
GEN_VEXT_VV(vnmsub_vv_w, 4, 4)
GEN_VEXT_VV(vnmsub_vv_d, 8, 8)
1709 
/*
 * OPIVX3 defines do_NAME(), the per-element body of a three-operand
 * vector-scalar operation.  For element i it loads vs2 (read as T2,
 * converted to TX2) and the current destination value (read as TD),
 * narrows the scalar s1 to T1 and converts it to TX1, then stores
 * OP(s2, scalar, d) back into the destination.  HD/HS2 map i to the
 * index used within vd and vs2.
 */
#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
{                                                                   \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
    TD d = *((TD *)vd + HD(i));                                     \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d);                   \
}
1717 
/*
 * Vector-scalar multiply-add family: vmacc/vnmsac/vmadd/vnmsub, each
 * defined for all four element widths via OPIVX3.
 */
RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
/* Helper entry points for the vector-scalar multiply-add ops. */
GEN_VEXT_VX(vmacc_vx_b, 1, 1)
GEN_VEXT_VX(vmacc_vx_h, 2, 2)
GEN_VEXT_VX(vmacc_vx_w, 4, 4)
GEN_VEXT_VX(vmacc_vx_d, 8, 8)
GEN_VEXT_VX(vnmsac_vx_b, 1, 1)
GEN_VEXT_VX(vnmsac_vx_h, 2, 2)
GEN_VEXT_VX(vnmsac_vx_w, 4, 4)
GEN_VEXT_VX(vnmsac_vx_d, 8, 8)
GEN_VEXT_VX(vmadd_vx_b, 1, 1)
GEN_VEXT_VX(vmadd_vx_h, 2, 2)
GEN_VEXT_VX(vmadd_vx_w, 4, 4)
GEN_VEXT_VX(vmadd_vx_d, 8, 8)
GEN_VEXT_VX(vnmsub_vx_b, 1, 1)
GEN_VEXT_VX(vnmsub_vx_h, 2, 2)
GEN_VEXT_VX(vnmsub_vx_w, 4, 4)
GEN_VEXT_VX(vnmsub_vx_d, 8, 8)
1750 
/* Vector Widening Integer Multiply-Add Instructions */
/*
 * Vector-vector widening multiply-accumulate: DO_MACC with the WOP_*
 * type lists and a destination index macro one size up from the
 * sources.  Unsigned (vwmaccu), signed (vwmacc) and mixed-sign
 * (vwmaccsu) variants are provided.
 */
RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
/* Helper entry points; the second size argument is twice the first. */
GEN_VEXT_VV(vwmaccu_vv_b, 1, 2)
GEN_VEXT_VV(vwmaccu_vv_h, 2, 4)
GEN_VEXT_VV(vwmaccu_vv_w, 4, 8)
GEN_VEXT_VV(vwmacc_vv_b, 1, 2)
GEN_VEXT_VV(vwmacc_vv_h, 2, 4)
GEN_VEXT_VV(vwmacc_vv_w, 4, 8)
GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2)
GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4)
GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8)
1770 
/*
 * Vector-scalar widening multiply-accumulate.  In addition to the
 * variants that also exist in vector-vector form, this group defines
 * vwmaccus (WOP_SUS type list), which has no vector-vector counterpart
 * in this section.
 */
RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
/* Helper entry points; the second size argument is twice the first. */
GEN_VEXT_VX(vwmaccu_vx_b, 1, 2)
GEN_VEXT_VX(vwmaccu_vx_h, 2, 4)
GEN_VEXT_VX(vwmaccu_vx_w, 4, 8)
GEN_VEXT_VX(vwmacc_vx_b, 1, 2)
GEN_VEXT_VX(vwmacc_vx_h, 2, 4)
GEN_VEXT_VX(vwmacc_vx_w, 4, 8)
GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2)
GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4)
GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8)
GEN_VEXT_VX(vwmaccus_vx_b, 1, 2)
GEN_VEXT_VX(vwmaccus_vx_h, 2, 4)
GEN_VEXT_VX(vwmaccus_vx_w, 4, 8)
1795 
1796 /* Vector Integer Merge and Move Instructions */
1797 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H)                              \
1798 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env,           \
1799                   uint32_t desc)                                     \
1800 {                                                                    \
1801     uint32_t vl = env->vl;                                           \
1802     uint32_t i;                                                      \
1803                                                                      \
1804     for (i = env->vstart; i < vl; i++) {                             \
1805         ETYPE s1 = *((ETYPE *)vs1 + H(i));                           \
1806         *((ETYPE *)vd + H(i)) = s1;                                  \
1807     }                                                                \
1808     env->vstart = 0;                                                 \
1809 }
1810 
1811 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t,  H1)
1812 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
1813 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
1814 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
1815 
1816 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H)                              \
1817 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env,         \
1818                   uint32_t desc)                                     \
1819 {                                                                    \
1820     uint32_t vl = env->vl;                                           \
1821     uint32_t i;                                                      \
1822                                                                      \
1823     for (i = env->vstart; i < vl; i++) {                             \
1824         *((ETYPE *)vd + H(i)) = (ETYPE)s1;                           \
1825     }                                                                \
1826     env->vstart = 0;                                                 \
1827 }
1828 
1829 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t,  H1)
1830 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
1831 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
1832 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
1833 
1834 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H)                           \
1835 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,          \
1836                   CPURISCVState *env, uint32_t desc)                 \
1837 {                                                                    \
1838     uint32_t vl = env->vl;                                           \
1839     uint32_t i;                                                      \
1840                                                                      \
1841     for (i = env->vstart; i < vl; i++) {                             \
1842         ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1);            \
1843         *((ETYPE *)vd + H(i)) = *(vt + H(i));                        \
1844     }                                                                \
1845     env->vstart = 0;                                                 \
1846 }
1847 
1848 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t,  H1)
1849 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
1850 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
1851 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
1852 
1853 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H)                           \
1854 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,               \
1855                   void *vs2, CPURISCVState *env, uint32_t desc)      \
1856 {                                                                    \
1857     uint32_t vl = env->vl;                                           \
1858     uint32_t i;                                                      \
1859                                                                      \
1860     for (i = env->vstart; i < vl; i++) {                             \
1861         ETYPE s2 = *((ETYPE *)vs2 + H(i));                           \
1862         ETYPE d = (!vext_elem_mask(v0, i) ? s2 :                     \
1863                    (ETYPE)(target_long)s1);                          \
1864         *((ETYPE *)vd + H(i)) = d;                                   \
1865     }                                                                \
1866     env->vstart = 0;                                                 \
1867 }
1868 
1869 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t,  H1)
1870 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
1871 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
1872 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
1873 
1874 /*
1875  *** Vector Fixed-Point Arithmetic Instructions
1876  */
1877 
1878 /* Vector Single-Width Saturating Add and Subtract */
1879 
/*
 * Fixed-point instructions may use a rounding mode and may saturate,
 * so the common macros and types for them are defined here.
 *
 * opivv2_rm_fn is the per-element callback type for vector-vector
 * fixed-point operations: it receives the destination and both source
 * registers, the element number i, the CPU state and the rounding mode.
 */
typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
                          CPURISCVState *env, int vxrm);
1886 
/*
 * OPIVV2_RM defines do_NAME(), an opivv2_rm_fn-shaped per-element body:
 * it loads element i of vs1 (read as T1, converted to TX1) and of vs2
 * (read as T2, converted to TX2) and stores OP(env, vxrm, s2, s1) into
 * element i of vd.  HD/HS1/HS2 map i to the index used for each register.
 */
#define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)     \
static inline void                                                  \
do_##NAME(void *vd, void *vs1, void *vs2, int i,                    \
          CPURISCVState *env, int vxrm)                             \
{                                                                   \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                                 \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
    *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1);                    \
}
1896 
1897 static inline void
1898 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
1899              CPURISCVState *env,
1900              uint32_t vl, uint32_t vm, int vxrm,
1901              opivv2_rm_fn *fn)
1902 {
1903     for (uint32_t i = env->vstart; i < vl; i++) {
1904         if (!vm && !vext_elem_mask(v0, i)) {
1905             continue;
1906         }
1907         fn(vd, vs1, vs2, i, env, vxrm);
1908     }
1909     env->vstart = 0;
1910 }
1911 
/*
 * Run a vector-vector fixed-point operation over the active elements.
 *
 * Takes the mask-enable bit from desc and the vector length and rounding
 * mode from env, then calls vext_vv_rm_1() with the rounding mode passed
 * as a literal in each switch arm.  Any env->vxrm value other than 0, 1
 * or 2 takes the default (rod) arm.  esz and dsz are accepted but not
 * used here.
 *
 * NOTE(review): passing a literal to the inline vext_vv_rm_1() presumably
 * lets the compiler specialize the loop per rounding mode -- confirm
 * before collapsing the switch into a single call.
 */
static inline void
vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
             CPURISCVState *env,
             uint32_t desc, uint32_t esz, uint32_t dsz,
             opivv2_rm_fn *fn)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;

    switch (env->vxrm) {
    case 0: /* rnu */
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 0, fn);
        break;
    case 1: /* rne */
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 1, fn);
        break;
    case 2: /* rdn */
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 2, fn);
        break;
    default: /* rod */
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 3, fn);
        break;
    }
}
1940 
/*
 * Generate helpers for fixed-point instructions with OPIVV format.
 * HELPER(NAME) forwards its arguments, the sizes ESZ/DSZ and the
 * per-element function do_NAME (as defined by OPIVV2_RM) to
 * vext_vv_rm_2().
 */
#define GEN_VEXT_VV_RM(NAME, ESZ, DSZ)                          \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,     \
                  CPURISCVState *env, uint32_t desc)            \
{                                                               \
    vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,         \
                 do_##NAME);                                    \
}
1949 
1950 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
1951 {
1952     uint8_t res = a + b;
1953     if (res < a) {
1954         res = UINT8_MAX;
1955         env->vxsat = 0x1;
1956     }
1957     return res;
1958 }
1959 
1960 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
1961                                uint16_t b)
1962 {
1963     uint16_t res = a + b;
1964     if (res < a) {
1965         res = UINT16_MAX;
1966         env->vxsat = 0x1;
1967     }
1968     return res;
1969 }
1970 
1971 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
1972                                uint32_t b)
1973 {
1974     uint32_t res = a + b;
1975     if (res < a) {
1976         res = UINT32_MAX;
1977         env->vxsat = 0x1;
1978     }
1979     return res;
1980 }
1981 
1982 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
1983                                uint64_t b)
1984 {
1985     uint64_t res = a + b;
1986     if (res < a) {
1987         res = UINT64_MAX;
1988         env->vxsat = 0x1;
1989     }
1990     return res;
1991 }
1992 
/* Unsigned saturating add, vector-vector form, built on saddu8..saddu64. */
RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
/* Helper entry points wrapping the do_vsaddu_vv_* functions above. */
GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1)
GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2)
GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4)
GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8)
2001 
/*
 * Per-element callback type for vector-scalar fixed-point operations:
 * receives the destination register, the scalar operand, the vector
 * source, the element number i, the CPU state and the rounding mode.
 */
typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
                          CPURISCVState *env, int vxrm);
2004 
/*
 * OPIVX2_RM defines do_NAME(), an opivx2_rm_fn-shaped per-element body:
 * it loads element i of vs2 (read as T2, converted to TX2), narrows the
 * scalar s1 to T1 and converts it to TX1, and stores
 * OP(env, vxrm, s2, scalar) into element i of vd.  HD/HS2 map i to the
 * index used within vd and vs2.
 */
#define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)          \
static inline void                                                  \
do_##NAME(void *vd, target_long s1, void *vs2, int i,               \
          CPURISCVState *env, int vxrm)                             \
{                                                                   \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
    *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1);           \
}
2013 
2014 static inline void
2015 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
2016              CPURISCVState *env,
2017              uint32_t vl, uint32_t vm, int vxrm,
2018              opivx2_rm_fn *fn)
2019 {
2020     for (uint32_t i = env->vstart; i < vl; i++) {
2021         if (!vm && !vext_elem_mask(v0, i)) {
2022             continue;
2023         }
2024         fn(vd, s1, vs2, i, env, vxrm);
2025     }
2026     env->vstart = 0;
2027 }
2028 
/*
 * Run a vector-scalar fixed-point operation over the active elements.
 *
 * Takes the mask-enable bit from desc and the vector length and rounding
 * mode from env, then calls vext_vx_rm_1() with the rounding mode passed
 * as a literal in each switch arm.  Any env->vxrm value other than 0, 1
 * or 2 takes the default (rod) arm.  esz and dsz are accepted but not
 * used here.
 *
 * NOTE(review): passing a literal to the inline vext_vx_rm_1() presumably
 * lets the compiler specialize the loop per rounding mode -- confirm
 * before collapsing the switch into a single call.
 */
static inline void
vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
             CPURISCVState *env,
             uint32_t desc, uint32_t esz, uint32_t dsz,
             opivx2_rm_fn *fn)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;

    switch (env->vxrm) {
    case 0: /* rnu */
        vext_vx_rm_1(vd, v0, s1, vs2,
                     env, vl, vm, 0, fn);
        break;
    case 1: /* rne */
        vext_vx_rm_1(vd, v0, s1, vs2,
                     env, vl, vm, 1, fn);
        break;
    case 2: /* rdn */
        vext_vx_rm_1(vd, v0, s1, vs2,
                     env, vl, vm, 2, fn);
        break;
    default: /* rod */
        vext_vx_rm_1(vd, v0, s1, vs2,
                     env, vl, vm, 3, fn);
        break;
    }
}
2057 
/*
 * Generate helpers for fixed-point instructions with OPIVX format.
 * HELPER(NAME) forwards its arguments, the sizes ESZ/DSZ and the
 * per-element function do_NAME (as defined by OPIVX2_RM) to
 * vext_vx_rm_2().  The scalar arrives as target_ulong and is converted
 * to the target_long parameter of vext_vx_rm_2() at the call.
 */
#define GEN_VEXT_VX_RM(NAME, ESZ, DSZ)                    \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
        void *vs2, CPURISCVState *env, uint32_t desc)     \
{                                                         \
    vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ,    \
                 do_##NAME);                              \
}
2066 
/* Unsigned saturating add, vector-scalar form, built on saddu8..saddu64. */
RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
/* Helper entry points wrapping the do_vsaddu_vx_* functions above. */
GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1)
GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2)
GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4)
GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8)
2075 
2076 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2077 {
2078     int8_t res = a + b;
2079     if ((res ^ a) & (res ^ b) & INT8_MIN) {
2080         res = a > 0 ? INT8_MAX : INT8_MIN;
2081         env->vxsat = 0x1;
2082     }
2083     return res;
2084 }
2085 
2086 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2087 {
2088     int16_t res = a + b;
2089     if ((res ^ a) & (res ^ b) & INT16_MIN) {
2090         res = a > 0 ? INT16_MAX : INT16_MIN;
2091         env->vxsat = 0x1;
2092     }
2093     return res;
2094 }
2095 
2096 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2097 {
2098     int32_t res = a + b;
2099     if ((res ^ a) & (res ^ b) & INT32_MIN) {
2100         res = a > 0 ? INT32_MAX : INT32_MIN;
2101         env->vxsat = 0x1;
2102     }
2103     return res;
2104 }
2105 
2106 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2107 {
2108     int64_t res = a + b;
2109     if ((res ^ a) & (res ^ b) & INT64_MIN) {
2110         res = a > 0 ? INT64_MAX : INT64_MIN;
2111         env->vxsat = 0x1;
2112     }
2113     return res;
2114 }
2115 
/* Signed saturating add, vector-vector form, built on sadd8..sadd64. */
RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
/* Helper entry points wrapping the do_vsadd_vv_* functions above. */
GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1)
GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2)
GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4)
GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8)
2124 
/* Signed saturating add, vector-scalar form, built on sadd8..sadd64. */
RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
/* Helper entry points wrapping the do_vsadd_vx_* functions above. */
GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1)
GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2)
GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4)
GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8)
2133 
2134 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2135 {
2136     uint8_t res = a - b;
2137     if (res > a) {
2138         res = 0;
2139         env->vxsat = 0x1;
2140     }
2141     return res;
2142 }
2143 
2144 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2145                                uint16_t b)
2146 {
2147     uint16_t res = a - b;
2148     if (res > a) {
2149         res = 0;
2150         env->vxsat = 0x1;
2151     }
2152     return res;
2153 }
2154 
2155 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2156                                uint32_t b)
2157 {
2158     uint32_t res = a - b;
2159     if (res > a) {
2160         res = 0;
2161         env->vxsat = 0x1;
2162     }
2163     return res;
2164 }
2165 
2166 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2167                                uint64_t b)
2168 {
2169     uint64_t res = a - b;
2170     if (res > a) {
2171         res = 0;
2172         env->vxsat = 0x1;
2173     }
2174     return res;
2175 }
2176 
/* Unsigned saturating subtract, vector-vector form, built on ssubu8..ssubu64. */
RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
/* Helper entry points wrapping the do_vssubu_vv_* functions above. */
GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1)
GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2)
GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4)
GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8)
2185 
/* Unsigned saturating subtract, vector-scalar form, built on ssubu8..ssubu64. */
RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
/* Helper entry points wrapping the do_vssubu_vx_* functions above. */
GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1)
GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2)
GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4)
GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8)
2194 
2195 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2196 {
2197     int8_t res = a - b;
2198     if ((res ^ a) & (a ^ b) & INT8_MIN) {
2199         res = a >= 0 ? INT8_MAX : INT8_MIN;
2200         env->vxsat = 0x1;
2201     }
2202     return res;
2203 }
2204 
2205 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2206 {
2207     int16_t res = a - b;
2208     if ((res ^ a) & (a ^ b) & INT16_MIN) {
2209         res = a >= 0 ? INT16_MAX : INT16_MIN;
2210         env->vxsat = 0x1;
2211     }
2212     return res;
2213 }
2214 
2215 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2216 {
2217     int32_t res = a - b;
2218     if ((res ^ a) & (a ^ b) & INT32_MIN) {
2219         res = a >= 0 ? INT32_MAX : INT32_MIN;
2220         env->vxsat = 0x1;
2221     }
2222     return res;
2223 }
2224 
2225 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2226 {
2227     int64_t res = a - b;
2228     if ((res ^ a) & (a ^ b) & INT64_MIN) {
2229         res = a >= 0 ? INT64_MAX : INT64_MIN;
2230         env->vxsat = 0x1;
2231     }
2232     return res;
2233 }
2234 
/*
 * vssub_vv_* / vssub_vx_*: RVVCALL and GEN_VEXT_*_RM invocations for the
 * b/h/w/d variants, each passing the matching ssub8/16/32/64 routine as
 * the per-element operation.  The macros are defined outside this section;
 * presumably they emit the per-element wrapper and the helper entry
 * point -- confirm against their definitions.
 */
RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
GEN_VEXT_VV_RM(vssub_vv_b, 1, 1)
GEN_VEXT_VV_RM(vssub_vv_h, 2, 2)
GEN_VEXT_VV_RM(vssub_vv_w, 4, 4)
GEN_VEXT_VV_RM(vssub_vv_d, 8, 8)

RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
GEN_VEXT_VX_RM(vssub_vx_b, 1, 1)
GEN_VEXT_VX_RM(vssub_vx_h, 2, 2)
GEN_VEXT_VX_RM(vssub_vx_w, 4, 4)
GEN_VEXT_VX_RM(vssub_vx_d, 8, 8)
2252 
2253 /* Vector Single-Width Averaging Add and Subtract */
/*
 * Compute the rounding increment (0 or 1) to add after shifting v right
 * by 'shift' bits, according to the rounding mode vxrm:
 *   0 - round-to-nearest-up, 1 - round-to-nearest-even,
 *   3 - round-to-odd, any other value - round-down (truncate).
 *
 * A shift of 0 or of more than 64 yields no increment.  All bit
 * extraction happens after that guard, because extract64() asserts that
 * the requested field lies inside the 64-bit value; for shift == 64 the
 * (non-existent) bit 64 is taken to be 0.
 */
static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
{
    uint8_t d, d1;
    uint64_t D1, D2;

    if (shift == 0 || shift > 64) {
        return 0;
    }

    /* d: least-significant bit that survives the shift. */
    d = shift < 64 ? extract64(v, shift, 1) : 0;
    /* d1: most-significant bit that is shifted out. */
    d1 = extract64(v, shift - 1, 1);
    /* D1: all bits that are shifted out. */
    D1 = extract64(v, 0, shift);
    if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
        return d1;
    } else if (vxrm == 1) { /* round-to-nearest-even */
        if (shift > 1) {
            /* D2: shifted-out bits below d1. */
            D2 = extract64(v, 0, shift - 1);
            return d1 & ((D2 != 0) | d);
        } else {
            return d1 & d;
        }
    } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
        return !d & (D1 != 0);
    }
    return 0; /* round-down (truncate) */
}
2280 
2281 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2282 {
2283     int64_t res = (int64_t)a + b;
2284     uint8_t round = get_round(vxrm, res, 1);
2285 
2286     return (res >> 1) + round;
2287 }
2288 
2289 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2290 {
2291     int64_t res = a + b;
2292     uint8_t round = get_round(vxrm, res, 1);
2293     int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2294 
2295     /* With signed overflow, bit 64 is inverse of bit 63. */
2296     return ((res >> 1) ^ over) + round;
2297 }
2298 
/*
 * vaadd_vv_* / vaadd_vx_*: RVVCALL and GEN_VEXT_*_RM invocations for the
 * b/h/w/d variants.  The b, h and w variants all pass aadd32 as the
 * per-element operation; the d variant passes aadd64.  The macros are
 * defined outside this section -- confirm their expansion there.
 */
RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1)
GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2)
GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4)
GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8)

RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1)
GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2)
GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4)
GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8)
2316 
2317 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2318                                uint32_t a, uint32_t b)
2319 {
2320     uint64_t res = (uint64_t)a + b;
2321     uint8_t round = get_round(vxrm, res, 1);
2322 
2323     return (res >> 1) + round;
2324 }
2325 
2326 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
2327                                uint64_t a, uint64_t b)
2328 {
2329     uint64_t res = a + b;
2330     uint8_t round = get_round(vxrm, res, 1);
2331     uint64_t over = (uint64_t)(res < a) << 63;
2332 
2333     return ((res >> 1) | over) + round;
2334 }
2335 
/*
 * vaaddu_vv_* / vaaddu_vx_*: RVVCALL and GEN_VEXT_*_RM invocations for the
 * b/h/w/d variants.  The b, h and w variants all pass aaddu32 as the
 * per-element operation; the d variant passes aaddu64.  The macros are
 * defined outside this section -- confirm their expansion there.
 */
RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
GEN_VEXT_VV_RM(vaaddu_vv_b, 1, 1)
GEN_VEXT_VV_RM(vaaddu_vv_h, 2, 2)
GEN_VEXT_VV_RM(vaaddu_vv_w, 4, 4)
GEN_VEXT_VV_RM(vaaddu_vv_d, 8, 8)

RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
GEN_VEXT_VX_RM(vaaddu_vx_b, 1, 1)
GEN_VEXT_VX_RM(vaaddu_vx_h, 2, 2)
GEN_VEXT_VX_RM(vaaddu_vx_w, 4, 4)
GEN_VEXT_VX_RM(vaaddu_vx_d, 8, 8)
2353 
2354 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2355 {
2356     int64_t res = (int64_t)a - b;
2357     uint8_t round = get_round(vxrm, res, 1);
2358 
2359     return (res >> 1) + round;
2360 }
2361 
2362 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2363 {
2364     int64_t res = (int64_t)a - b;
2365     uint8_t round = get_round(vxrm, res, 1);
2366     int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2367 
2368     /* With signed overflow, bit 64 is inverse of bit 63. */
2369     return ((res >> 1) ^ over) + round;
2370 }
2371 
/*
 * vasub_vv_* / vasub_vx_*: RVVCALL and GEN_VEXT_*_RM invocations for the
 * b/h/w/d variants.  The b, h and w variants all pass asub32 as the
 * per-element operation; the d variant passes asub64.  The macros are
 * defined outside this section -- confirm their expansion there.
 */
RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
GEN_VEXT_VV_RM(vasub_vv_b, 1, 1)
GEN_VEXT_VV_RM(vasub_vv_h, 2, 2)
GEN_VEXT_VV_RM(vasub_vv_w, 4, 4)
GEN_VEXT_VV_RM(vasub_vv_d, 8, 8)

RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
GEN_VEXT_VX_RM(vasub_vx_b, 1, 1)
GEN_VEXT_VX_RM(vasub_vx_h, 2, 2)
GEN_VEXT_VX_RM(vasub_vx_w, 4, 4)
GEN_VEXT_VX_RM(vasub_vx_d, 8, 8)
2389 
2390 static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2391                                uint32_t a, uint32_t b)
2392 {
2393     int64_t res = (int64_t)a - b;
2394     uint8_t round = get_round(vxrm, res, 1);
2395 
2396     return (res >> 1) + round;
2397 }
2398 
2399 static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
2400                                uint64_t a, uint64_t b)
2401 {
2402     uint64_t res = (uint64_t)a - b;
2403     uint8_t round = get_round(vxrm, res, 1);
2404     uint64_t over = (uint64_t)(res > a) << 63;
2405 
2406     return ((res >> 1) | over) + round;
2407 }
2408 
/*
 * vasubu_vv_* / vasubu_vx_*: RVVCALL and GEN_VEXT_*_RM invocations for the
 * b/h/w/d variants.  The b, h and w variants all pass asubu32 as the
 * per-element operation; the d variant passes asubu64.  The macros are
 * defined outside this section -- confirm their expansion there.
 */
RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
GEN_VEXT_VV_RM(vasubu_vv_b, 1, 1)
GEN_VEXT_VV_RM(vasubu_vv_h, 2, 2)
GEN_VEXT_VV_RM(vasubu_vv_w, 4, 4)
GEN_VEXT_VV_RM(vasubu_vv_d, 8, 8)

RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
GEN_VEXT_VX_RM(vasubu_vx_b, 1, 1)
GEN_VEXT_VX_RM(vasubu_vx_h, 2, 2)
GEN_VEXT_VX_RM(vasubu_vx_w, 4, 4)
GEN_VEXT_VX_RM(vasubu_vx_d, 8, 8)
2426 
2427 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2428 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2429 {
2430     uint8_t round;
2431     int16_t res;
2432 
2433     res = (int16_t)a * (int16_t)b;
2434     round = get_round(vxrm, res, 7);
2435     res   = (res >> 7) + round;
2436 
2437     if (res > INT8_MAX) {
2438         env->vxsat = 0x1;
2439         return INT8_MAX;
2440     } else if (res < INT8_MIN) {
2441         env->vxsat = 0x1;
2442         return INT8_MIN;
2443     } else {
2444         return res;
2445     }
2446 }
2447 
2448 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2449 {
2450     uint8_t round;
2451     int32_t res;
2452 
2453     res = (int32_t)a * (int32_t)b;
2454     round = get_round(vxrm, res, 15);
2455     res   = (res >> 15) + round;
2456 
2457     if (res > INT16_MAX) {
2458         env->vxsat = 0x1;
2459         return INT16_MAX;
2460     } else if (res < INT16_MIN) {
2461         env->vxsat = 0x1;
2462         return INT16_MIN;
2463     } else {
2464         return res;
2465     }
2466 }
2467 
2468 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2469 {
2470     uint8_t round;
2471     int64_t res;
2472 
2473     res = (int64_t)a * (int64_t)b;
2474     round = get_round(vxrm, res, 31);
2475     res   = (res >> 31) + round;
2476 
2477     if (res > INT32_MAX) {
2478         env->vxsat = 0x1;
2479         return INT32_MAX;
2480     } else if (res < INT32_MIN) {
2481         env->vxsat = 0x1;
2482         return INT32_MIN;
2483     } else {
2484         return res;
2485     }
2486 }
2487 
2488 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2489 {
2490     uint8_t round;
2491     uint64_t hi_64, lo_64;
2492     int64_t res;
2493 
2494     if (a == INT64_MIN && b == INT64_MIN) {
2495         env->vxsat = 1;
2496         return INT64_MAX;
2497     }
2498 
2499     muls64(&lo_64, &hi_64, a, b);
2500     round = get_round(vxrm, lo_64, 63);
2501     /*
2502      * Cannot overflow, as there are always
2503      * 2 sign bits after multiply.
2504      */
2505     res = (hi_64 << 1) | (lo_64 >> 63);
2506     if (round) {
2507         if (res == INT64_MAX) {
2508             env->vxsat = 1;
2509         } else {
2510             res += 1;
2511         }
2512     }
2513     return res;
2514 }
2515 
/*
 * vsmul_vv_* / vsmul_vx_*: RVVCALL and GEN_VEXT_*_RM invocations for the
 * b/h/w/d variants, each passing the matching vsmul8/16/32/64 routine as
 * the per-element operation.  The macros are defined outside this
 * section -- confirm their expansion there.
 */
RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1)
GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2)
GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4)
GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8)

RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1)
GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2)
GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4)
GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8)
2533 
2534 /* Vector Single-Width Scaling Shift Instructions */
2535 static inline uint8_t
2536 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2537 {
2538     uint8_t round, shift = b & 0x7;
2539     uint8_t res;
2540 
2541     round = get_round(vxrm, a, shift);
2542     res   = (a >> shift)  + round;
2543     return res;
2544 }
2545 static inline uint16_t
2546 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2547 {
2548     uint8_t round, shift = b & 0xf;
2549     uint16_t res;
2550 
2551     round = get_round(vxrm, a, shift);
2552     res   = (a >> shift)  + round;
2553     return res;
2554 }
2555 static inline uint32_t
2556 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2557 {
2558     uint8_t round, shift = b & 0x1f;
2559     uint32_t res;
2560 
2561     round = get_round(vxrm, a, shift);
2562     res   = (a >> shift)  + round;
2563     return res;
2564 }
2565 static inline uint64_t
2566 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2567 {
2568     uint8_t round, shift = b & 0x3f;
2569     uint64_t res;
2570 
2571     round = get_round(vxrm, a, shift);
2572     res   = (a >> shift)  + round;
2573     return res;
2574 }
/*
 * vssrl_vv_* / vssrl_vx_*: RVVCALL and GEN_VEXT_*_RM invocations for the
 * b/h/w/d variants, each passing the matching vssrl8/16/32/64 routine as
 * the per-element operation.  The macros are defined outside this
 * section -- confirm their expansion there.
 */
RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1)
GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2)
GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4)
GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8)

RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1)
GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2)
GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4)
GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8)
2592 
2593 static inline int8_t
2594 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2595 {
2596     uint8_t round, shift = b & 0x7;
2597     int8_t res;
2598 
2599     round = get_round(vxrm, a, shift);
2600     res   = (a >> shift)  + round;
2601     return res;
2602 }
2603 static inline int16_t
2604 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2605 {
2606     uint8_t round, shift = b & 0xf;
2607     int16_t res;
2608 
2609     round = get_round(vxrm, a, shift);
2610     res   = (a >> shift)  + round;
2611     return res;
2612 }
2613 static inline int32_t
2614 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2615 {
2616     uint8_t round, shift = b & 0x1f;
2617     int32_t res;
2618 
2619     round = get_round(vxrm, a, shift);
2620     res   = (a >> shift)  + round;
2621     return res;
2622 }
2623 static inline int64_t
2624 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2625 {
2626     uint8_t round, shift = b & 0x3f;
2627     int64_t res;
2628 
2629     round = get_round(vxrm, a, shift);
2630     res   = (a >> shift)  + round;
2631     return res;
2632 }
2633 
/*
 * vssra_vv_* / vssra_vx_*: RVVCALL and GEN_VEXT_*_RM invocations for the
 * b/h/w/d variants, each passing the matching vssra8/16/32/64 routine as
 * the per-element operation.  The macros are defined outside this
 * section -- confirm their expansion there.
 */
RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
GEN_VEXT_VV_RM(vssra_vv_b, 1, 1)
GEN_VEXT_VV_RM(vssra_vv_h, 2, 2)
GEN_VEXT_VV_RM(vssra_vv_w, 4, 4)
GEN_VEXT_VV_RM(vssra_vv_d, 8, 8)

RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
GEN_VEXT_VX_RM(vssra_vx_b, 1, 1)
GEN_VEXT_VX_RM(vssra_vx_h, 2, 2)
GEN_VEXT_VX_RM(vssra_vx_w, 4, 4)
GEN_VEXT_VX_RM(vssra_vx_d, 8, 8)
2651 
2652 /* Vector Narrowing Fixed-Point Clip Instructions */
2653 static inline int8_t
2654 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2655 {
2656     uint8_t round, shift = b & 0xf;
2657     int16_t res;
2658 
2659     round = get_round(vxrm, a, shift);
2660     res   = (a >> shift)  + round;
2661     if (res > INT8_MAX) {
2662         env->vxsat = 0x1;
2663         return INT8_MAX;
2664     } else if (res < INT8_MIN) {
2665         env->vxsat = 0x1;
2666         return INT8_MIN;
2667     } else {
2668         return res;
2669     }
2670 }
2671 
2672 static inline int16_t
2673 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2674 {
2675     uint8_t round, shift = b & 0x1f;
2676     int32_t res;
2677 
2678     round = get_round(vxrm, a, shift);
2679     res   = (a >> shift)  + round;
2680     if (res > INT16_MAX) {
2681         env->vxsat = 0x1;
2682         return INT16_MAX;
2683     } else if (res < INT16_MIN) {
2684         env->vxsat = 0x1;
2685         return INT16_MIN;
2686     } else {
2687         return res;
2688     }
2689 }
2690 
2691 static inline int32_t
2692 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2693 {
2694     uint8_t round, shift = b & 0x3f;
2695     int64_t res;
2696 
2697     round = get_round(vxrm, a, shift);
2698     res   = (a >> shift)  + round;
2699     if (res > INT32_MAX) {
2700         env->vxsat = 0x1;
2701         return INT32_MAX;
2702     } else if (res < INT32_MIN) {
2703         env->vxsat = 0x1;
2704         return INT32_MIN;
2705     } else {
2706         return res;
2707     }
2708 }
2709 
/*
 * vnclip_wv_* / vnclip_wx_*: RVVCALL and GEN_VEXT_*_RM invocations for the
 * b/h/w variants, each passing the matching vnclip8/16/32 routine as the
 * per-element operation.  Note the second H argument is one size up
 * (H2/H4/H8) from the first.  The macros are defined outside this
 * section -- confirm their expansion there.
 */
RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
GEN_VEXT_VV_RM(vnclip_wv_b, 1, 1)
GEN_VEXT_VV_RM(vnclip_wv_h, 2, 2)
GEN_VEXT_VV_RM(vnclip_wv_w, 4, 4)

RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
GEN_VEXT_VX_RM(vnclip_wx_b, 1, 1)
GEN_VEXT_VX_RM(vnclip_wx_h, 2, 2)
GEN_VEXT_VX_RM(vnclip_wx_w, 4, 4)
2723 
2724 static inline uint8_t
2725 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2726 {
2727     uint8_t round, shift = b & 0xf;
2728     uint16_t res;
2729 
2730     round = get_round(vxrm, a, shift);
2731     res   = (a >> shift)  + round;
2732     if (res > UINT8_MAX) {
2733         env->vxsat = 0x1;
2734         return UINT8_MAX;
2735     } else {
2736         return res;
2737     }
2738 }
2739 
2740 static inline uint16_t
2741 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
2742 {
2743     uint8_t round, shift = b & 0x1f;
2744     uint32_t res;
2745 
2746     round = get_round(vxrm, a, shift);
2747     res   = (a >> shift)  + round;
2748     if (res > UINT16_MAX) {
2749         env->vxsat = 0x1;
2750         return UINT16_MAX;
2751     } else {
2752         return res;
2753     }
2754 }
2755 
2756 static inline uint32_t
2757 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
2758 {
2759     uint8_t round, shift = b & 0x3f;
2760     uint64_t res;
2761 
2762     round = get_round(vxrm, a, shift);
2763     res   = (a >> shift)  + round;
2764     if (res > UINT32_MAX) {
2765         env->vxsat = 0x1;
2766         return UINT32_MAX;
2767     } else {
2768         return res;
2769     }
2770 }
2771 
/*
 * vnclipu_wv_* / vnclipu_wx_*: RVVCALL and GEN_VEXT_*_RM invocations for
 * the b/h/w variants, each passing the matching vnclipu8/16/32 routine as
 * the per-element operation.  Note the second H argument is one size up
 * (H2/H4/H8) from the first.  The macros are defined outside this
 * section -- confirm their expansion there.
 */
RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
GEN_VEXT_VV_RM(vnclipu_wv_b, 1, 1)
GEN_VEXT_VV_RM(vnclipu_wv_h, 2, 2)
GEN_VEXT_VV_RM(vnclipu_wv_w, 4, 4)

RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
GEN_VEXT_VX_RM(vnclipu_wx_b, 1, 1)
GEN_VEXT_VX_RM(vnclipu_wx_h, 2, 2)
GEN_VEXT_VX_RM(vnclipu_wx_w, 4, 4)
2785 
/*
 *** Vector Floating-Point Arithmetic Instructions
 */
2789 /* Vector Single-Width Floating-Point Add/Subtract Instructions */
/*
 * Define do_<NAME>(): the per-element body of a two-operand
 * vector-vector floating-point operation.  Element i of vs1 and vs2 is
 * loaded (indices adjusted by HS1/HS2), OP is called as
 * OP(vs2 element, vs1 element, &env->fp_status) and the result is stored
 * to element HD(i) of vd.
 */
#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
                      CPURISCVState *env)                      \
{                                                              \
    TD *dst = (TD *)vd + HD(i);                                \
    TX2 lhs = ((T2 *)vs2)[HS2(i)];                             \
    TX1 rhs = ((T1 *)vs1)[HS1(i)];                             \
                                                               \
    *dst = OP(lhs, rhs, &env->fp_status);                      \
}
2798 
/*
 * Define the HELPER(<NAME>) entry point for a vector-vector operation
 * whose per-element body is do_<NAME>().  Elements env->vstart .. vl-1
 * are visited; an element is skipped when vext_vm(desc) is zero and
 * vext_elem_mask(v0, i) is zero.  env->vstart is reset to 0 afterwards.
 * ESZ and DSZ are not referenced by the expansion.
 */
#define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ)                   \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    const uint32_t vm_bit = vext_vm(desc);                \
    const uint32_t nelem = env->vl;                       \
    uint32_t idx;                                         \
                                                          \
    for (idx = env->vstart; idx < nelem; idx++) {         \
        if (vm_bit || vext_elem_mask(v0, idx)) {          \
            do_##NAME(vd, vs1, vs2, idx, env);            \
        }                                                 \
    }                                                     \
    env->vstart = 0;                                      \
}
2816 
/*
 * vfadd_vv_h/w/d: OPFVV2 (via RVVCALL) supplies do_<name>() built on
 * float16_add / float32_add / float64_add, and GEN_VEXT_VV_ENV supplies
 * the matching helper entry point.
 */
RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8)
2823 
/*
 * Define do_<NAME>(): the per-element body of a two-operand
 * vector-scalar floating-point operation.  Element HS2(i) of vs2 is
 * loaded, the scalar s1 is narrowed through T1 and converted to TX1, OP
 * is called as OP(vs2 element, scalar, &env->fp_status) and the result
 * is stored to element HD(i) of vd.
 */
#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)        \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
                      CPURISCVState *env)                      \
{                                                              \
    TD *dst = (TD *)vd + HD(i);                                \
    TX2 elem = ((T2 *)vs2)[HS2(i)];                            \
    TX1 scalar = (TX1)(T1)s1;                                  \
                                                               \
    *dst = OP(elem, scalar, &env->fp_status);                  \
}
2831 
/*
 * Define the HELPER(<NAME>) entry point for a vector-scalar operation
 * whose per-element body is do_<NAME>().  Elements env->vstart .. vl-1
 * are visited; an element is skipped when vext_vm(desc) is zero and
 * vext_elem_mask(v0, i) is zero.  env->vstart is reset to 0 afterwards.
 * ESZ and DSZ are not referenced by the expansion.
 */
#define GEN_VEXT_VF(NAME, ESZ, DSZ)                       \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1,        \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    const uint32_t vm_bit = vext_vm(desc);                \
    const uint32_t nelem = env->vl;                       \
    uint32_t idx;                                         \
                                                          \
    for (idx = env->vstart; idx < nelem; idx++) {         \
        if (vm_bit || vext_elem_mask(v0, idx)) {          \
            do_##NAME(vd, s1, vs2, idx, env);             \
        }                                                 \
    }                                                     \
    env->vstart = 0;                                      \
}
2849 
/*
 * vfadd_vf_* and vfsub_vv_* / vfsub_vf_* for h/w/d: OPFVV2 / OPFVF2 (via
 * RVVCALL) supply do_<name>() built on floatN_add / floatN_sub, and
 * GEN_VEXT_VV_ENV / GEN_VEXT_VF supply the helper entry points.  Because
 * those macros call OP(vs2 element, vs1-or-scalar), the subtract variants
 * compute vs2 minus the other operand.
 */
RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
GEN_VEXT_VF(vfadd_vf_h, 2, 2)
GEN_VEXT_VF(vfadd_vf_w, 4, 4)
GEN_VEXT_VF(vfadd_vf_d, 8, 8)

RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8)
RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
GEN_VEXT_VF(vfsub_vf_h, 2, 2)
GEN_VEXT_VF(vfsub_vf_w, 4, 4)
GEN_VEXT_VF(vfsub_vf_d, 8, 8)
2869 
2870 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
2871 {
2872     return float16_sub(b, a, s);
2873 }
2874 
2875 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
2876 {
2877     return float32_sub(b, a, s);
2878 }
2879 
2880 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
2881 {
2882     return float64_sub(b, a, s);
2883 }
2884 
/*
 * vfrsub_vf_h/w/d: OPFVF2 (via RVVCALL) supplies do_<name>() built on the
 * floatN_rsub wrappers, which swap their operands before subtracting;
 * GEN_VEXT_VF supplies the helper entry points.
 */
RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
GEN_VEXT_VF(vfrsub_vf_h, 2, 2)
GEN_VEXT_VF(vfrsub_vf_w, 4, 4)
GEN_VEXT_VF(vfrsub_vf_d, 8, 8)
2891 
2892 /* Vector Widening Floating-Point Add/Subtract Instructions */
2893 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
2894 {
2895     return float32_add(float16_to_float32(a, true, s),
2896             float16_to_float32(b, true, s), s);
2897 }
2898 
2899 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
2900 {
2901     return float64_add(float32_to_float64(a, s),
2902             float32_to_float64(b, s), s);
2903 
2904 }
2905 
/*
 * vfwadd_vv_* / vfwadd_vf_* for h/w: OPFVV2 / OPFVF2 (via RVVCALL) supply
 * do_<name>() built on vfwadd16 / vfwadd32; GEN_VEXT_VV_ENV / GEN_VEXT_VF
 * supply the helper entry points.  The destination H macro is one size
 * up (H4/H8) from the source one.
 */
RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4)
GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8)
RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
GEN_VEXT_VF(vfwadd_vf_h, 2, 4)
GEN_VEXT_VF(vfwadd_vf_w, 4, 8)
2914 
2915 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
2916 {
2917     return float32_sub(float16_to_float32(a, true, s),
2918             float16_to_float32(b, true, s), s);
2919 }
2920 
2921 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
2922 {
2923     return float64_sub(float32_to_float64(a, s),
2924             float32_to_float64(b, s), s);
2925 
2926 }
2927 
/*
 * vfwsub_vv_* / vfwsub_vf_* for h/w: OPFVV2 / OPFVF2 (via RVVCALL) supply
 * do_<name>() built on vfwsub16 / vfwsub32; GEN_VEXT_VV_ENV / GEN_VEXT_VF
 * supply the helper entry points.  The destination H macro is one size
 * up (H4/H8) from the source one.
 */
RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4)
GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8)
RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
GEN_VEXT_VF(vfwsub_vf_h, 2, 4)
GEN_VEXT_VF(vfwsub_vf_w, 4, 8)
2936 
2937 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
2938 {
2939     return float32_add(a, float16_to_float32(b, true, s), s);
2940 }
2941 
2942 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
2943 {
2944     return float64_add(a, float32_to_float64(b, s), s);
2945 }
2946 
/*
 * vfwadd_wv_* / vfwadd_wf_* for h/w: OPFVV2 / OPFVF2 (via RVVCALL) supply
 * do_<name>() built on vfwaddw16 / vfwaddw32, whose first operand is
 * already in the wide format; GEN_VEXT_VV_ENV / GEN_VEXT_VF supply the
 * helper entry points.
 */
RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4)
GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8)
RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
GEN_VEXT_VF(vfwadd_wf_h, 2, 4)
GEN_VEXT_VF(vfwadd_wf_w, 4, 8)
2955 
2956 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
2957 {
2958     return float32_sub(a, float16_to_float32(b, true, s), s);
2959 }
2960 
2961 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
2962 {
2963     return float64_sub(a, float32_to_float64(b, s), s);
2964 }
2965 
/*
 * vfwsub_wv_* / vfwsub_wf_* for h/w: OPFVV2 / OPFVF2 (via RVVCALL) supply
 * do_<name>() built on vfwsubw16 / vfwsubw32, whose first operand is
 * already in the wide format; GEN_VEXT_VV_ENV / GEN_VEXT_VF supply the
 * helper entry points.
 */
RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4)
GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8)
RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
GEN_VEXT_VF(vfwsub_wf_h, 2, 4)
GEN_VEXT_VF(vfwsub_wf_w, 4, 8)
2974 
2975 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */
/*
 * vfmul_* and vfdiv_* (vv and vf forms, h/w/d): OPFVV2 / OPFVF2 (via
 * RVVCALL) supply do_<name>() built on floatN_mul / floatN_div, and
 * GEN_VEXT_VV_ENV / GEN_VEXT_VF supply the helper entry points.  Because
 * those macros call OP(vs2 element, vs1-or-scalar), the divide variants
 * use the vs2 element as the dividend.
 */
RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8)
RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
GEN_VEXT_VF(vfmul_vf_h, 2, 2)
GEN_VEXT_VF(vfmul_vf_w, 4, 4)
GEN_VEXT_VF(vfmul_vf_d, 8, 8)

RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8)
RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
GEN_VEXT_VF(vfdiv_vf_h, 2, 2)
GEN_VEXT_VF(vfdiv_vf_w, 4, 4)
GEN_VEXT_VF(vfdiv_vf_d, 8, 8)
3001 
3002 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3003 {
3004     return float16_div(b, a, s);
3005 }
3006 
3007 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
3008 {
3009     return float32_div(b, a, s);
3010 }
3011 
3012 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
3013 {
3014     return float64_div(b, a, s);
3015 }
3016 
/*
 * vfrdiv_vf_h/w/d: OPFVF2 (via RVVCALL) supplies do_<name>() built on the
 * floatN_rdiv wrappers, which swap their operands before dividing;
 * GEN_VEXT_VF supplies the helper entry points.
 */
RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
GEN_VEXT_VF(vfrdiv_vf_h, 2, 2)
GEN_VEXT_VF(vfrdiv_vf_w, 4, 4)
GEN_VEXT_VF(vfrdiv_vf_d, 8, 8)
3023 
3024 /* Vector Widening Floating-Point Multiply */
3025 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3026 {
3027     return float32_mul(float16_to_float32(a, true, s),
3028             float16_to_float32(b, true, s), s);
3029 }
3030 
3031 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3032 {
3033     return float64_mul(float32_to_float64(a, s),
3034             float32_to_float64(b, s), s);
3035 
3036 }
/*
 * vfwmul_vv_* / vfwmul_vf_* for h/w: OPFVV2 / OPFVF2 (via RVVCALL) supply
 * do_<name>() built on vfwmul16 / vfwmul32; GEN_VEXT_VV_ENV / GEN_VEXT_VF
 * supply the helper entry points.  The destination H macro is one size
 * up (H4/H8) from the source one.
 */
RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4)
GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8)
RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
GEN_VEXT_VF(vfwmul_vf_h, 2, 4)
GEN_VEXT_VF(vfwmul_vf_w, 4, 8)
3045 
3046 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
/*
 * Define do_<NAME>(): the per-element body of a three-operand
 * vector-vector floating-point operation.  Element i of vs1, vs2 and the
 * current destination element are loaded, OP is called as
 * OP(vs2 element, vs1 element, vd element, &env->fp_status) and the
 * result overwrites element HD(i) of vd.
 */
#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)       \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i,       \
        CPURISCVState *env)                                        \
{                                                                  \
    TD *dst = (TD *)vd + HD(i);                                    \
    TX2 lhs = ((T2 *)vs2)[HS2(i)];                                 \
    TX1 rhs = ((T1 *)vs1)[HS1(i)];                                 \
    TD acc = *dst;                                                 \
                                                                   \
    *dst = OP(lhs, rhs, acc, &env->fp_status);                     \
}
3056 
3057 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3058 {
3059     return float16_muladd(a, b, d, 0, s);
3060 }
3061 
3062 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3063 {
3064     return float32_muladd(a, b, d, 0, s);
3065 }
3066 
3067 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3068 {
3069     return float64_muladd(a, b, d, 0, s);
3070 }
3071 
/*
 * vfmacc.vv helpers for 16/32/64-bit elements: OPFVV3 supplies the
 * per-element do_<name>() that calls fmaccN(vs2[i], vs1[i], vd[i]).
 * NOTE(review): GEN_VEXT_VV_ENV is defined outside this excerpt; presumably
 * it wraps do_<name>() in the HELPER() element loop - confirm.
 */
RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8)
3078 
/*
 * OPFVF3 - define do_<NAME>(), the per-element body of a vector-scalar
 * three-operand floating-point operation.
 *
 * The scalar s1 is narrowed to T1 and then converted to TX1; element i of
 * vs2 (type T2, index HS2(i)) is loaded as TX2. The current destination
 * element (type TD, index HD(i)) is the third input, and the result of
 * OP(s2, scalar, d, &env->fp_status) is written back to that same element.
 */
#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)           \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i,    \
                      CPURISCVState *env)                         \
{                                                                 \
    TD *dst = (TD *)vd + HD(i);                                   \
    TX1 scalar = (TX1)(T1)s1;                                     \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                               \
                                                                  \
    *dst = OP(s2, scalar, *dst, &env->fp_status);                 \
}
3087 
/*
 * vfmacc.vf helpers for 16/32/64-bit elements: OPFVF3 supplies the
 * per-element do_<name>() that calls fmaccN(vs2[i], scalar, vd[i]).
 * NOTE(review): GEN_VEXT_VF is defined outside this excerpt; presumably it
 * wraps do_<name>() in the HELPER() element loop - confirm.
 */
RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
GEN_VEXT_VF(vfmacc_vf_h, 2, 2)
GEN_VEXT_VF(vfmacc_vf_w, 4, 4)
GEN_VEXT_VF(vfmacc_vf_d, 8, 8)
3094 
3095 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3096 {
3097     return float16_muladd(a, b, d,
3098             float_muladd_negate_c | float_muladd_negate_product, s);
3099 }
3100 
3101 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3102 {
3103     return float32_muladd(a, b, d,
3104             float_muladd_negate_c | float_muladd_negate_product, s);
3105 }
3106 
3107 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3108 {
3109     return float64_muladd(a, b, d,
3110             float_muladd_negate_c | float_muladd_negate_product, s);
3111 }
3112 
/*
 * vfnmacc.vv / vfnmacc.vf helpers for 16/32/64-bit elements, built on
 * fnmaccN via OPFVV3 (vector operand) and OPFVF3 (scalar operand).
 * NOTE(review): GEN_VEXT_VV_ENV and GEN_VEXT_VF are defined outside this
 * excerpt; presumably they generate the HELPER() element loops - confirm.
 */
RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8)
RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
GEN_VEXT_VF(vfnmacc_vf_h, 2, 2)
GEN_VEXT_VF(vfnmacc_vf_w, 4, 4)
GEN_VEXT_VF(vfnmacc_vf_d, 8, 8)
3125 
3126 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3127 {
3128     return float16_muladd(a, b, d, float_muladd_negate_c, s);
3129 }
3130 
3131 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3132 {
3133     return float32_muladd(a, b, d, float_muladd_negate_c, s);
3134 }
3135 
3136 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3137 {
3138     return float64_muladd(a, b, d, float_muladd_negate_c, s);
3139 }
3140 
/*
 * vfmsac.vv / vfmsac.vf helpers for 16/32/64-bit elements, built on
 * fmsacN via OPFVV3 (vector operand) and OPFVF3 (scalar operand).
 * NOTE(review): GEN_VEXT_VV_ENV and GEN_VEXT_VF are defined outside this
 * excerpt; presumably they generate the HELPER() element loops - confirm.
 */
RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8)
RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
GEN_VEXT_VF(vfmsac_vf_h, 2, 2)
GEN_VEXT_VF(vfmsac_vf_w, 4, 4)
GEN_VEXT_VF(vfmsac_vf_d, 8, 8)
3153 
3154 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3155 {
3156     return float16_muladd(a, b, d, float_muladd_negate_product, s);
3157 }
3158 
3159 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3160 {
3161     return float32_muladd(a, b, d, float_muladd_negate_product, s);
3162 }
3163 
3164 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3165 {
3166     return float64_muladd(a, b, d, float_muladd_negate_product, s);
3167 }
3168 
/*
 * vfnmsac.vv / vfnmsac.vf helpers for 16/32/64-bit elements, built on
 * fnmsacN via OPFVV3 (vector operand) and OPFVF3 (scalar operand).
 * NOTE(review): GEN_VEXT_VV_ENV and GEN_VEXT_VF are defined outside this
 * excerpt; presumably they generate the HELPER() element loops - confirm.
 */
RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8)
RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
GEN_VEXT_VF(vfnmsac_vf_h, 2, 2)
GEN_VEXT_VF(vfnmsac_vf_w, 4, 4)
GEN_VEXT_VF(vfnmsac_vf_d, 8, 8)
3181 
3182 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3183 {
3184     return float16_muladd(d, b, a, 0, s);
3185 }
3186 
3187 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3188 {
3189     return float32_muladd(d, b, a, 0, s);
3190 }
3191 
3192 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3193 {
3194     return float64_muladd(d, b, a, 0, s);
3195 }
3196 
/*
 * vfmadd.vv / vfmadd.vf helpers for 16/32/64-bit elements, built on
 * fmaddN via OPFVV3 (vector operand) and OPFVF3 (scalar operand).
 * NOTE(review): GEN_VEXT_VV_ENV and GEN_VEXT_VF are defined outside this
 * excerpt; presumably they generate the HELPER() element loops - confirm.
 */
RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8)
RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
GEN_VEXT_VF(vfmadd_vf_h, 2, 2)
GEN_VEXT_VF(vfmadd_vf_w, 4, 4)
GEN_VEXT_VF(vfmadd_vf_d, 8, 8)
3209 
3210 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3211 {
3212     return float16_muladd(d, b, a,
3213             float_muladd_negate_c | float_muladd_negate_product, s);
3214 }
3215 
3216 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3217 {
3218     return float32_muladd(d, b, a,
3219             float_muladd_negate_c | float_muladd_negate_product, s);
3220 }
3221 
3222 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3223 {
3224     return float64_muladd(d, b, a,
3225             float_muladd_negate_c | float_muladd_negate_product, s);
3226 }
3227 
/*
 * vfnmadd.vv / vfnmadd.vf helpers for 16/32/64-bit elements, built on
 * fnmaddN via OPFVV3 (vector operand) and OPFVF3 (scalar operand).
 * NOTE(review): GEN_VEXT_VV_ENV and GEN_VEXT_VF are defined outside this
 * excerpt; presumably they generate the HELPER() element loops - confirm.
 */
RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8)
RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
GEN_VEXT_VF(vfnmadd_vf_h, 2, 2)
GEN_VEXT_VF(vfnmadd_vf_w, 4, 4)
GEN_VEXT_VF(vfnmadd_vf_d, 8, 8)
3240 
3241 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3242 {
3243     return float16_muladd(d, b, a, float_muladd_negate_c, s);
3244 }
3245 
3246 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3247 {
3248     return float32_muladd(d, b, a, float_muladd_negate_c, s);
3249 }
3250 
3251 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3252 {
3253     return float64_muladd(d, b, a, float_muladd_negate_c, s);
3254 }
3255 
/*
 * vfmsub.vv / vfmsub.vf helpers for 16/32/64-bit elements, built on
 * fmsubN via OPFVV3 (vector operand) and OPFVF3 (scalar operand).
 * NOTE(review): GEN_VEXT_VV_ENV and GEN_VEXT_VF are defined outside this
 * excerpt; presumably they generate the HELPER() element loops - confirm.
 */
RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8)
RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
GEN_VEXT_VF(vfmsub_vf_h, 2, 2)
GEN_VEXT_VF(vfmsub_vf_w, 4, 4)
GEN_VEXT_VF(vfmsub_vf_d, 8, 8)
3268 
3269 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3270 {
3271     return float16_muladd(d, b, a, float_muladd_negate_product, s);
3272 }
3273 
3274 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3275 {
3276     return float32_muladd(d, b, a, float_muladd_negate_product, s);
3277 }
3278 
3279 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3280 {
3281     return float64_muladd(d, b, a, float_muladd_negate_product, s);
3282 }
3283 
/*
 * vfnmsub.vv / vfnmsub.vf helpers for 16/32/64-bit elements, built on
 * fnmsubN via OPFVV3 (vector operand) and OPFVF3 (scalar operand).
 * NOTE(review): GEN_VEXT_VV_ENV and GEN_VEXT_VF are defined outside this
 * excerpt; presumably they generate the HELPER() element loops - confirm.
 */
RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8)
RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
GEN_VEXT_VF(vfnmsub_vf_h, 2, 2)
GEN_VEXT_VF(vfnmsub_vf_w, 4, 4)
GEN_VEXT_VF(vfnmsub_vf_d, 8, 8)
3296 
3297 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3298 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3299 {
3300     return float32_muladd(float16_to_float32(a, true, s),
3301                         float16_to_float32(b, true, s), d, 0, s);
3302 }
3303 
3304 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3305 {
3306     return float64_muladd(float32_to_float64(a, s),
3307                         float32_to_float64(b, s), d, 0, s);
3308 }
3309 
/*
 * vfwmacc.vv / vfwmacc.vf helpers: 16->32-bit and 32->64-bit widening
 * accumulates built on fwmacc16/fwmacc32 via OPFVV3 and OPFVF3.
 * NOTE(review): GEN_VEXT_VV_ENV and GEN_VEXT_VF are defined outside this
 * excerpt; presumably they generate the HELPER() element loops - confirm.
 */
RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4)
GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8)
RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
GEN_VEXT_VF(vfwmacc_vf_h, 2, 4)
GEN_VEXT_VF(vfwmacc_vf_w, 4, 8)
3318 
3319 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3320 {
3321     return float32_muladd(float16_to_float32(a, true, s),
3322                         float16_to_float32(b, true, s), d,
3323                         float_muladd_negate_c | float_muladd_negate_product, s);
3324 }
3325 
3326 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3327 {
3328     return float64_muladd(float32_to_float64(a, s),
3329                         float32_to_float64(b, s), d,
3330                         float_muladd_negate_c | float_muladd_negate_product, s);
3331 }
3332 
/*
 * vfwnmacc.vv / vfwnmacc.vf helpers: 16->32-bit and 32->64-bit widening
 * variants built on fwnmacc16/fwnmacc32 via OPFVV3 and OPFVF3.
 * NOTE(review): GEN_VEXT_VV_ENV and GEN_VEXT_VF are defined outside this
 * excerpt; presumably they generate the HELPER() element loops - confirm.
 */
RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4)
GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8)
RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4)
GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8)
3341 
3342 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3343 {
3344     return float32_muladd(float16_to_float32(a, true, s),
3345                         float16_to_float32(b, true, s), d,
3346                         float_muladd_negate_c, s);
3347 }
3348 
3349 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3350 {
3351     return float64_muladd(float32_to_float64(a, s),
3352                         float32_to_float64(b, s), d,
3353                         float_muladd_negate_c, s);
3354 }
3355 
/*
 * vfwmsac.vv / vfwmsac.vf helpers: 16->32-bit and 32->64-bit widening
 * variants built on fwmsac16/fwmsac32 via OPFVV3 and OPFVF3.
 * NOTE(review): GEN_VEXT_VV_ENV and GEN_VEXT_VF are defined outside this
 * excerpt; presumably they generate the HELPER() element loops - confirm.
 */
RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4)
GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8)
RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
GEN_VEXT_VF(vfwmsac_vf_h, 2, 4)
GEN_VEXT_VF(vfwmsac_vf_w, 4, 8)
3364 
3365 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3366 {
3367     return float32_muladd(float16_to_float32(a, true, s),
3368                         float16_to_float32(b, true, s), d,
3369                         float_muladd_negate_product, s);
3370 }
3371 
3372 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3373 {
3374     return float64_muladd(float32_to_float64(a, s),
3375                         float32_to_float64(b, s), d,
3376                         float_muladd_negate_product, s);
3377 }
3378 
/*
 * vfwnmsac.vv / vfwnmsac.vf helpers: 16->32-bit and 32->64-bit widening
 * variants built on fwnmsac16/fwnmsac32 via OPFVV3 and OPFVF3.
 * NOTE(review): GEN_VEXT_VV_ENV and GEN_VEXT_VF are defined outside this
 * excerpt; presumably they generate the HELPER() element loops - confirm.
 */
RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4)
GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8)
RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4)
GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8)
3387 
3388 /* Vector Floating-Point Square-Root Instruction */
/*
 * Type triples for single-source operations, expanded into the
 * (TD, T2, TX2) parameters of OPFVV1: destination element type, source
 * element type, and the type the source element is loaded as.
 */
#define OP_UU_H uint16_t, uint16_t, uint16_t
#define OP_UU_W uint32_t, uint32_t, uint32_t
#define OP_UU_D uint64_t, uint64_t, uint64_t
3393 
/*
 * OPFVV1 - define do_<NAME>(), the per-element body of a single-source
 * floating-point operation.
 *
 * Element i of vs2 (type T2, index HS2(i)) is loaded as TX2 and the result
 * of OP(src, &env->fp_status) is stored into element HD(i) of vd (type TD).
 */
#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP)         \
static void do_##NAME(void *vd, void *vs2, int i,      \
                      CPURISCVState *env)              \
{                                                      \
    TD *dst = (TD *)vd + HD(i);                        \
    TX2 src = *((T2 *)vs2 + HS2(i));                   \
                                                       \
    *dst = OP(src, &env->fp_status);                   \
}
3401 
/*
 * GEN_VEXT_V_ENV - define HELPER(NAME), the element loop for a single-source
 * operation whose per-element body is do_<NAME>().
 *
 * Nothing is done when env->vl is zero (env->vstart is left untouched in
 * that case). Otherwise elements env->vstart .. vl-1 are visited; an element
 * is processed when vext_vm(desc) is non-zero or its bit in v0 is set, and
 * skipped elements are not written. env->vstart is cleared afterwards.
 * ESZ and DSZ are accepted but not used by this expansion.
 */
#define GEN_VEXT_V_ENV(NAME, ESZ, DSZ)                 \
void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
                  CPURISCVState *env, uint32_t desc)   \
{                                                      \
    const uint32_t unmasked = vext_vm(desc);           \
    const uint32_t vl = env->vl;                       \
    uint32_t elem;                                     \
                                                       \
    if (vl == 0) {                                     \
        return;                                        \
    }                                                  \
    for (elem = env->vstart; elem < vl; elem++) {      \
        if (unmasked || vext_elem_mask(v0, elem)) {    \
            do_##NAME(vd, vs2, elem, env);             \
        }                                              \
    }                                                  \
    env->vstart = 0;                                   \
}
3421 
/*
 * vfsqrt.v helpers for 16/32/64-bit elements: OPFVV1 emits do_<name>()
 * around floatN_sqrt and GEN_VEXT_V_ENV emits the masked element loop.
 * NOTE(review): RVVCALL is defined outside this excerpt; presumably it just
 * forwards its arguments to the named generator macro - confirm.
 */
RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2)
GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4)
GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8)
3428 
/*
 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
 *
 * Adapted from riscv-v-spec recip.c:
 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
 *
 * frsqrt7() works on the raw encoding of a floating-point value:
 *   f         - bit pattern: frac_size fraction bits, then exp_size exponent
 *               bits, then one sign bit
 *   exp_size  - width of the exponent field in bits
 *   frac_size - width of the fraction field in bits
 *
 * Returns the bit pattern of the estimate, carrying the input's sign bit.
 * Subnormal inputs are normalized first.  The format-specific wrappers in
 * this file handle NaN, infinity, zero and negative inputs before calling
 * here, so only positive normal and subnormal values are expected.
 */
static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
{
    /*
     * Indexed by {exponent LSB, top 6 fraction bits}; each entry is the
     * 7 most significant fraction bits of the result.  Kept static so the
     * table is not rebuilt on the stack for every element.
     */
    static const uint8_t lookup_table[] = {
        52, 51, 50, 48, 47, 46, 44, 43,
        42, 41, 40, 39, 38, 36, 35, 34,
        33, 32, 31, 30, 30, 29, 28, 27,
        26, 25, 24, 23, 23, 22, 21, 20,
        19, 19, 18, 17, 16, 16, 15, 14,
        14, 13, 12, 12, 11, 10, 10, 9,
        9, 8, 7, 7, 6, 6, 5, 4,
        4, 3, 3, 2, 2, 1, 1, 0,
        127, 125, 123, 121, 119, 118, 116, 114,
        113, 111, 109, 108, 106, 105, 103, 102,
        100, 99, 97, 96, 95, 93, 92, 91,
        90, 88, 87, 86, 85, 84, 83, 82,
        80, 79, 78, 77, 76, 75, 74, 73,
        72, 71, 70, 70, 69, 68, 67, 66,
        65, 64, 63, 63, 62, 61, 60, 59,
        59, 58, 57, 56, 56, 55, 54, 53
    };
    const int precision = 7;

    uint64_t sign = extract64(f, frac_size + exp_size, 1);
    uint64_t exp = extract64(f, frac_size, exp_size);
    uint64_t frac = extract64(f, 0, frac_size);

    if (exp == 0 && frac != 0) { /* subnormal */
        /*
         * Normalize the subnormal.  exp is unsigned and deliberately wraps
         * below zero; the modular arithmetic further down relies on that.
         */
        while (extract64(frac, frac_size - 1, 1) == 0) {
            exp--;
            frac <<= 1;
        }

        /* Drop the now-explicit leading one. */
        frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
    }

    int idx = ((exp & 1) << (precision - 1)) |
                (frac >> (frac_size - precision + 1));
    uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
                            (frac_size - precision);
    /* (3 * bias - 1 - exp) / 2, with ~exp standing in for -exp - 1. */
    uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;

    uint64_t val = 0;
    val = deposit64(val, 0, frac_size, out_frac);
    val = deposit64(val, frac_size, exp_size, out_exp);
    val = deposit64(val, frac_size + exp_size, 1, sign);
    return val;
}
3483 
3484 static float16 frsqrt7_h(float16 f, float_status *s)
3485 {
3486     int exp_size = 5, frac_size = 10;
3487     bool sign = float16_is_neg(f);
3488 
3489     /*
3490      * frsqrt7(sNaN) = canonical NaN
3491      * frsqrt7(-inf) = canonical NaN
3492      * frsqrt7(-normal) = canonical NaN
3493      * frsqrt7(-subnormal) = canonical NaN
3494      */
3495     if (float16_is_signaling_nan(f, s) ||
3496             (float16_is_infinity(f) && sign) ||
3497             (float16_is_normal(f) && sign) ||
3498             (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
3499         s->float_exception_flags |= float_flag_invalid;
3500         return float16_default_nan(s);
3501     }
3502 
3503     /* frsqrt7(qNaN) = canonical NaN */
3504     if (float16_is_quiet_nan(f, s)) {
3505         return float16_default_nan(s);
3506     }
3507 
3508     /* frsqrt7(+-0) = +-inf */
3509     if (float16_is_zero(f)) {
3510         s->float_exception_flags |= float_flag_divbyzero;
3511         return float16_set_sign(float16_infinity, sign);
3512     }
3513 
3514     /* frsqrt7(+inf) = +0 */
3515     if (float16_is_infinity(f) && !sign) {
3516         return float16_set_sign(float16_zero, sign);
3517     }
3518 
3519     /* +normal, +subnormal */
3520     uint64_t val = frsqrt7(f, exp_size, frac_size);
3521     return make_float16(val);
3522 }
3523 
3524 static float32 frsqrt7_s(float32 f, float_status *s)
3525 {
3526     int exp_size = 8, frac_size = 23;
3527     bool sign = float32_is_neg(f);
3528 
3529     /*
3530      * frsqrt7(sNaN) = canonical NaN
3531      * frsqrt7(-inf) = canonical NaN
3532      * frsqrt7(-normal) = canonical NaN
3533      * frsqrt7(-subnormal) = canonical NaN
3534      */
3535     if (float32_is_signaling_nan(f, s) ||
3536             (float32_is_infinity(f) && sign) ||
3537             (float32_is_normal(f) && sign) ||
3538             (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
3539         s->float_exception_flags |= float_flag_invalid;
3540         return float32_default_nan(s);
3541     }
3542 
3543     /* frsqrt7(qNaN) = canonical NaN */
3544     if (float32_is_quiet_nan(f, s)) {
3545         return float32_default_nan(s);
3546     }
3547 
3548     /* frsqrt7(+-0) = +-inf */
3549     if (float32_is_zero(f)) {
3550         s->float_exception_flags |= float_flag_divbyzero;
3551         return float32_set_sign(float32_infinity, sign);
3552     }
3553 
3554     /* frsqrt7(+inf) = +0 */
3555     if (float32_is_infinity(f) && !sign) {
3556         return float32_set_sign(float32_zero, sign);
3557     }
3558 
3559     /* +normal, +subnormal */
3560     uint64_t val = frsqrt7(f, exp_size, frac_size);
3561     return make_float32(val);
3562 }
3563 
3564 static float64 frsqrt7_d(float64 f, float_status *s)
3565 {
3566     int exp_size = 11, frac_size = 52;
3567     bool sign = float64_is_neg(f);
3568 
3569     /*
3570      * frsqrt7(sNaN) = canonical NaN
3571      * frsqrt7(-inf) = canonical NaN
3572      * frsqrt7(-normal) = canonical NaN
3573      * frsqrt7(-subnormal) = canonical NaN
3574      */
3575     if (float64_is_signaling_nan(f, s) ||
3576             (float64_is_infinity(f) && sign) ||
3577             (float64_is_normal(f) && sign) ||
3578             (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
3579         s->float_exception_flags |= float_flag_invalid;
3580         return float64_default_nan(s);
3581     }
3582 
3583     /* frsqrt7(qNaN) = canonical NaN */
3584     if (float64_is_quiet_nan(f, s)) {
3585         return float64_default_nan(s);
3586     }
3587 
3588     /* frsqrt7(+-0) = +-inf */
3589     if (float64_is_zero(f)) {
3590         s->float_exception_flags |= float_flag_divbyzero;
3591         return float64_set_sign(float64_infinity, sign);
3592     }
3593 
3594     /* frsqrt7(+inf) = +0 */
3595     if (float64_is_infinity(f) && !sign) {
3596         return float64_set_sign(float64_zero, sign);
3597     }
3598 
3599     /* +normal, +subnormal */
3600     uint64_t val = frsqrt7(f, exp_size, frac_size);
3601     return make_float64(val);
3602 }
3603 
/*
 * vfrsqrt7.v helpers for 16/32/64-bit elements: OPFVV1 emits do_<name>()
 * around frsqrt7_h/_s/_d and GEN_VEXT_V_ENV emits the masked element loop.
 * NOTE(review): RVVCALL is defined outside this excerpt; presumably it just
 * forwards its arguments to the named generator macro - confirm.
 */
RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2, 2)
GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4, 4)
GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8, 8)
3610 
3611 /*
3612  * Vector Floating-Point Reciprocal Estimate Instruction
3613  *
3614  * Adapted from riscv-v-spec recip.c:
3615  * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3616  */
3617 static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
3618                       float_status *s)
3619 {
3620     uint64_t sign = extract64(f, frac_size + exp_size, 1);
3621     uint64_t exp = extract64(f, frac_size, exp_size);
3622     uint64_t frac = extract64(f, 0, frac_size);
3623 
3624     const uint8_t lookup_table[] = {
3625         127, 125, 123, 121, 119, 117, 116, 114,
3626         112, 110, 109, 107, 105, 104, 102, 100,
3627         99, 97, 96, 94, 93, 91, 90, 88,
3628         87, 85, 84, 83, 81, 80, 79, 77,
3629         76, 75, 74, 72, 71, 70, 69, 68,
3630         66, 65, 64, 63, 62, 61, 60, 59,
3631         58, 57, 56, 55, 54, 53, 52, 51,
3632         50, 49, 48, 47, 46, 45, 44, 43,
3633         42, 41, 40, 40, 39, 38, 37, 36,
3634         35, 35, 34, 33, 32, 31, 31, 30,
3635         29, 28, 28, 27, 26, 25, 25, 24,
3636         23, 23, 22, 21, 21, 20, 19, 19,
3637         18, 17, 17, 16, 15, 15, 14, 14,
3638         13, 12, 12, 11, 11, 10, 9, 9,
3639         8, 8, 7, 7, 6, 5, 5, 4,
3640         4, 3, 3, 2, 2, 1, 1, 0
3641     };
3642     const int precision = 7;
3643 
3644     if (exp == 0 && frac != 0) { /* subnormal */
3645         /* Normalize the subnormal. */
3646         while (extract64(frac, frac_size - 1, 1) == 0) {
3647             exp--;
3648             frac <<= 1;
3649         }
3650 
3651         frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3652 
3653         if (exp != 0 && exp != UINT64_MAX) {
3654             /*
3655              * Overflow to inf or max value of same sign,
3656              * depending on sign and rounding mode.
3657              */
3658             s->float_exception_flags |= (float_flag_inexact |
3659                                          float_flag_overflow);
3660 
3661             if ((s->float_rounding_mode == float_round_to_zero) ||
3662                 ((s->float_rounding_mode == float_round_down) && !sign) ||
3663                 ((s->float_rounding_mode == float_round_up) && sign)) {
3664                 /* Return greatest/negative finite value. */
3665                 return (sign << (exp_size + frac_size)) |
3666                     (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
3667             } else {
3668                 /* Return +-inf. */
3669                 return (sign << (exp_size + frac_size)) |
3670                     MAKE_64BIT_MASK(frac_size, exp_size);
3671             }
3672         }
3673     }
3674 
3675     int idx = frac >> (frac_size - precision);
3676     uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3677                             (frac_size - precision);
3678     uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;
3679 
3680     if (out_exp == 0 || out_exp == UINT64_MAX) {
3681         /*
3682          * The result is subnormal, but don't raise the underflow exception,
3683          * because there's no additional loss of precision.
3684          */
3685         out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
3686         if (out_exp == UINT64_MAX) {
3687             out_frac >>= 1;
3688             out_exp = 0;
3689         }
3690     }
3691 
3692     uint64_t val = 0;
3693     val = deposit64(val, 0, frac_size, out_frac);
3694     val = deposit64(val, frac_size, exp_size, out_exp);
3695     val = deposit64(val, frac_size + exp_size, 1, sign);
3696     return val;
3697 }
3698 
3699 static float16 frec7_h(float16 f, float_status *s)
3700 {
3701     int exp_size = 5, frac_size = 10;
3702     bool sign = float16_is_neg(f);
3703 
3704     /* frec7(+-inf) = +-0 */
3705     if (float16_is_infinity(f)) {
3706         return float16_set_sign(float16_zero, sign);
3707     }
3708 
3709     /* frec7(+-0) = +-inf */
3710     if (float16_is_zero(f)) {
3711         s->float_exception_flags |= float_flag_divbyzero;
3712         return float16_set_sign(float16_infinity, sign);
3713     }
3714 
3715     /* frec7(sNaN) = canonical NaN */
3716     if (float16_is_signaling_nan(f, s)) {
3717         s->float_exception_flags |= float_flag_invalid;
3718         return float16_default_nan(s);
3719     }
3720 
3721     /* frec7(qNaN) = canonical NaN */
3722     if (float16_is_quiet_nan(f, s)) {
3723         return float16_default_nan(s);
3724     }
3725 
3726     /* +-normal, +-subnormal */
3727     uint64_t val = frec7(f, exp_size, frac_size, s);
3728     return make_float16(val);
3729 }
3730 
3731 static float32 frec7_s(float32 f, float_status *s)
3732 {
3733     int exp_size = 8, frac_size = 23;
3734     bool sign = float32_is_neg(f);
3735 
3736     /* frec7(+-inf) = +-0 */
3737     if (float32_is_infinity(f)) {
3738         return float32_set_sign(float32_zero, sign);
3739     }
3740 
3741     /* frec7(+-0) = +-inf */
3742     if (float32_is_zero(f)) {
3743         s->float_exception_flags |= float_flag_divbyzero;
3744         return float32_set_sign(float32_infinity, sign);
3745     }
3746 
3747     /* frec7(sNaN) = canonical NaN */
3748     if (float32_is_signaling_nan(f, s)) {
3749         s->float_exception_flags |= float_flag_invalid;
3750         return float32_default_nan(s);
3751     }
3752 
3753     /* frec7(qNaN) = canonical NaN */
3754     if (float32_is_quiet_nan(f, s)) {
3755         return float32_default_nan(s);
3756     }
3757 
3758     /* +-normal, +-subnormal */
3759     uint64_t val = frec7(f, exp_size, frac_size, s);
3760     return make_float32(val);
3761 }
3762 
3763 static float64 frec7_d(float64 f, float_status *s)
3764 {
3765     int exp_size = 11, frac_size = 52;
3766     bool sign = float64_is_neg(f);
3767 
3768     /* frec7(+-inf) = +-0 */
3769     if (float64_is_infinity(f)) {
3770         return float64_set_sign(float64_zero, sign);
3771     }
3772 
3773     /* frec7(+-0) = +-inf */
3774     if (float64_is_zero(f)) {
3775         s->float_exception_flags |= float_flag_divbyzero;
3776         return float64_set_sign(float64_infinity, sign);
3777     }
3778 
3779     /* frec7(sNaN) = canonical NaN */
3780     if (float64_is_signaling_nan(f, s)) {
3781         s->float_exception_flags |= float_flag_invalid;
3782         return float64_default_nan(s);
3783     }
3784 
3785     /* frec7(qNaN) = canonical NaN */
3786     if (float64_is_quiet_nan(f, s)) {
3787         return float64_default_nan(s);
3788     }
3789 
3790     /* +-normal, +-subnormal */
3791     uint64_t val = frec7(f, exp_size, frac_size, s);
3792     return make_float64(val);
3793 }
3794 
/*
 * vfrec7.v helpers for 16/32/64-bit elements: OPFVV1 emits do_<name>()
 * around frec7_h/_s/_d and GEN_VEXT_V_ENV emits the masked element loop.
 * NOTE(review): RVVCALL is defined outside this excerpt; presumably it just
 * forwards its arguments to the named generator macro - confirm.
 */
RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
GEN_VEXT_V_ENV(vfrec7_v_h, 2, 2)
GEN_VEXT_V_ENV(vfrec7_v_w, 4, 4)
GEN_VEXT_V_ENV(vfrec7_v_d, 8, 8)
3801 
3802 /* Vector Floating-Point MIN/MAX Instructions */
/*
 * vfmin.vv / vfmin.vf helpers for 16/32/64-bit elements, implemented
 * directly with softfloat's floatN_minimum_number.
 * NOTE(review): OPFVV2, OPFVF2, GEN_VEXT_VV_ENV and GEN_VEXT_VF are defined
 * outside this excerpt; presumably they emit do_<name>() and the HELPER()
 * element loops - confirm.
 */
RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8)
RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
GEN_VEXT_VF(vfmin_vf_h, 2, 2)
GEN_VEXT_VF(vfmin_vf_w, 4, 4)
GEN_VEXT_VF(vfmin_vf_d, 8, 8)
3815 
/*
 * vfmax.vv / vfmax.vf helpers for 16/32/64-bit elements, implemented
 * directly with softfloat's floatN_maximum_number.
 * NOTE(review): OPFVV2, OPFVF2, GEN_VEXT_VV_ENV and GEN_VEXT_VF are defined
 * outside this excerpt; presumably they emit do_<name>() and the HELPER()
 * element loops - confirm.
 */
RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8)
RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
GEN_VEXT_VF(vfmax_vf_h, 2, 2)
GEN_VEXT_VF(vfmax_vf_w, 4, 4)
GEN_VEXT_VF(vfmax_vf_d, 8, 8)
3828 
3829 /* Vector Floating-Point Sign-Injection Instructions */
3830 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
3831 {
3832     return deposit64(b, 0, 15, a);
3833 }
3834 
3835 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
3836 {
3837     return deposit64(b, 0, 31, a);
3838 }
3839 
3840 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
3841 {
3842     return deposit64(b, 0, 63, a);
3843 }
3844 
/*
 * vfsgnj.vv / vfsgnj.vf helpers for 16/32/64-bit elements, built on the
 * fsgnjN bit-manipulation routines.
 * NOTE(review): OPFVV2, OPFVF2, GEN_VEXT_VV_ENV and GEN_VEXT_VF are defined
 * outside this excerpt; presumably they emit do_<name>() and the HELPER()
 * element loops - confirm.
 */
RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8)
RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
GEN_VEXT_VF(vfsgnj_vf_h, 2, 2)
GEN_VEXT_VF(vfsgnj_vf_w, 4, 4)
GEN_VEXT_VF(vfsgnj_vf_d, 8, 8)
3857 
3858 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
3859 {
3860     return deposit64(~b, 0, 15, a);
3861 }
3862 
3863 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
3864 {
3865     return deposit64(~b, 0, 31, a);
3866 }
3867 
3868 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
3869 {
3870     return deposit64(~b, 0, 63, a);
3871 }
3872 
/*
 * vfsgnjn.vv / vfsgnjn.vf: instantiate the negated sign-injection helpers
 * for 16-, 32- and 64-bit elements with fsgnjn16/32/64 as the per-element
 * operation.
 */
RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8)
RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2)
GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4)
GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8)
3885 
3886 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
3887 {
3888     return deposit64(b ^ a, 0, 15, a);
3889 }
3890 
3891 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
3892 {
3893     return deposit64(b ^ a, 0, 31, a);
3894 }
3895 
3896 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
3897 {
3898     return deposit64(b ^ a, 0, 63, a);
3899 }
3900 
/*
 * vfsgnjx.vv / vfsgnjx.vf: instantiate the XOR sign-injection helpers for
 * 16-, 32- and 64-bit elements with fsgnjx16/32/64 as the per-element
 * operation.
 */
RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8)
RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2)
GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4)
GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8)
3913 
3914 /* Vector Floating-Point Compare Instructions */
/*
 * Define the helper NAME for a vector-vector floating-point compare that
 * writes a mask register.
 *
 * For every index in [env->vstart, env->vl) the element is processed when
 * the vm bit of @desc is set or vext_elem_mask(v0, index) is non-zero;
 * other elements are left untouched in vd.  For a processed element, mask
 * bit index of vd is set to DO_OP(vs2[index], vs1[index], &env->fp_status).
 * env->vstart is cleared once the loop is done.
 *
 * ETYPE is the element type and H the index macro used to address
 * elements inside vs1/vs2.
 */
#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP)            \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t unmasked = vext_vm(desc);                        \
    uint32_t nelem = env->vl;                                 \
    uint32_t idx;                                             \
                                                              \
    for (idx = env->vstart; idx < nelem; idx++) {             \
        if (unmasked || vext_elem_mask(v0, idx)) {            \
            ETYPE rhs = ((ETYPE *)vs1)[H(idx)];               \
            ETYPE lhs = ((ETYPE *)vs2)[H(idx)];               \
            vext_set_elem_mask(vd, idx,                       \
                DO_OP(lhs, rhs, &env->fp_status));            \
        }                                                     \
    }                                                         \
    env->vstart = 0;                                          \
}
3934 
/* vmfeq.vv: mask-producing equality compare built on floatN_eq_quiet. */
GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
3938 
/*
 * Define the helper NAME for a vector-scalar floating-point compare that
 * writes a mask register.
 *
 * For every index in [env->vstart, env->vl) the element is processed when
 * the vm bit of @desc is set or vext_elem_mask(v0, index) is non-zero;
 * other elements are left untouched in vd.  For a processed element, mask
 * bit index of vd is set to
 * DO_OP(vs2[index], (ETYPE)s1, &env->fp_status), i.e. the scalar s1 is
 * truncated to the element type and used as the second operand.
 * env->vstart is cleared once the loop is done.
 */
#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP)                      \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,       \
                  CPURISCVState *env, uint32_t desc)                \
{                                                                   \
    uint32_t unmasked = vext_vm(desc);                              \
    uint32_t nelem = env->vl;                                       \
    uint32_t idx;                                                   \
                                                                    \
    for (idx = env->vstart; idx < nelem; idx++) {                   \
        if (unmasked || vext_elem_mask(v0, idx)) {                  \
            ETYPE lhs = ((ETYPE *)vs2)[H(idx)];                     \
            vext_set_elem_mask(vd, idx,                             \
                DO_OP(lhs, (ETYPE)s1, &env->fp_status));            \
        }                                                           \
    }                                                               \
    env->vstart = 0;                                                \
}
3957 
/* vmfeq.vf: vector-scalar equality compare built on floatN_eq_quiet. */
GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
3961 
3962 static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
3963 {
3964     FloatRelation compare = float16_compare_quiet(a, b, s);
3965     return compare != float_relation_equal;
3966 }
3967 
3968 static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
3969 {
3970     FloatRelation compare = float32_compare_quiet(a, b, s);
3971     return compare != float_relation_equal;
3972 }
3973 
3974 static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
3975 {
3976     FloatRelation compare = float64_compare_quiet(a, b, s);
3977     return compare != float_relation_equal;
3978 }
3979 
/* vmfne.vv / vmfne.vf: "not equal" via the quiet-compare vmfneN wrappers. */
GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)

/* vmflt.vv / vmflt.vf: "less than" via softfloat floatN_lt. */
GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)

/* vmfle.vv / vmfle.vf: "less than or equal" via softfloat floatN_le. */
GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
4000 
4001 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
4002 {
4003     FloatRelation compare = float16_compare(a, b, s);
4004     return compare == float_relation_greater;
4005 }
4006 
4007 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
4008 {
4009     FloatRelation compare = float32_compare(a, b, s);
4010     return compare == float_relation_greater;
4011 }
4012 
4013 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
4014 {
4015     FloatRelation compare = float64_compare(a, b, s);
4016     return compare == float_relation_greater;
4017 }
4018 
/* vmfgt.vf: only the vector-scalar form is instantiated here. */
GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4022 
4023 static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4024 {
4025     FloatRelation compare = float16_compare(a, b, s);
4026     return compare == float_relation_greater ||
4027            compare == float_relation_equal;
4028 }
4029 
4030 static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
4031 {
4032     FloatRelation compare = float32_compare(a, b, s);
4033     return compare == float_relation_greater ||
4034            compare == float_relation_equal;
4035 }
4036 
4037 static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
4038 {
4039     FloatRelation compare = float64_compare(a, b, s);
4040     return compare == float_relation_greater ||
4041            compare == float_relation_equal;
4042 }
4043 
/* vmfge.vf: only the vector-scalar form is instantiated here. */
GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
4047 
4048 /* Vector Floating-Point Classify Instruction */
/*
 * Define do_NAME(vd, vs2, i): load element HS2(i) of vs2 (stored as T2,
 * held in a TX2 temporary), apply the one-argument OP to it and store the
 * result as element HD(i) of vd (type TD).
 */
#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP)         \
static void do_##NAME(void *vd, void *vs2, int i)      \
{                                                      \
    TX2 src = ((T2 *)vs2)[HS2(i)];                     \
    ((TD *)vd)[HD(i)] = OP(src);                       \
}
4055 
/*
 * Define the helper NAME for a one-source vector operation that needs no
 * floating-point status.
 *
 * For every index in [env->vstart, env->vl), do_NAME(vd, vs2, index) is
 * called when the vm bit of @desc is set or vext_elem_mask(v0, index) is
 * non-zero; other elements are skipped.  env->vstart is cleared once the
 * loop is done.  ESZ and DSZ are accepted but not used by this expansion.
 */
#define GEN_VEXT_V(NAME, ESZ, DSZ)                     \
void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
                  CPURISCVState *env, uint32_t desc)   \
{                                                      \
    uint32_t unmasked = vext_vm(desc);                 \
    uint32_t nelem = env->vl;                          \
    uint32_t idx;                                      \
                                                       \
    for (idx = env->vstart; idx < nelem; idx++) {      \
        if (unmasked || vext_elem_mask(v0, idx)) {     \
            do_##NAME(vd, vs2, idx);                   \
        }                                              \
    }                                                  \
    env->vstart = 0;                                   \
}
4072 
4073 target_ulong fclass_h(uint64_t frs1)
4074 {
4075     float16 f = frs1;
4076     bool sign = float16_is_neg(f);
4077 
4078     if (float16_is_infinity(f)) {
4079         return sign ? 1 << 0 : 1 << 7;
4080     } else if (float16_is_zero(f)) {
4081         return sign ? 1 << 3 : 1 << 4;
4082     } else if (float16_is_zero_or_denormal(f)) {
4083         return sign ? 1 << 2 : 1 << 5;
4084     } else if (float16_is_any_nan(f)) {
4085         float_status s = { }; /* for snan_bit_is_one */
4086         return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4087     } else {
4088         return sign ? 1 << 1 : 1 << 6;
4089     }
4090 }
4091 
4092 target_ulong fclass_s(uint64_t frs1)
4093 {
4094     float32 f = frs1;
4095     bool sign = float32_is_neg(f);
4096 
4097     if (float32_is_infinity(f)) {
4098         return sign ? 1 << 0 : 1 << 7;
4099     } else if (float32_is_zero(f)) {
4100         return sign ? 1 << 3 : 1 << 4;
4101     } else if (float32_is_zero_or_denormal(f)) {
4102         return sign ? 1 << 2 : 1 << 5;
4103     } else if (float32_is_any_nan(f)) {
4104         float_status s = { }; /* for snan_bit_is_one */
4105         return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4106     } else {
4107         return sign ? 1 << 1 : 1 << 6;
4108     }
4109 }
4110 
4111 target_ulong fclass_d(uint64_t frs1)
4112 {
4113     float64 f = frs1;
4114     bool sign = float64_is_neg(f);
4115 
4116     if (float64_is_infinity(f)) {
4117         return sign ? 1 << 0 : 1 << 7;
4118     } else if (float64_is_zero(f)) {
4119         return sign ? 1 << 3 : 1 << 4;
4120     } else if (float64_is_zero_or_denormal(f)) {
4121         return sign ? 1 << 2 : 1 << 5;
4122     } else if (float64_is_any_nan(f)) {
4123         float_status s = { }; /* for snan_bit_is_one */
4124         return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4125     } else {
4126         return sign ? 1 << 1 : 1 << 6;
4127     }
4128 }
4129 
/*
 * vfclass.v for 16-, 32- and 64-bit elements: fclass_h/fclass_s/fclass_d
 * are the per-element operations handed to OPIVV1, and GEN_VEXT_V emits
 * the masked element loop that calls the resulting do_vfclass_v_* routine.
 */
RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
GEN_VEXT_V(vfclass_v_h, 2, 2)
GEN_VEXT_V(vfclass_v_w, 4, 4)
GEN_VEXT_V(vfclass_v_d, 8, 8)
4136 
4137 /* Vector Floating-Point Merge Instruction */
/*
 * Define the helper NAME for a vector-scalar merge.
 *
 * For every index in [env->vstart, env->vl), vd[index] receives the scalar
 * s1 (converted to ETYPE) when the vm bit of @desc is set or
 * vext_elem_mask(v0, index) is non-zero, and a copy of vs2[index]
 * otherwise.  env->vstart is cleared once the loop is done.
 */
#define GEN_VFMERGE_VF(NAME, ETYPE, H)                        \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t unmasked = vext_vm(desc);                        \
    uint32_t nelem = env->vl;                                 \
    uint32_t idx;                                             \
                                                              \
    for (idx = env->vstart; idx < nelem; idx++) {             \
        ETYPE *dst = (ETYPE *)vd + H(idx);                    \
                                                              \
        if (unmasked || vext_elem_mask(v0, idx)) {            \
            *dst = s1;                                        \
        } else {                                              \
            *dst = ((ETYPE *)vs2)[H(idx)];                    \
        }                                                     \
    }                                                         \
    env->vstart = 0;                                          \
}
4153 
/*
 * vfmerge.vfm for 16-, 32- and 64-bit elements.  The element types are
 * signed integers, but GEN_VFMERGE_VF only copies values around.
 */
GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
4157 
4158 /* Single-Width Floating-Point/Integer Type-Convert Instructions */
/*
 * Source and destination elements have the same width in this group
 * (16, 32 or 64 bits); the softfloat conversion routine named in each
 * RVVCALL line is the per-element operation.
 */
/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2)
GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4)
GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8)

/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2)
GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4)
GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8)

/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2)
GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4)
GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8)

/* vfcvt.f.x.v vd, vs2, vm # Convert signed integer to float. */
RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2)
GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4)
GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8)
4190 
4191 /* Widening Floating-Point/Integer Type-Convert Instructions */
/*
 * (TD, T2, TX2): in each row the first type is twice as wide as the other
 * two.  Presumably TD is the destination element type and T2/TX2 the
 * source element type and its temporary, as in OPIVV1 -- confirm against
 * OPFVV1.
 */
#define WOP_UU_B uint16_t, uint8_t,  uint8_t
#define WOP_UU_H uint32_t, uint16_t, uint16_t
#define WOP_UU_W uint64_t, uint32_t, uint32_t
/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. */
RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4)
GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8)

/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4)
GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8)

/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float. */
RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 1, 2)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8)

/* vfwcvt.f.x.v vd, vs2, vm # Convert signed integer to double-width float. */
RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 1, 2)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8)
4223 
4224 /*
4225  * vfwcvt.f.f.v vd, vs2, vm
4226  * Convert single-width float to double-width float.
4227  */
4228 static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4229 {
4230     return float16_to_float32(a, true, s);
4231 }
4232 
/*
 * vfwcvt.f.f.v: the 16-bit source goes through the vfwcvtffv16 wrapper,
 * the 32-bit source uses float32_to_float64 directly.
 */
RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4)
GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8)
4237 
4238 /* Narrowing Floating-Point/Integer Type-Convert Instructions */
/*
 * (TD, T2, TX2): in each row the first type is half as wide as the second.
 * NOTE(review): NOP_UU_B pairs a uint16_t T2 with a uint32_t TX2, unlike
 * the other two rows where T2 and TX2 match -- confirm this is intended.
 */
#define NOP_UU_B uint8_t,  uint16_t, uint32_t
#define NOP_UU_H uint16_t, uint32_t, uint32_t
#define NOP_UU_W uint32_t, uint64_t, uint64_t
/* vfncvt.xu.f.w vd, vs2, vm # Convert double-width float to unsigned integer. */
RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1, 1)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4, 4)

/* vfncvt.x.f.w vd, vs2, vm # Convert double-width float to signed integer. */
RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1, 1)
GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4, 4)

/* vfncvt.f.xu.w vd, vs2, vm # Convert double-width unsigned integer to float. */
RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4, 4)

/* vfncvt.f.x.w vd, vs2, vm # Convert double-width signed integer to float. */
RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4, 4)
4270 
4271 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */
4272 static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4273 {
4274     return float32_to_float16(a, true, s);
4275 }
4276 
/*
 * vfncvt.f.f.w: the 32-bit source goes through the vfncvtffv16 wrapper,
 * the 64-bit source uses float64_to_float32 directly.
 */
RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4, 4)
4281 
4282 /*
4283  *** Vector Reduction Operations
4284  */
4285 /* Vector Single-Width Integer Reduction Instructions */
/*
 * Define the helper NAME for an integer reduction.
 *
 * The accumulator (type TD) starts as element HD(0) of vs1.  For every
 * index in [env->vstart, env->vl) that is processed (vm bit of @desc set,
 * or vext_elem_mask(v0, index) non-zero), element HS2(index) of vs2 (type
 * TS2) is cast to TD and folded in with OP(accumulator, element).  The
 * final accumulator is written to element HD(0) of vd and env->vstart is
 * cleared.
 */
#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP)          \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t unmasked = vext_vm(desc);                    \
    uint32_t nelem = env->vl;                             \
    uint32_t idx;                                         \
    TD acc = ((TD *)vs1)[HD(0)];                          \
                                                          \
    for (idx = env->vstart; idx < nelem; idx++) {         \
        if (unmasked || vext_elem_mask(v0, idx)) {        \
            TS2 elem = ((TS2 *)vs2)[HS2(idx)];            \
            acc = OP(acc, (TD)elem);                      \
        }                                                 \
    }                                                     \
    ((TD *)vd)[HD(0)] = acc;                              \
    env->vstart = 0;                                      \
}
4305 
/*
 * Single-width integer reductions: accumulator and source elements have
 * the same type in every instantiation below.  The signed/unsigned element
 * type selects signed or unsigned ordering for the min/max variants.
 */
/* vd[0] = sum(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredsum_vs_b, int8_t,  int8_t,  H1, H1, DO_ADD)
GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)

/* vd[0] = maxu(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmaxu_vs_b, uint8_t,  uint8_t,  H1, H1, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)

/* vd[0] = max(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmax_vs_b, int8_t,  int8_t,  H1, H1, DO_MAX)
GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)

/* vd[0] = minu(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredminu_vs_b, uint8_t,  uint8_t,  H1, H1, DO_MIN)
GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)

/* vd[0] = min(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmin_vs_b, int8_t,  int8_t,  H1, H1, DO_MIN)
GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)

/* vd[0] = and(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredand_vs_b, int8_t,  int8_t,  H1, H1, DO_AND)
GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)

/* vd[0] = or(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredor_vs_b, int8_t,  int8_t,  H1, H1, DO_OR)
GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)

/* vd[0] = xor(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredxor_vs_b, int8_t,  int8_t,  H1, H1, DO_XOR)
GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)
4353 
/* Vector Widening Integer Reduction Instructions */
/*
 * The accumulator type (TD) is twice as wide as the source element type
 * (TS2); GEN_VEXT_RED casts each source element to TD before adding, so
 * the element's signedness decides between sign and zero extension.
 */
/* Signed sum reduction into double-width accumulator */
GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t,  H2, H1, DO_ADD)
GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)

/* Unsigned sum reduction into double-width accumulator */
GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t,  H2, H1, DO_ADD)
GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
4364 
4365 /* Vector Single-Width Floating-Point Reduction Instructions */
/*
 * Define the helper NAME for a floating-point reduction.
 *
 * The accumulator (type TD) starts as element HD(0) of vs1.  For every
 * index in [env->vstart, env->vl) that is processed (vm bit of @desc set,
 * or vext_elem_mask(v0, index) non-zero), element HS2(index) of vs2 is
 * folded in, in index order, with
 * OP(accumulator, (TD)element, &env->fp_status).  The final accumulator is
 * written to element HD(0) of vd and env->vstart is cleared.
 */
#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP)          \
void HELPER(NAME)(void *vd, void *v0, void *vs1,           \
                  void *vs2, CPURISCVState *env,           \
                  uint32_t desc)                           \
{                                                          \
    uint32_t unmasked = vext_vm(desc);                     \
    uint32_t nelem = env->vl;                              \
    uint32_t idx;                                          \
    TD acc = ((TD *)vs1)[HD(0)];                           \
                                                           \
    for (idx = env->vstart; idx < nelem; idx++) {          \
        if (unmasked || vext_elem_mask(v0, idx)) {         \
            TS2 elem = ((TS2 *)vs2)[HS2(idx)];             \
            acc = OP(acc, (TD)elem, &env->fp_status);      \
        }                                                  \
    }                                                      \
    ((TD *)vd)[HD(0)] = acc;                               \
    env->vstart = 0;                                       \
}
4386 
/*
 * Single-width floating-point reductions for 16-, 32- and 64-bit elements;
 * the softfloat routine named in each line is the folding operation.
 */
/* Unordered sum */
GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)

/* Maximum value */
GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number)
GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number)
GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number)

/* Minimum value */
GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number)
GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number)
GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number)
4401 
4402 /* Vector Widening Floating-Point Reduction Instructions */
4403 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
4404 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
4405                             void *vs2, CPURISCVState *env, uint32_t desc)
4406 {
4407     uint32_t vm = vext_vm(desc);
4408     uint32_t vl = env->vl;
4409     uint32_t i;
4410     uint32_t s1 =  *((uint32_t *)vs1 + H4(0));
4411 
4412     for (i = env->vstart; i < vl; i++) {
4413         uint16_t s2 = *((uint16_t *)vs2 + H2(i));
4414         if (!vm && !vext_elem_mask(v0, i)) {
4415             continue;
4416         }
4417         s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status),
4418                          &env->fp_status);
4419     }
4420     *((uint32_t *)vd + H4(0)) = s1;
4421     env->vstart = 0;
4422 }
4423 
4424 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
4425                             void *vs2, CPURISCVState *env, uint32_t desc)
4426 {
4427     uint32_t vm = vext_vm(desc);
4428     uint32_t vl = env->vl;
4429     uint32_t i;
4430     uint64_t s1 =  *((uint64_t *)vs1);
4431 
4432     for (i = env->vstart; i < vl; i++) {
4433         uint32_t s2 = *((uint32_t *)vs2 + H4(i));
4434         if (!vm && !vext_elem_mask(v0, i)) {
4435             continue;
4436         }
4437         s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status),
4438                          &env->fp_status);
4439     }
4440     *((uint64_t *)vd) = s1;
4441     env->vstart = 0;
4442 }
4443 
4444 /*
4445  *** Vector Mask Operations
4446  */
4447 /* Vector Mask-Register Logical Instructions */
/*
 * Define the helper NAME for a mask-register logical operation.
 *
 * For every index in [env->vstart, env->vl), mask bit index of vd is set
 * to OP(vs2 bit, vs1 bit) -- note the operand order.  v0 and desc are not
 * used, so no element is skipped.  env->vstart is cleared once the loop is
 * done.
 */
#define GEN_VEXT_MASK_VV(NAME, OP)                        \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t nelem = env->vl;                             \
    uint32_t idx;                                         \
                                                          \
    for (idx = env->vstart; idx < nelem; idx++) {         \
        int bit1 = vext_elem_mask(vs1, idx);              \
        int bit2 = vext_elem_mask(vs2, idx);              \
                                                          \
        vext_set_elem_mask(vd, idx, OP(bit2, bit1));      \
    }                                                     \
    env->vstart = 0;                                      \
}
4464 
/*
 * Logical operations on single mask bits (operands are expected to be 0 or
 * 1; the result is 0 or 1).  Arguments are parenthesized so that compound
 * expressions passed as N or M are evaluated as a unit.
 */
#define DO_NAND(N, M)  (!((N) & (M)))
#define DO_ANDNOT(N, M)  ((N) & !(M))
#define DO_NOR(N, M)  (!((N) | (M)))
#define DO_ORNOT(N, M)  ((N) | !(M))
#define DO_XNOR(N, M)  (!((N) ^ (M)))
4470 
/*
 * Mask-register logical helpers.  GEN_VEXT_MASK_VV invokes OP as
 * OP(vs2 bit, vs1 bit), so e.g. vmandn_mm computes vs2 & !vs1 and
 * vmorn_mm computes vs2 | !vs1.
 */
GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
4479 
4480 /* Vector count population in mask vcpop */
4481 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
4482                              uint32_t desc)
4483 {
4484     target_ulong cnt = 0;
4485     uint32_t vm = vext_vm(desc);
4486     uint32_t vl = env->vl;
4487     int i;
4488 
4489     for (i = env->vstart; i < vl; i++) {
4490         if (vm || vext_elem_mask(v0, i)) {
4491             if (vext_elem_mask(vs2, i)) {
4492                 cnt++;
4493             }
4494         }
4495     }
4496     env->vstart = 0;
4497     return cnt;
4498 }
4499 
4500 /* vfirst find-first-set mask bit*/
4501 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4502                               uint32_t desc)
4503 {
4504     uint32_t vm = vext_vm(desc);
4505     uint32_t vl = env->vl;
4506     int i;
4507 
4508     for (i = env->vstart; i < vl; i++) {
4509         if (vm || vext_elem_mask(v0, i)) {
4510             if (vext_elem_mask(vs2, i)) {
4511                 return i;
4512             }
4513         }
4514     }
4515     env->vstart = 0;
4516     return -1LL;
4517 }
4518 
/*
 * Selects which mask vmsetm() produces relative to the first set bit of
 * its source mask.
 */
enum set_mask_type {
    /* 1 only at the first set source bit, 0 elsewhere (used by vmsof_m) */
    ONLY_FIRST = 1,
    /* 1 up to and including the first set source bit (used by vmsif_m) */
    INCLUDE_FIRST,
    /* 1 strictly before the first set source bit (used by vmsbf_m) */
    BEFORE_FIRST,
};
4524 
4525 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
4526                    uint32_t desc, enum set_mask_type type)
4527 {
4528     uint32_t vm = vext_vm(desc);
4529     uint32_t vl = env->vl;
4530     int i;
4531     bool first_mask_bit = false;
4532 
4533     for (i = env->vstart; i < vl; i++) {
4534         if (!vm && !vext_elem_mask(v0, i)) {
4535             continue;
4536         }
4537         /* write a zero to all following active elements */
4538         if (first_mask_bit) {
4539             vext_set_elem_mask(vd, i, 0);
4540             continue;
4541         }
4542         if (vext_elem_mask(vs2, i)) {
4543             first_mask_bit = true;
4544             if (type == BEFORE_FIRST) {
4545                 vext_set_elem_mask(vd, i, 0);
4546             } else {
4547                 vext_set_elem_mask(vd, i, 1);
4548             }
4549         } else {
4550             if (type == ONLY_FIRST) {
4551                 vext_set_elem_mask(vd, i, 0);
4552             } else {
4553                 vext_set_elem_mask(vd, i, 1);
4554             }
4555         }
4556     }
4557     env->vstart = 0;
4558 }
4559 
/* vmsbf.m helper (set-before-first): vmsetm() in BEFORE_FIRST mode. */
void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
}
4565 
/* vmsif.m helper (set-including-first): vmsetm() in INCLUDE_FIRST mode. */
void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
}
4571 
/* vmsof.m helper (set-only-first): vmsetm() in ONLY_FIRST mode. */
void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
}
4577 
4578 /* Vector Iota Instruction */
4579 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H)                                  \
4580 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env,      \
4581                   uint32_t desc)                                          \
4582 {                                                                         \
4583     uint32_t vm = vext_vm(desc);                                          \
4584     uint32_t vl = env->vl;                                                \
4585     uint32_t sum = 0;                                                     \
4586     int i;                                                                \
4587                                                                           \
4588     for (i = env->vstart; i < vl; i++) {                                  \
4589         if (!vm && !vext_elem_mask(v0, i)) {                              \
4590             continue;                                                     \
4591         }                                                                 \
4592         *((ETYPE *)vd + H(i)) = sum;                                      \
4593         if (vext_elem_mask(vs2, i)) {                                     \
4594             sum++;                                                        \
4595         }                                                                 \
4596     }                                                                     \
4597     env->vstart = 0;                                                      \
4598 }
4599 
4600 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t,  H1)
4601 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
4602 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
4603 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
4604 
4605 /* Vector Element Index Instruction */
4606 #define GEN_VEXT_VID_V(NAME, ETYPE, H)                                    \
4607 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc)  \
4608 {                                                                         \
4609     uint32_t vm = vext_vm(desc);                                          \
4610     uint32_t vl = env->vl;                                                \
4611     int i;                                                                \
4612                                                                           \
4613     for (i = env->vstart; i < vl; i++) {                                  \
4614         if (!vm && !vext_elem_mask(v0, i)) {                              \
4615             continue;                                                     \
4616         }                                                                 \
4617         *((ETYPE *)vd + H(i)) = i;                                        \
4618     }                                                                     \
4619     env->vstart = 0;                                                      \
4620 }
4621 
4622 GEN_VEXT_VID_V(vid_v_b, uint8_t,  H1)
4623 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
4624 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
4625 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
4626 
4627 /*
4628  *** Vector Permutation Instructions
4629  */
4630 
4631 /* Vector Slide Instructions */
4632 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                              \
4633 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4634                   CPURISCVState *env, uint32_t desc)                      \
4635 {                                                                         \
4636     uint32_t vm = vext_vm(desc);                                          \
4637     uint32_t vl = env->vl;                                                \
4638     target_ulong offset = s1, i_min, i;                                   \
4639                                                                           \
4640     i_min = MAX(env->vstart, offset);                                     \
4641     for (i = i_min; i < vl; i++) {                                        \
4642         if (!vm && !vext_elem_mask(v0, i)) {                              \
4643             continue;                                                     \
4644         }                                                                 \
4645         *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
4646     }                                                                     \
4647 }
4648 
4649 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
4650 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t,  H1)
4651 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
4652 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
4653 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
4654 
4655 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                            \
4656 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4657                   CPURISCVState *env, uint32_t desc)                      \
4658 {                                                                         \
4659     uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
4660     uint32_t vm = vext_vm(desc);                                          \
4661     uint32_t vl = env->vl;                                                \
4662     target_ulong i_max, i;                                                \
4663                                                                           \
4664     i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart);       \
4665     for (i = env->vstart; i < i_max; ++i) {                               \
4666         if (vm || vext_elem_mask(v0, i)) {                                \
4667             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));          \
4668         }                                                                 \
4669     }                                                                     \
4670                                                                           \
4671     for (i = i_max; i < vl; ++i) {                                        \
4672         if (vm || vext_elem_mask(v0, i)) {                                \
4673             *((ETYPE *)vd + H(i)) = 0;                                    \
4674         }                                                                 \
4675     }                                                                     \
4676                                                                           \
4677     env->vstart = 0;                                                      \
4678 }
4679 
4680 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
4681 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t,  H1)
4682 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
4683 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
4684 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
4685 
4686 #define GEN_VEXT_VSLIE1UP(ESZ, H)                                           \
4687 static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
4688                      CPURISCVState *env, uint32_t desc)                     \
4689 {                                                                           \
4690     typedef uint##ESZ##_t ETYPE;                                            \
4691     uint32_t vm = vext_vm(desc);                                            \
4692     uint32_t vl = env->vl;                                                  \
4693     uint32_t i;                                                             \
4694                                                                             \
4695     for (i = env->vstart; i < vl; i++) {                                    \
4696         if (!vm && !vext_elem_mask(v0, i)) {                                \
4697             continue;                                                       \
4698         }                                                                   \
4699         if (i == 0) {                                                       \
4700             *((ETYPE *)vd + H(i)) = s1;                                     \
4701         } else {                                                            \
4702             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));             \
4703         }                                                                   \
4704     }                                                                       \
4705     env->vstart = 0;                                                        \
4706 }
4707 
4708 GEN_VEXT_VSLIE1UP(8,  H1)
4709 GEN_VEXT_VSLIE1UP(16, H2)
4710 GEN_VEXT_VSLIE1UP(32, H4)
4711 GEN_VEXT_VSLIE1UP(64, H8)
4712 
4713 #define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ)                          \
4714 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4715                   CPURISCVState *env, uint32_t desc)              \
4716 {                                                                 \
4717     vslide1up_##ESZ(vd, v0, s1, vs2, env, desc);                  \
4718 }
4719 
4720 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
4721 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
4722 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
4723 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
4724 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
4725 
4726 #define GEN_VEXT_VSLIDE1DOWN(ESZ, H)                                          \
4727 static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
4728                        CPURISCVState *env, uint32_t desc)                     \
4729 {                                                                             \
4730     typedef uint##ESZ##_t ETYPE;                                              \
4731     uint32_t vm = vext_vm(desc);                                              \
4732     uint32_t vl = env->vl;                                                    \
4733     uint32_t i;                                                               \
4734                                                                               \
4735     for (i = env->vstart; i < vl; i++) {                                      \
4736         if (!vm && !vext_elem_mask(v0, i)) {                                  \
4737             continue;                                                         \
4738         }                                                                     \
4739         if (i == vl - 1) {                                                    \
4740             *((ETYPE *)vd + H(i)) = s1;                                       \
4741         } else {                                                              \
4742             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));               \
4743         }                                                                     \
4744     }                                                                         \
4745     env->vstart = 0;                                                          \
4746 }
4747 
4748 GEN_VEXT_VSLIDE1DOWN(8,  H1)
4749 GEN_VEXT_VSLIDE1DOWN(16, H2)
4750 GEN_VEXT_VSLIDE1DOWN(32, H4)
4751 GEN_VEXT_VSLIDE1DOWN(64, H8)
4752 
4753 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ)                        \
4754 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4755                   CPURISCVState *env, uint32_t desc)              \
4756 {                                                                 \
4757     vslide1down_##ESZ(vd, v0, s1, vs2, env, desc);                \
4758 }
4759 
4760 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
4761 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
4762 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
4763 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
4764 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
4765 
4766 /* Vector Floating-Point Slide Instructions */
4767 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ)                     \
4768 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4769                   CPURISCVState *env, uint32_t desc)          \
4770 {                                                             \
4771     vslide1up_##ESZ(vd, v0, s1, vs2, env, desc);              \
4772 }
4773 
4774 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
4775 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
4776 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
4777 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)
4778 
4779 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ)                   \
4780 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4781                   CPURISCVState *env, uint32_t desc)          \
4782 {                                                             \
4783     vslide1down_##ESZ(vd, v0, s1, vs2, env, desc);            \
4784 }
4785 
4786 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
4787 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
4788 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
4789 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
4790 
4791 /* Vector Register Gather Instruction */
4792 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2)                    \
4793 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
4794                   CPURISCVState *env, uint32_t desc)                      \
4795 {                                                                         \
4796     uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2)));             \
4797     uint32_t vm = vext_vm(desc);                                          \
4798     uint32_t vl = env->vl;                                                \
4799     uint64_t index;                                                       \
4800     uint32_t i;                                                           \
4801                                                                           \
4802     for (i = env->vstart; i < vl; i++) {                                  \
4803         if (!vm && !vext_elem_mask(v0, i)) {                              \
4804             continue;                                                     \
4805         }                                                                 \
4806         index = *((TS1 *)vs1 + HS1(i));                                   \
4807         if (index >= vlmax) {                                             \
4808             *((TS2 *)vd + HS2(i)) = 0;                                    \
4809         } else {                                                          \
4810             *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index));           \
4811         }                                                                 \
4812     }                                                                     \
4813     env->vstart = 0;                                                      \
4814 }
4815 
4816 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
4817 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t,  uint8_t,  H1, H1)
4818 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
4819 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
4820 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)
4821 
4822 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t,  H2, H1)
4823 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
4824 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
4825 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
4826 
4827 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                              \
4828 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4829                   CPURISCVState *env, uint32_t desc)                      \
4830 {                                                                         \
4831     uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
4832     uint32_t vm = vext_vm(desc);                                          \
4833     uint32_t vl = env->vl;                                                \
4834     uint64_t index = s1;                                                  \
4835     uint32_t i;                                                           \
4836                                                                           \
4837     for (i = env->vstart; i < vl; i++) {                                  \
4838         if (!vm && !vext_elem_mask(v0, i)) {                              \
4839             continue;                                                     \
4840         }                                                                 \
4841         if (index >= vlmax) {                                             \
4842             *((ETYPE *)vd + H(i)) = 0;                                    \
4843         } else {                                                          \
4844             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
4845         }                                                                 \
4846     }                                                                     \
4847     env->vstart = 0;                                                      \
4848 }
4849 
4850 /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */
4851 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t,  H1)
4852 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
4853 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
4854 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
4855 
4856 /* Vector Compress Instruction */
4857 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
4858 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
4859                   CPURISCVState *env, uint32_t desc)                      \
4860 {                                                                         \
4861     uint32_t vl = env->vl;                                                \
4862     uint32_t num = 0, i;                                                  \
4863                                                                           \
4864     for (i = env->vstart; i < vl; i++) {                                  \
4865         if (!vext_elem_mask(vs1, i)) {                                    \
4866             continue;                                                     \
4867         }                                                                 \
4868         *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
4869         num++;                                                            \
4870     }                                                                     \
4871     env->vstart = 0;                                                      \
4872 }
4873 
4874 /* Compress into vd elements of vs2 where vs1 is enabled */
4875 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t,  H1)
4876 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
4877 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
4878 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
4879 
4880 /* Vector Whole Register Move */
4881 #define GEN_VEXT_VMV_WHOLE(NAME, LEN)                      \
4882 void HELPER(NAME)(void *vd, void *vs2, CPURISCVState *env, \
4883                   uint32_t desc)                           \
4884 {                                                          \
4885     /* EEW = 8 */                                          \
4886     uint32_t maxsz = simd_maxsz(desc);                     \
4887     uint32_t i = env->vstart;                              \
4888                                                            \
4889     memcpy((uint8_t *)vd + H1(i),                          \
4890            (uint8_t *)vs2 + H1(i),                         \
4891            maxsz - env->vstart);                           \
4892                                                            \
4893     env->vstart = 0;                                       \
4894 }
4895 
4896 GEN_VEXT_VMV_WHOLE(vmv1r_v, 1)
4897 GEN_VEXT_VMV_WHOLE(vmv2r_v, 2)
4898 GEN_VEXT_VMV_WHOLE(vmv4r_v, 4)
4899 GEN_VEXT_VMV_WHOLE(vmv8r_v, 8)
4900 
4901 /* Vector Integer Extension */
4902 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)            \
4903 void HELPER(NAME)(void *vd, void *v0, void *vs2,                 \
4904                   CPURISCVState *env, uint32_t desc)             \
4905 {                                                                \
4906     uint32_t vl = env->vl;                                       \
4907     uint32_t vm = vext_vm(desc);                                 \
4908     uint32_t i;                                                  \
4909                                                                  \
4910     for (i = env->vstart; i < vl; i++) {                         \
4911         if (!vm && !vext_elem_mask(v0, i)) {                     \
4912             continue;                                            \
4913         }                                                        \
4914         *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));       \
4915     }                                                            \
4916     env->vstart = 0;                                             \
4917 }
4918 
4919 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t,  H2, H1)
4920 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
4921 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
4922 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t,  H4, H1)
4923 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
4924 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t,  H8, H1)
4925 
4926 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t,  H2, H1)
4927 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
4928 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
4929 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t,  H4, H1)
4930 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
4931 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t,  H8, H1)
4932