xref: /openbmc/qemu/target/riscv/vector_helper.c (revision d96a271a)
1 /*
2  * RISC-V Vector Extension Helpers for QEMU.
3  *
4  * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2 or later, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License along with
16  * this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include "qemu/osdep.h"
20 #include "qemu/host-utils.h"
21 #include "qemu/bitops.h"
22 #include "cpu.h"
23 #include "exec/memop.h"
24 #include "exec/exec-all.h"
25 #include "exec/helper-proto.h"
26 #include "fpu/softfloat.h"
27 #include "tcg/tcg-gvec-desc.h"
28 #include "internals.h"
29 #include <math.h>
30 
31 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
32                             target_ulong s2)
33 {
34     int vlmax, vl;
35     RISCVCPU *cpu = env_archcpu(env);
36     uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
37     uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
38     uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
39     bool vill = FIELD_EX64(s2, VTYPE, VILL);
40     target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED);
41 
42     if (lmul & 4) {
43         /* Fractional LMUL. */
44         if (lmul == 4 ||
45             cpu->cfg.elen >> (8 - lmul) < sew) {
46             vill = true;
47         }
48     }
49 
50     if ((sew > cpu->cfg.elen)
51         || vill
52         || (ediv != 0)
53         || (reserved != 0)) {
54         /* only set vill bit. */
55         env->vill = 1;
56         env->vtype = 0;
57         env->vl = 0;
58         env->vstart = 0;
59         return 0;
60     }
61 
62     vlmax = vext_get_vlmax(cpu, s2);
63     if (s1 <= vlmax) {
64         vl = s1;
65     } else {
66         vl = vlmax;
67     }
68     env->vl = vl;
69     env->vtype = s2;
70     env->vstart = 0;
71     return vl;
72 }
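/*
 * Illustrative example (editor's sketch, not part of the original source):
 * how vsetvl behaves for one configuration.  Assume VLEN = 128 bits
 * (vlenb = 16) and ELEN = 64:
 *
 *     s2 encodes vtype with VSEW = 2 (SEW = 32) and VLMUL = 1 (LMUL = 2),
 *     so vlmax = LMUL * VLEN / SEW = 2 * 128 / 32 = 8.
 *
 *     vsetvl(env, s1 = 5,  s2) -> vl = 5   (requested AVL fits in vlmax)
 *     vsetvl(env, s1 = 20, s2) -> vl = 8   (clamped to vlmax)
 *
 * An unsupported request, e.g. SEW = 128 with ELEN = 64, only sets vill and
 * zeroes vtype/vl/vstart, as in the error path above.
 */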
73 
74 /*
75  * Note that vector data is stored in host-endian 64-bit chunks,
76  * so addressing units smaller than that needs a host-endian fixup.
77  */
78 #ifdef HOST_WORDS_BIGENDIAN
79 #define H1(x)   ((x) ^ 7)
80 #define H1_2(x) ((x) ^ 6)
81 #define H1_4(x) ((x) ^ 4)
82 #define H2(x)   ((x) ^ 3)
83 #define H4(x)   ((x) ^ 1)
84 #define H8(x)   ((x))
85 #else
86 #define H1(x)   (x)
87 #define H1_2(x) (x)
88 #define H1_4(x) (x)
89 #define H2(x)   (x)
90 #define H4(x)   (x)
91 #define H8(x)   (x)
92 #endif
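/*
 * Worked example (editor's sketch): on a big-endian host the bytes of each
 * 64-bit chunk are stored in reverse order, so H1(0) == 7 maps logical byte
 * element 0 to host byte offset 7 of the chunk, H1(7) == 0, and H2(0) == 3
 * maps logical 16-bit element 0 to host halfword offset 3.  On a
 * little-endian host all of these are identity mappings.
 */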
93 
94 static inline uint32_t vext_nf(uint32_t desc)
95 {
96     return FIELD_EX32(simd_data(desc), VDATA, NF);
97 }
98 
99 static inline uint32_t vext_vm(uint32_t desc)
100 {
101     return FIELD_EX32(simd_data(desc), VDATA, VM);
102 }
103 
104 /*
105  * Encode LMUL to lmul as following:
106  *     LMUL    vlmul    lmul
107  *      1       000       0
108  *      2       001       1
109  *      4       010       2
110  *      8       011       3
111  *      -       100       -
112  *     1/8      101      -3
113  *     1/4      110      -2
114  *     1/2      111      -1
115  */
116 static inline int32_t vext_lmul(uint32_t desc)
117 {
118     return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
119 }
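/*
 * Example (editor's sketch): sextract32() treats the 3-bit field as signed,
 * so a raw LMUL field of 0b111 yields -1 (LMUL = 1/2) and 0b011 yields 3
 * (LMUL = 8), matching the table above.
 */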
120 
121 /*
122  * Get the maximum number of elements that can be operated on.
123  *
124  * esz: log2 of element size in bytes.
125  */
126 static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
127 {
128     /*
129      * As simd_desc supports at most 2048 bytes, the maximum VLEN is 1024 bits,
130      * so VLEN in bytes (vlenb) is encoded as maxsz.
131      */
132     uint32_t vlenb = simd_maxsz(desc);
133 
134     /* Return VLMAX */
135     int scale = vext_lmul(desc) - esz;
136     return scale < 0 ? vlenb >> -scale : vlenb << scale;
137 }
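/*
 * Worked example (editor's sketch), assuming VLEN = 128 so vlenb = 16:
 *
 *     SEW = 32 (esz = 2), LMUL = 8   (lmul =  3): scale =  1, VLMAX = 16 << 1 = 32
 *     SEW = 16 (esz = 1), LMUL = 1/2 (lmul = -1): scale = -2, VLMAX = 16 >> 2 = 4
 */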
138 
139 /*
140  * This function checks watchpoints before the real memory access.
141  *
142  * In softmmu mode, the TLB API probe_access is enough for the watchpoint check.
143  * In user mode, there is no watchpoint support for now.
144  *
145  * It will trigger an exception if there is no mapping in the TLB
146  * and the page table walk can't fill the TLB entry. The guest software
147  * can then return here after processing the exception, or never return.
148  */
149 static void probe_pages(CPURISCVState *env, target_ulong addr,
150                         target_ulong len, uintptr_t ra,
151                         MMUAccessType access_type)
152 {
153     target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
154     target_ulong curlen = MIN(pagelen, len);
155 
156     probe_access(env, addr, curlen, access_type,
157                  cpu_mmu_index(env, false), ra);
158     if (len > curlen) {
159         addr += curlen;
160         curlen = len - curlen;
161         probe_access(env, addr, curlen, access_type,
162                      cpu_mmu_index(env, false), ra);
163     }
164 }
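/*
 * Example (editor's sketch), assuming 4 KiB target pages: a 16-byte access
 * at addr = 0xffc spans two pages, so pagelen = 4, curlen = 4, and the
 * remaining 12 bytes starting at 0x1000 are covered by the second
 * probe_access() call.
 */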
165 
166 static inline void vext_set_elem_mask(void *v0, int index,
167                                       uint8_t value)
168 {
169     int idx = index / 64;
170     int pos = index % 64;
171     uint64_t old = ((uint64_t *)v0)[idx];
172     ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
173 }
174 
175 /*
176  * Earlier designs (pre-0.9) had a varying number of bits
177  * per mask value (MLEN). In the 0.9 design, MLEN=1.
178  * (Section 4.5)
179  */
180 static inline int vext_elem_mask(void *v0, int index)
181 {
182     int idx = index / 64;
183     int pos = index  % 64;
184     return (((uint64_t *)v0)[idx] >> pos) & 1;
185 }
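/*
 * Example (editor's sketch): mask element 70 lives at bit position 6 of the
 * second 64-bit word of v0 (idx = 70 / 64 = 1, pos = 70 % 64 = 6), so
 * vext_elem_mask(v0, 70) returns (v0[1] >> 6) & 1.
 */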
186 
187 /* element operations for load and store */
188 typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
189                                uint32_t idx, void *vd, uintptr_t retaddr);
190 
191 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
192 static void NAME(CPURISCVState *env, abi_ptr addr,         \
193                  uint32_t idx, void *vd, uintptr_t retaddr)\
194 {                                                          \
195     ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
196     *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
197 }
198 
199 GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
200 GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
201 GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
202 GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)
203 
204 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
205 static void NAME(CPURISCVState *env, abi_ptr addr,         \
206                  uint32_t idx, void *vd, uintptr_t retaddr)\
207 {                                                          \
208     ETYPE data = *((ETYPE *)vd + H(idx));                  \
209     cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
210 }
211 
212 GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
213 GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
214 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
215 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
216 
217 /*
218  *** stride: access vector element from strided memory
219  */
220 static void
221 vext_ldst_stride(void *vd, void *v0, target_ulong base,
222                  target_ulong stride, CPURISCVState *env,
223                  uint32_t desc, uint32_t vm,
224                  vext_ldst_elem_fn *ldst_elem,
225                  uint32_t esz, uintptr_t ra, MMUAccessType access_type)
226 {
227     uint32_t i, k;
228     uint32_t nf = vext_nf(desc);
229     uint32_t max_elems = vext_max_elems(desc, esz);
230 
231     for (i = env->vstart; i < env->vl; i++, env->vstart++) {
232         if (!vm && !vext_elem_mask(v0, i)) {
233             continue;
234         }
235 
236         k = 0;
237         while (k < nf) {
238             target_ulong addr = base + stride * i + (k << esz);
239             ldst_elem(env, addr, i + k * max_elems, vd, ra);
240             k++;
241         }
242     }
243     env->vstart = 0;
244 }
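/*
 * Worked example (editor's sketch) for a strided segment load with nf = 2,
 * esz = 1 (16-bit elements) and max_elems = 8:
 *
 *     element i, field k is read from   base + stride * i + (k << 1)
 *     and written to destination index  i + k * 8,
 *
 * i.e. field 0 of every element fills the first register group and field 1
 * the next one, while the two fields of one element sit next to each other
 * in memory.
 */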
245 
246 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
247 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
248                   target_ulong stride, CPURISCVState *env,              \
249                   uint32_t desc)                                        \
250 {                                                                       \
251     uint32_t vm = vext_vm(desc);                                        \
252     vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
253                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);      \
254 }
255 
256 GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b)
257 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
258 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
259 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)
260 
261 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                       \
262 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
263                   target_ulong stride, CPURISCVState *env,              \
264                   uint32_t desc)                                        \
265 {                                                                       \
266     uint32_t vm = vext_vm(desc);                                        \
267     vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
268                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);     \
269 }
270 
271 GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b)
272 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
273 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
274 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
275 
276 /*
277  *** unit-stride: access elements stored contiguously in memory
278  */
279 
280 /* unmasked unit-stride load and store operation */
281 static void
282 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
283              vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl,
284              uintptr_t ra, MMUAccessType access_type)
285 {
286     uint32_t i, k;
287     uint32_t nf = vext_nf(desc);
288     uint32_t max_elems = vext_max_elems(desc, esz);
289 
290     /* load/store bytes from/to guest memory */
291     for (i = env->vstart; i < evl; i++, env->vstart++) {
292         k = 0;
293         while (k < nf) {
294             target_ulong addr = base + ((i * nf + k) << esz);
295             ldst_elem(env, addr, i + k * max_elems, vd, ra);
296             k++;
297         }
298     }
299     env->vstart = 0;
300 }
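/*
 * Worked example (editor's sketch) for a unit-stride segment access with
 * nf = 3 and esz = 2 (32-bit elements): element i, field k uses
 * addr = base + (i * 3 + k) * 4, so consecutive segments are packed
 * back-to-back in memory while each field goes to register index
 * i + k * max_elems.
 */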
301 
302 /*
303  * A masked unit-stride load or store is handled as a special case of the
304  * strided operation, with stride = NF * sizeof(ETYPE).
305  */
306 
307 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN)                            \
308 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
309                          CPURISCVState *env, uint32_t desc)             \
310 {                                                                       \
311     uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
312     vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
313                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);      \
314 }                                                                       \
315                                                                         \
316 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
317                   CPURISCVState *env, uint32_t desc)                    \
318 {                                                                       \
319     vext_ldst_us(vd, base, env, desc, LOAD_FN,                          \
320                  ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_LOAD); \
321 }
322 
323 GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b)
324 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
325 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
326 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)
327 
328 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN)                            \
329 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,          \
330                          CPURISCVState *env, uint32_t desc)              \
331 {                                                                        \
332     uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));              \
333     vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,   \
334                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);      \
335 }                                                                        \
336                                                                          \
337 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                 \
338                   CPURISCVState *env, uint32_t desc)                     \
339 {                                                                        \
340     vext_ldst_us(vd, base, env, desc, STORE_FN,                          \
341                  ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_STORE); \
342 }
343 
344 GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b)
345 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
346 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
347 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)
348 
349 /*
350  *** unit stride mask load and store, EEW = 1
351  */
352 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
353                     CPURISCVState *env, uint32_t desc)
354 {
355     /* evl = ceil(vl/8) */
356     uint8_t evl = (env->vl + 7) >> 3;
357     vext_ldst_us(vd, base, env, desc, lde_b,
358                  0, evl, GETPC(), MMU_DATA_LOAD);
359 }
360 
361 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
362                     CPURISCVState *env, uint32_t desc)
363 {
364     /* evl = ceil(vl/8) */
365     uint8_t evl = (env->vl + 7) >> 3;
366     vext_ldst_us(vd, base, env, desc, ste_b,
367                  0, evl, GETPC(), MMU_DATA_STORE);
368 }
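/*
 * Example (editor's sketch): with vl = 17 the mask occupies ceil(17 / 8) = 3
 * bytes, so vlm.v/vsm.v transfer evl = 3 bytes regardless of SEW or LMUL.
 */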
369 
370 /*
371  *** index: access vector element from indexed memory
372  */
373 typedef target_ulong vext_get_index_addr(target_ulong base,
374         uint32_t idx, void *vs2);
375 
376 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
377 static target_ulong NAME(target_ulong base,            \
378                          uint32_t idx, void *vs2)      \
379 {                                                      \
380     return (base + *((ETYPE *)vs2 + H(idx)));          \
381 }
382 
383 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t,  H1)
384 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
385 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
386 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
387 
388 static inline void
389 vext_ldst_index(void *vd, void *v0, target_ulong base,
390                 void *vs2, CPURISCVState *env, uint32_t desc,
391                 vext_get_index_addr get_index_addr,
392                 vext_ldst_elem_fn *ldst_elem,
393                 uint32_t esz, uintptr_t ra, MMUAccessType access_type)
394 {
395     uint32_t i, k;
396     uint32_t nf = vext_nf(desc);
397     uint32_t vm = vext_vm(desc);
398     uint32_t max_elems = vext_max_elems(desc, esz);
399 
400     /* load/store bytes from/to guest memory */
401     for (i = env->vstart; i < env->vl; i++, env->vstart++) {
402         if (!vm && !vext_elem_mask(v0, i)) {
403             continue;
404         }
405 
406         k = 0;
407         while (k < nf) {
408             abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz);
409             ldst_elem(env, addr, i + k * max_elems, vd, ra);
410             k++;
411         }
412     }
413     env->vstart = 0;
414 }
415 
416 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)                  \
417 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                   \
418                   void *vs2, CPURISCVState *env, uint32_t desc)            \
419 {                                                                          \
420     vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,                \
421                     LOAD_FN, ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
422 }
423 
424 GEN_VEXT_LD_INDEX(vlxei8_8_v,   int8_t,  idx_b, lde_b)
425 GEN_VEXT_LD_INDEX(vlxei8_16_v,  int16_t, idx_b, lde_h)
426 GEN_VEXT_LD_INDEX(vlxei8_32_v,  int32_t, idx_b, lde_w)
427 GEN_VEXT_LD_INDEX(vlxei8_64_v,  int64_t, idx_b, lde_d)
428 GEN_VEXT_LD_INDEX(vlxei16_8_v,  int8_t,  idx_h, lde_b)
429 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
430 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
431 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
432 GEN_VEXT_LD_INDEX(vlxei32_8_v,  int8_t,  idx_w, lde_b)
433 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
434 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
435 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
436 GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b)
437 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
438 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
439 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)
440 
441 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)       \
442 void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
443                   void *vs2, CPURISCVState *env, uint32_t desc)  \
444 {                                                                \
445     vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
446                     STORE_FN, ctzl(sizeof(ETYPE)),               \
447                     GETPC(), MMU_DATA_STORE);                    \
448 }
449 
450 GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
451 GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
452 GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
453 GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
454 GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
455 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
456 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
457 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
458 GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
459 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
460 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
461 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
462 GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
463 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
464 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
465 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)
466 
467 /*
468  *** unit-stride fault-only-first load instructions
469  */
470 static inline void
471 vext_ldff(void *vd, void *v0, target_ulong base,
472           CPURISCVState *env, uint32_t desc,
473           vext_ldst_elem_fn *ldst_elem,
474           uint32_t esz, uintptr_t ra)
475 {
476     void *host;
477     uint32_t i, k, vl = 0;
478     uint32_t nf = vext_nf(desc);
479     uint32_t vm = vext_vm(desc);
480     uint32_t max_elems = vext_max_elems(desc, esz);
481     target_ulong addr, offset, remain;
482 
483     /* probe every access */
484     for (i = env->vstart; i < env->vl; i++) {
485         if (!vm && !vext_elem_mask(v0, i)) {
486             continue;
487         }
488         addr = base + i * (nf << esz);
489         if (i == 0) {
490             probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
491         } else {
492             /* if it triggers an exception, no need to check watchpoint */
493             remain = nf << esz;
494             while (remain > 0) {
495                 offset = -(addr | TARGET_PAGE_MASK);
496                 host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
497                                          cpu_mmu_index(env, false));
498                 if (host) {
499 #ifdef CONFIG_USER_ONLY
500                     if (page_check_range(addr, nf << esz, PAGE_READ) < 0) {
501                         vl = i;
502                         goto ProbeSuccess;
503                     }
504 #else
505                     probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
506 #endif
507                 } else {
508                     vl = i;
509                     goto ProbeSuccess;
510                 }
511                 if (remain <= offset) {
512                     break;
513                 }
514                 remain -= offset;
515                 addr += offset;
516             }
517         }
518     }
519 ProbeSuccess:
520     /* load bytes from guest memory */
521     if (vl != 0) {
522         env->vl = vl;
523     }
524     for (i = env->vstart; i < env->vl; i++) {
525         k = 0;
526         if (!vm && !vext_elem_mask(v0, i)) {
527             continue;
528         }
529         while (k < nf) {
530             target_ulong addr = base + ((i * nf + k) << esz);
531             ldst_elem(env, addr, i + k * max_elems, vd, ra);
532             k++;
533         }
534     }
535     env->vstart = 0;
536 }
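/*
 * Example (editor's sketch): for a fault-only-first load with vl = 8, if the
 * probe loop above finds that element 3 is the first element whose page is
 * unmapped, vl is trimmed to 3 and only elements 0..2 are loaded; a fault on
 * element 0 is still raised as a normal exception by probe_pages().
 */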
537 
538 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
539 void HELPER(NAME)(void *vd, void *v0, target_ulong base,  \
540                   CPURISCVState *env, uint32_t desc)      \
541 {                                                         \
542     vext_ldff(vd, v0, base, env, desc, LOAD_FN,           \
543               ctzl(sizeof(ETYPE)), GETPC());              \
544 }
545 
546 GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b)
547 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
548 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
549 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
550 
551 #define DO_SWAP(N, M) (M)
552 #define DO_AND(N, M)  (N & M)
553 #define DO_XOR(N, M)  (N ^ M)
554 #define DO_OR(N, M)   (N | M)
555 #define DO_ADD(N, M)  (N + M)
556 
557 /* Signed min/max */
558 #define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
559 #define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))
560 
561 /* Unsigned min/max */
562 #define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
563 #define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)
564 
565 /*
566  *** load and store whole register instructions
567  */
568 static void
569 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
570                 vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra,
571                 MMUAccessType access_type)
572 {
573     uint32_t i, k, off, pos;
574     uint32_t nf = vext_nf(desc);
575     uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
576     uint32_t max_elems = vlenb >> esz;
577 
578     k = env->vstart / max_elems;
579     off = env->vstart % max_elems;
580 
581     if (off) {
582         /* load/store rest of elements of current segment pointed to by vstart */
583         for (pos = off; pos < max_elems; pos++, env->vstart++) {
584             target_ulong addr = base + ((pos + k * max_elems) << esz);
585             ldst_elem(env, addr, pos + k * max_elems, vd, ra);
586         }
587         k++;
588     }
589 
590     /* load/store elements for rest of segments */
591     for (; k < nf; k++) {
592         for (i = 0; i < max_elems; i++, env->vstart++) {
593             target_ulong addr = base + ((i + k * max_elems) << esz);
594             ldst_elem(env, addr, i + k * max_elems, vd, ra);
595         }
596     }
597 
598     env->vstart = 0;
599 }
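/*
 * Worked example (editor's sketch): resuming a vl8re8.v after a trap with
 * max_elems = 16 and vstart = 21 gives k = 21 / 16 = 1 and off = 21 % 16 = 5,
 * so elements 5..15 of segment 1 are finished first and segments 2..7 are
 * then transferred in full.
 */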
600 
601 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN)      \
602 void HELPER(NAME)(void *vd, target_ulong base,       \
603                   CPURISCVState *env, uint32_t desc) \
604 {                                                    \
605     vext_ldst_whole(vd, base, env, desc, LOAD_FN,    \
606                     ctzl(sizeof(ETYPE)), GETPC(),    \
607                     MMU_DATA_LOAD);                  \
608 }
609 
610 GEN_VEXT_LD_WHOLE(vl1re8_v,  int8_t,  lde_b)
611 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
612 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
613 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
614 GEN_VEXT_LD_WHOLE(vl2re8_v,  int8_t,  lde_b)
615 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
616 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
617 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
618 GEN_VEXT_LD_WHOLE(vl4re8_v,  int8_t,  lde_b)
619 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
620 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
621 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
622 GEN_VEXT_LD_WHOLE(vl8re8_v,  int8_t,  lde_b)
623 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
624 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
625 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)
626 
627 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN)     \
628 void HELPER(NAME)(void *vd, target_ulong base,       \
629                   CPURISCVState *env, uint32_t desc) \
630 {                                                    \
631     vext_ldst_whole(vd, base, env, desc, STORE_FN,   \
632                     ctzl(sizeof(ETYPE)), GETPC(),    \
633                     MMU_DATA_STORE);                 \
634 }
635 
636 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
637 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
638 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
639 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
640 
641 /*
642  *** Vector Integer Arithmetic Instructions
643  */
644 
645 /* expand macro args before macro */
646 #define RVVCALL(macro, ...)  macro(__VA_ARGS__)
647 
648 /* (TD, T1, T2, TX1, TX2) */
649 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
650 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
651 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
652 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
653 #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
654 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
655 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
656 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
657 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
658 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
659 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
660 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
661 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
662 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
663 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
664 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
665 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
666 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
667 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
668 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
669 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
670 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
671 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
672 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
673 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
674 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
675 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
676 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
677 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
678 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
679 
680 /* operation of two vector elements */
681 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
682 
683 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
684 static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
685 {                                                               \
686     TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
687     TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
688     *((TD *)vd + HD(i)) = OP(s2, s1);                           \
689 }
690 #define DO_SUB(N, M) (N - M)
691 #define DO_RSUB(N, M) (M - N)
692 
693 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
694 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
695 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
696 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
697 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
698 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
699 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
700 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
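/*
 * Example (editor's sketch): RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1,
 * DO_ADD) expands to roughly
 *
 *     static void do_vadd_vv_b(void *vd, void *vs1, void *vs2, int i)
 *     {
 *         int8_t s1 = *((int8_t *)vs1 + H1(i));
 *         int8_t s2 = *((int8_t *)vs2 + H1(i));
 *         *((int8_t *)vd + H1(i)) = s2 + s1;
 *     }
 */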
701 
702 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
703                        CPURISCVState *env, uint32_t desc,
704                        uint32_t esz, uint32_t dsz,
705                        opivv2_fn *fn)
706 {
707     uint32_t vm = vext_vm(desc);
708     uint32_t vl = env->vl;
709     uint32_t i;
710 
711     for (i = env->vstart; i < vl; i++) {
712         if (!vm && !vext_elem_mask(v0, i)) {
713             continue;
714         }
715         fn(vd, vs1, vs2, i);
716     }
717     env->vstart = 0;
718 }
719 
720 /* generate the helpers for OPIVV */
721 #define GEN_VEXT_VV(NAME, ESZ, DSZ)                       \
722 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
723                   void *vs2, CPURISCVState *env,          \
724                   uint32_t desc)                          \
725 {                                                         \
726     do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,     \
727                do_##NAME);                                \
728 }
729 
730 GEN_VEXT_VV(vadd_vv_b, 1, 1)
731 GEN_VEXT_VV(vadd_vv_h, 2, 2)
732 GEN_VEXT_VV(vadd_vv_w, 4, 4)
733 GEN_VEXT_VV(vadd_vv_d, 8, 8)
734 GEN_VEXT_VV(vsub_vv_b, 1, 1)
735 GEN_VEXT_VV(vsub_vv_h, 2, 2)
736 GEN_VEXT_VV(vsub_vv_w, 4, 4)
737 GEN_VEXT_VV(vsub_vv_d, 8, 8)
738 
739 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
740 
741 /*
742  * (T1)s1 gives the real operand type.
743  * (TX1)(T1)s1 expands it to the operand type of widen or narrow operations.
744  */
745 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
746 static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
747 {                                                                   \
748     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
749     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
750 }
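/*
 * Example (editor's sketch): for vadd_vx_b both T1 and TX1 are int8_t, so
 * (TX1)(T1)s1 simply truncates the scalar to 8 bits; for a widening op such
 * as vwadd_vx_b (T1 = int8_t, TX1 = int16_t) the same expression first
 * truncates s1 to int8_t and then sign-extends it to the 16-bit operand
 * width before OP is applied.
 */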
751 
752 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
753 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
754 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
755 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
756 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
757 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
758 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
759 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
760 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
761 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
762 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
763 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
764 
765 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
766                        CPURISCVState *env, uint32_t desc,
767                        uint32_t esz, uint32_t dsz,
768                        opivx2_fn fn)
769 {
770     uint32_t vm = vext_vm(desc);
771     uint32_t vl = env->vl;
772     uint32_t i;
773 
774     for (i = env->vstart; i < vl; i++) {
775         if (!vm && !vext_elem_mask(v0, i)) {
776             continue;
777         }
778         fn(vd, s1, vs2, i);
779     }
780     env->vstart = 0;
781 }
782 
783 /* generate the helpers for OPIVX */
784 #define GEN_VEXT_VX(NAME, ESZ, DSZ)                       \
785 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
786                   void *vs2, CPURISCVState *env,          \
787                   uint32_t desc)                          \
788 {                                                         \
789     do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ,      \
790                do_##NAME);                                \
791 }
792 
793 GEN_VEXT_VX(vadd_vx_b, 1, 1)
794 GEN_VEXT_VX(vadd_vx_h, 2, 2)
795 GEN_VEXT_VX(vadd_vx_w, 4, 4)
796 GEN_VEXT_VX(vadd_vx_d, 8, 8)
797 GEN_VEXT_VX(vsub_vx_b, 1, 1)
798 GEN_VEXT_VX(vsub_vx_h, 2, 2)
799 GEN_VEXT_VX(vsub_vx_w, 4, 4)
800 GEN_VEXT_VX(vsub_vx_d, 8, 8)
801 GEN_VEXT_VX(vrsub_vx_b, 1, 1)
802 GEN_VEXT_VX(vrsub_vx_h, 2, 2)
803 GEN_VEXT_VX(vrsub_vx_w, 4, 4)
804 GEN_VEXT_VX(vrsub_vx_d, 8, 8)
805 
806 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
807 {
808     intptr_t oprsz = simd_oprsz(desc);
809     intptr_t i;
810 
811     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
812         *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
813     }
814 }
815 
816 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
817 {
818     intptr_t oprsz = simd_oprsz(desc);
819     intptr_t i;
820 
821     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
822         *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
823     }
824 }
825 
826 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
827 {
828     intptr_t oprsz = simd_oprsz(desc);
829     intptr_t i;
830 
831     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
832         *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
833     }
834 }
835 
836 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
837 {
838     intptr_t oprsz = simd_oprsz(desc);
839     intptr_t i;
840 
841     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
842         *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
843     }
844 }
845 
846 /* Vector Widening Integer Add/Subtract */
847 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
848 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
849 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
850 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
851 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
852 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
853 #define WOP_WUUU_B  uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
854 #define WOP_WUUU_H  uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
855 #define WOP_WUUU_W  uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
856 #define WOP_WSSS_B  int16_t, int8_t, int16_t, int16_t, int16_t
857 #define WOP_WSSS_H  int32_t, int16_t, int32_t, int32_t, int32_t
858 #define WOP_WSSS_W  int64_t, int32_t, int64_t, int64_t, int64_t
859 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
860 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
861 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
862 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
863 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
864 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
865 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
866 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
867 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
868 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
869 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
870 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
871 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
872 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
873 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
874 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
875 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
876 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
877 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
878 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
879 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
880 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
881 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
882 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
883 GEN_VEXT_VV(vwaddu_vv_b, 1, 2)
884 GEN_VEXT_VV(vwaddu_vv_h, 2, 4)
885 GEN_VEXT_VV(vwaddu_vv_w, 4, 8)
886 GEN_VEXT_VV(vwsubu_vv_b, 1, 2)
887 GEN_VEXT_VV(vwsubu_vv_h, 2, 4)
888 GEN_VEXT_VV(vwsubu_vv_w, 4, 8)
889 GEN_VEXT_VV(vwadd_vv_b, 1, 2)
890 GEN_VEXT_VV(vwadd_vv_h, 2, 4)
891 GEN_VEXT_VV(vwadd_vv_w, 4, 8)
892 GEN_VEXT_VV(vwsub_vv_b, 1, 2)
893 GEN_VEXT_VV(vwsub_vv_h, 2, 4)
894 GEN_VEXT_VV(vwsub_vv_w, 4, 8)
895 GEN_VEXT_VV(vwaddu_wv_b, 1, 2)
896 GEN_VEXT_VV(vwaddu_wv_h, 2, 4)
897 GEN_VEXT_VV(vwaddu_wv_w, 4, 8)
898 GEN_VEXT_VV(vwsubu_wv_b, 1, 2)
899 GEN_VEXT_VV(vwsubu_wv_h, 2, 4)
900 GEN_VEXT_VV(vwsubu_wv_w, 4, 8)
901 GEN_VEXT_VV(vwadd_wv_b, 1, 2)
902 GEN_VEXT_VV(vwadd_wv_h, 2, 4)
903 GEN_VEXT_VV(vwadd_wv_w, 4, 8)
904 GEN_VEXT_VV(vwsub_wv_b, 1, 2)
905 GEN_VEXT_VV(vwsub_wv_h, 2, 4)
906 GEN_VEXT_VV(vwsub_wv_w, 4, 8)
907 
908 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
909 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
910 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
911 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
912 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
913 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
914 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
915 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
916 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
917 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
918 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
919 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
920 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
921 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
922 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
923 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
924 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
925 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
926 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
927 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
928 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
929 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
930 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
931 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
932 GEN_VEXT_VX(vwaddu_vx_b, 1, 2)
933 GEN_VEXT_VX(vwaddu_vx_h, 2, 4)
934 GEN_VEXT_VX(vwaddu_vx_w, 4, 8)
935 GEN_VEXT_VX(vwsubu_vx_b, 1, 2)
936 GEN_VEXT_VX(vwsubu_vx_h, 2, 4)
937 GEN_VEXT_VX(vwsubu_vx_w, 4, 8)
938 GEN_VEXT_VX(vwadd_vx_b, 1, 2)
939 GEN_VEXT_VX(vwadd_vx_h, 2, 4)
940 GEN_VEXT_VX(vwadd_vx_w, 4, 8)
941 GEN_VEXT_VX(vwsub_vx_b, 1, 2)
942 GEN_VEXT_VX(vwsub_vx_h, 2, 4)
943 GEN_VEXT_VX(vwsub_vx_w, 4, 8)
944 GEN_VEXT_VX(vwaddu_wx_b, 1, 2)
945 GEN_VEXT_VX(vwaddu_wx_h, 2, 4)
946 GEN_VEXT_VX(vwaddu_wx_w, 4, 8)
947 GEN_VEXT_VX(vwsubu_wx_b, 1, 2)
948 GEN_VEXT_VX(vwsubu_wx_h, 2, 4)
949 GEN_VEXT_VX(vwsubu_wx_w, 4, 8)
950 GEN_VEXT_VX(vwadd_wx_b, 1, 2)
951 GEN_VEXT_VX(vwadd_wx_h, 2, 4)
952 GEN_VEXT_VX(vwadd_wx_w, 4, 8)
953 GEN_VEXT_VX(vwsub_wx_b, 1, 2)
954 GEN_VEXT_VX(vwsub_wx_h, 2, 4)
955 GEN_VEXT_VX(vwsub_wx_w, 4, 8)
956 
957 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
958 #define DO_VADC(N, M, C) (N + M + C)
959 #define DO_VSBC(N, M, C) (N - M - C)
960 
961 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP)              \
962 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
963                   CPURISCVState *env, uint32_t desc)          \
964 {                                                             \
965     uint32_t vl = env->vl;                                    \
966     uint32_t i;                                               \
967                                                               \
968     for (i = env->vstart; i < vl; i++) {                      \
969         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
970         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
971         ETYPE carry = vext_elem_mask(v0, i);                  \
972                                                               \
973         *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry);         \
974     }                                                         \
975     env->vstart = 0;                                          \
976 }
977 
978 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t,  H1, DO_VADC)
979 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
980 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
981 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)
982 
983 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t,  H1, DO_VSBC)
984 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
985 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
986 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
987 
988 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP)                         \
989 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
990                   CPURISCVState *env, uint32_t desc)                     \
991 {                                                                        \
992     uint32_t vl = env->vl;                                               \
993     uint32_t i;                                                          \
994                                                                          \
995     for (i = env->vstart; i < vl; i++) {                                 \
996         ETYPE s2 = *((ETYPE *)vs2 + H(i));                               \
997         ETYPE carry = vext_elem_mask(v0, i);                             \
998                                                                          \
999         *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
1000     }                                                                    \
1001     env->vstart = 0;                                          \
1002 }
1003 
1004 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t,  H1, DO_VADC)
1005 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
1006 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
1007 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)
1008 
1009 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t,  H1, DO_VSBC)
1010 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
1011 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
1012 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
1013 
1014 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N :           \
1015                           (__typeof(N))(N + M) < N)
1016 #define DO_MSBC(N, M, C) (C ? N <= M : N < M)
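/*
 * Worked example (editor's sketch) of DO_MADC in uint8_t arithmetic:
 * N = 200, M = 100, C = 0: (uint8_t)(N + M) == 44 < N, so the carry-out is 1;
 * N = 200, M = 55,  C = 1: (uint8_t)(N + M + 1) == 0 <= N, carry-out 1 again;
 * N = 200, M = 54,  C = 1: (uint8_t)(N + M + 1) == 255 > N, so no carry.
 */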
1017 
1018 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
1019 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
1020                   CPURISCVState *env, uint32_t desc)          \
1021 {                                                             \
1022     uint32_t vl = env->vl;                                    \
1023     uint32_t vm = vext_vm(desc);                              \
1024     uint32_t i;                                               \
1025                                                               \
1026     for (i = env->vstart; i < vl; i++) {                      \
1027         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
1028         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
1029         ETYPE carry = !vm && vext_elem_mask(v0, i);           \
1030         vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry));      \
1031     }                                                         \
1032     env->vstart = 0;                                          \
1033 }
1034 
1035 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t,  H1, DO_MADC)
1036 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
1037 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
1038 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)
1039 
1040 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t,  H1, DO_MSBC)
1041 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
1042 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
1043 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
1044 
1045 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)               \
1046 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
1047                   void *vs2, CPURISCVState *env, uint32_t desc) \
1048 {                                                               \
1049     uint32_t vl = env->vl;                                      \
1050     uint32_t vm = vext_vm(desc);                                \
1051     uint32_t i;                                                 \
1052                                                                 \
1053     for (i = env->vstart; i < vl; i++) {                        \
1054         ETYPE s2 = *((ETYPE *)vs2 + H(i));                      \
1055         ETYPE carry = !vm && vext_elem_mask(v0, i);             \
1056         vext_set_elem_mask(vd, i,                               \
1057                 DO_OP(s2, (ETYPE)(target_long)s1, carry));      \
1058     }                                                           \
1059     env->vstart = 0;                                            \
1060 }
1061 
1062 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t,  H1, DO_MADC)
1063 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
1064 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
1065 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)
1066 
1067 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t,  H1, DO_MSBC)
1068 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
1069 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
1070 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
1071 
1072 /* Vector Bitwise Logical Instructions */
1073 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
1074 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
1075 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
1076 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
1077 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
1078 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
1079 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
1080 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
1081 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
1082 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
1083 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
1084 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
1085 GEN_VEXT_VV(vand_vv_b, 1, 1)
1086 GEN_VEXT_VV(vand_vv_h, 2, 2)
1087 GEN_VEXT_VV(vand_vv_w, 4, 4)
1088 GEN_VEXT_VV(vand_vv_d, 8, 8)
1089 GEN_VEXT_VV(vor_vv_b, 1, 1)
1090 GEN_VEXT_VV(vor_vv_h, 2, 2)
1091 GEN_VEXT_VV(vor_vv_w, 4, 4)
1092 GEN_VEXT_VV(vor_vv_d, 8, 8)
1093 GEN_VEXT_VV(vxor_vv_b, 1, 1)
1094 GEN_VEXT_VV(vxor_vv_h, 2, 2)
1095 GEN_VEXT_VV(vxor_vv_w, 4, 4)
1096 GEN_VEXT_VV(vxor_vv_d, 8, 8)
1097 
1098 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
1099 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
1100 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
1101 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
1102 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
1103 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
1104 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
1105 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
1106 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
1107 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
1108 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
1109 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
1110 GEN_VEXT_VX(vand_vx_b, 1, 1)
1111 GEN_VEXT_VX(vand_vx_h, 2, 2)
1112 GEN_VEXT_VX(vand_vx_w, 4, 4)
1113 GEN_VEXT_VX(vand_vx_d, 8, 8)
1114 GEN_VEXT_VX(vor_vx_b, 1, 1)
1115 GEN_VEXT_VX(vor_vx_h, 2, 2)
1116 GEN_VEXT_VX(vor_vx_w, 4, 4)
1117 GEN_VEXT_VX(vor_vx_d, 8, 8)
1118 GEN_VEXT_VX(vxor_vx_b, 1, 1)
1119 GEN_VEXT_VX(vxor_vx_h, 2, 2)
1120 GEN_VEXT_VX(vxor_vx_w, 4, 4)
1121 GEN_VEXT_VX(vxor_vx_d, 8, 8)
1122 
1123 /* Vector Single-Width Bit Shift Instructions */
1124 #define DO_SLL(N, M)  (N << (M))
1125 #define DO_SRL(N, M)  (N >> (M))
1126 
1127 /* generate the helpers for shift instructions with two vector operands */
1128 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK)             \
1129 void HELPER(NAME)(void *vd, void *v0, void *vs1,                          \
1130                   void *vs2, CPURISCVState *env, uint32_t desc)           \
1131 {                                                                         \
1132     uint32_t vm = vext_vm(desc);                                          \
1133     uint32_t vl = env->vl;                                                \
1134     uint32_t i;                                                           \
1135                                                                           \
1136     for (i = env->vstart; i < vl; i++) {                                  \
1137         if (!vm && !vext_elem_mask(v0, i)) {                              \
1138             continue;                                                     \
1139         }                                                                 \
1140         TS1 s1 = *((TS1 *)vs1 + HS1(i));                                  \
1141         TS2 s2 = *((TS2 *)vs2 + HS2(i));                                  \
1142         *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);                        \
1143     }                                                                     \
1144     env->vstart = 0;                                                      \
1145 }
1146 
1147 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t,  uint8_t, H1, H1, DO_SLL, 0x7)
1148 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
1149 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
1150 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)
1151 
1152 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1153 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1154 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1155 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1156 
1157 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t,  int8_t, H1, H1, DO_SRL, 0x7)
1158 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
1159 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1160 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
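/*
 * Note (editor's sketch): the shift amount is masked to log2(SEW) bits, e.g.
 * s1 & 0x1f for 32-bit elements, matching the RVV rule that only the low
 * lg2(SEW) bits of the shift operand are used.  For vsra the source type TS2
 * is signed, so DO_SRL(s2, ...) is a signed >>, which QEMU relies on being
 * an arithmetic right shift on all supported hosts.
 */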
1161 
1162 /* generate the helpers for shift instructions with one vector and one scalar */
1163 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
1164 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,      \
1165         void *vs2, CPURISCVState *env, uint32_t desc)       \
1166 {                                                           \
1167     uint32_t vm = vext_vm(desc);                            \
1168     uint32_t vl = env->vl;                                  \
1169     uint32_t i;                                             \
1170                                                             \
1171     for (i = env->vstart; i < vl; i++) {                    \
1172         if (!vm && !vext_elem_mask(v0, i)) {                \
1173             continue;                                       \
1174         }                                                   \
1175         TS2 s2 = *((TS2 *)vs2 + HS2(i));                    \
1176         *((TD *)vd + HD(i)) = OP(s2, s1 & MASK);            \
1177     }                                                       \
1178     env->vstart = 0;                                        \
1179 }
1180 
1181 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
1182 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
1183 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
1184 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)
1185 
1186 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1187 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1188 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1189 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1190 
1191 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
1192 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
1193 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1194 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1195 
1196 /* Vector Narrowing Integer Right Shift Instructions */
1197 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t,  uint16_t, H1, H2, DO_SRL, 0xf)
1198 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1199 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1200 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t,  int16_t, H1, H2, DO_SRL, 0xf)
1201 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1202 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1203 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1204 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1205 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1206 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
1207 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1208 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1209 
1210 /* Vector Integer Comparison Instructions */
1211 #define DO_MSEQ(N, M) (N == M)
1212 #define DO_MSNE(N, M) (N != M)
1213 #define DO_MSLT(N, M) (N < M)
1214 #define DO_MSLE(N, M) (N <= M)
1215 #define DO_MSGT(N, M) (N > M)
1216 
1217 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                \
1218 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
1219                   CPURISCVState *env, uint32_t desc)          \
1220 {                                                             \
1221     uint32_t vm = vext_vm(desc);                              \
1222     uint32_t vl = env->vl;                                    \
1223     uint32_t i;                                               \
1224                                                               \
1225     for (i = env->vstart; i < vl; i++) {                      \
1226         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
1227         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
1228         if (!vm && !vext_elem_mask(v0, i)) {                  \
1229             continue;                                         \
1230         }                                                     \
1231         vext_set_elem_mask(vd, i, DO_OP(s2, s1));             \
1232     }                                                         \
1233     env->vstart = 0;                                          \
1234 }
1235 
1236 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
1237 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
1238 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
1239 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
1240 
1241 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t,  H1, DO_MSNE)
1242 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
1243 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
1244 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
1245 
1246 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t,  H1, DO_MSLT)
1247 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
1248 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
1249 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
1250 
1251 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t,  H1, DO_MSLT)
1252 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
1253 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
1254 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
1255 
1256 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t,  H1, DO_MSLE)
1257 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
1258 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
1259 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
1260 
1261 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t,  H1, DO_MSLE)
1262 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
1263 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
1264 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1265 
1266 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP)                      \
1267 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
1268                   CPURISCVState *env, uint32_t desc)                \
1269 {                                                                   \
1270     uint32_t vm = vext_vm(desc);                                    \
1271     uint32_t vl = env->vl;                                          \
1272     uint32_t i;                                                     \
1273                                                                     \
1274     for (i = env->vstart; i < vl; i++) {                            \
1275         ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
1276         if (!vm && !vext_elem_mask(v0, i)) {                        \
1277             continue;                                               \
1278         }                                                           \
1279         vext_set_elem_mask(vd, i,                                   \
1280                 DO_OP(s2, (ETYPE)(target_long)s1));                 \
1281     }                                                               \
1282     env->vstart = 0;                                                \
1283 }
1284 
1285 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t,  H1, DO_MSEQ)
1286 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
1287 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
1288 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
1289 
1290 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t,  H1, DO_MSNE)
1291 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
1292 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
1293 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
1294 
1295 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t,  H1, DO_MSLT)
1296 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
1297 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
1298 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
1299 
1300 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t,  H1, DO_MSLT)
1301 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
1302 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
1303 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
1304 
1305 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t,  H1, DO_MSLE)
1306 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
1307 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
1308 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
1309 
1310 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t,  H1, DO_MSLE)
1311 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
1312 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
1313 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
1314 
1315 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t,  H1, DO_MSGT)
1316 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
1317 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
1318 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
1319 
1320 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t,  H1, DO_MSGT)
1321 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
1322 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
1323 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
1324 
1325 /* Vector Integer Min/Max Instructions */
1326 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1327 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1328 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1329 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1330 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1331 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1332 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1333 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1334 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1335 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1336 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1337 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1338 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1339 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1340 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1341 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
1342 GEN_VEXT_VV(vminu_vv_b, 1, 1)
1343 GEN_VEXT_VV(vminu_vv_h, 2, 2)
1344 GEN_VEXT_VV(vminu_vv_w, 4, 4)
1345 GEN_VEXT_VV(vminu_vv_d, 8, 8)
1346 GEN_VEXT_VV(vmin_vv_b, 1, 1)
1347 GEN_VEXT_VV(vmin_vv_h, 2, 2)
1348 GEN_VEXT_VV(vmin_vv_w, 4, 4)
1349 GEN_VEXT_VV(vmin_vv_d, 8, 8)
1350 GEN_VEXT_VV(vmaxu_vv_b, 1, 1)
1351 GEN_VEXT_VV(vmaxu_vv_h, 2, 2)
1352 GEN_VEXT_VV(vmaxu_vv_w, 4, 4)
1353 GEN_VEXT_VV(vmaxu_vv_d, 8, 8)
1354 GEN_VEXT_VV(vmax_vv_b, 1, 1)
1355 GEN_VEXT_VV(vmax_vv_h, 2, 2)
1356 GEN_VEXT_VV(vmax_vv_w, 4, 4)
1357 GEN_VEXT_VV(vmax_vv_d, 8, 8)
1358 
1359 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1360 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1361 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1362 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1363 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1364 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1365 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1366 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1367 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1368 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1369 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1370 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1371 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1372 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1373 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1374 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
1375 GEN_VEXT_VX(vminu_vx_b, 1, 1)
1376 GEN_VEXT_VX(vminu_vx_h, 2, 2)
1377 GEN_VEXT_VX(vminu_vx_w, 4, 4)
1378 GEN_VEXT_VX(vminu_vx_d, 8, 8)
1379 GEN_VEXT_VX(vmin_vx_b, 1, 1)
1380 GEN_VEXT_VX(vmin_vx_h, 2, 2)
1381 GEN_VEXT_VX(vmin_vx_w, 4, 4)
1382 GEN_VEXT_VX(vmin_vx_d, 8, 8)
1383 GEN_VEXT_VX(vmaxu_vx_b, 1, 1)
1384 GEN_VEXT_VX(vmaxu_vx_h, 2, 2)
1385 GEN_VEXT_VX(vmaxu_vx_w, 4, 4)
1386 GEN_VEXT_VX(vmaxu_vx_d, 8, 8)
1387 GEN_VEXT_VX(vmax_vx_b, 1, 1)
1388 GEN_VEXT_VX(vmax_vx_h, 2, 2)
1389 GEN_VEXT_VX(vmax_vx_w, 4, 4)
1390 GEN_VEXT_VX(vmax_vx_d, 8, 8)
1391 
1392 /* Vector Single-Width Integer Multiply Instructions */
1393 #define DO_MUL(N, M) (N * M)
1394 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1395 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1396 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1397 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
1398 GEN_VEXT_VV(vmul_vv_b, 1, 1)
1399 GEN_VEXT_VV(vmul_vv_h, 2, 2)
1400 GEN_VEXT_VV(vmul_vv_w, 4, 4)
1401 GEN_VEXT_VV(vmul_vv_d, 8, 8)
1402 
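/*
 * vmulh/vmulhu/vmulhsu return the high half of the 2*SEW-bit product.
 * The sub-64-bit variants widen, multiply and shift; the 64-bit variants
 * use the 128-bit multiply helpers.
 */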
1403 static int8_t do_mulh_b(int8_t s2, int8_t s1)
1404 {
1405     return (int16_t)s2 * (int16_t)s1 >> 8;
1406 }
1407 
1408 static int16_t do_mulh_h(int16_t s2, int16_t s1)
1409 {
1410     return (int32_t)s2 * (int32_t)s1 >> 16;
1411 }
1412 
1413 static int32_t do_mulh_w(int32_t s2, int32_t s1)
1414 {
1415     return (int64_t)s2 * (int64_t)s1 >> 32;
1416 }
1417 
1418 static int64_t do_mulh_d(int64_t s2, int64_t s1)
1419 {
1420     uint64_t hi_64, lo_64;
1421 
1422     muls64(&lo_64, &hi_64, s1, s2);
1423     return hi_64;
1424 }
1425 
1426 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1427 {
1428     return (uint16_t)s2 * (uint16_t)s1 >> 8;
1429 }
1430 
1431 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1432 {
1433     return (uint32_t)s2 * (uint32_t)s1 >> 16;
1434 }
1435 
1436 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1437 {
1438     return (uint64_t)s2 * (uint64_t)s1 >> 32;
1439 }
1440 
1441 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1442 {
1443     uint64_t hi_64, lo_64;
1444 
1445     mulu64(&lo_64, &hi_64, s2, s1);
1446     return hi_64;
1447 }
1448 
1449 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1450 {
1451     return (int16_t)s2 * (uint16_t)s1 >> 8;
1452 }
1453 
1454 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1455 {
1456     return (int32_t)s2 * (uint32_t)s1 >> 16;
1457 }
1458 
1459 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1460 {
1461     return (int64_t)s2 * (uint64_t)s1 >> 32;
1462 }
1463 
1464 /*
1465  * Signed (s2) by unsigned (s1) high-half multiply, built on mulu64().
1466  *
1467  * Let  A = signed operand,
1468  *      B = unsigned operand,
1469  *      P = mulu64(A, B), unsigned product of the raw bit patterns,
1470  *      SP = A * B, the desired signed product.
1471  *
1472  * IF A < 0, the unsigned value of A's bit pattern is A + 2 ** 64, so
1473  *      P  = (A + 2 ** 64) * B
1474  *         = A * B + 2 ** 64 * B
1475  *      SP = P - 2 ** 64 * B
1476  * ELSE
1477  *      SP = P
1478  *
1479  * Subtracting 2 ** 64 * B only affects the high half, hence:
1480  *      HI_P -= (A < 0 ? B : 0)
1481  */
1482 
1483 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1484 {
1485     uint64_t hi_64, lo_64;
1486 
1487     mulu64(&lo_64, &hi_64, s2, s1);
1488 
1489     hi_64 -= s2 < 0 ? s1 : 0;
1490     return hi_64;
1491 }
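/*
 * Quick sanity check: A = -2, B = 3.  mulu64() sees A as 2 ** 64 - 2, so
 * P = 3 * 2 ** 64 - 6 and HI_P = 2.  The signed product is -6, whose high
 * half is -1, and indeed HI_P - B = 2 - 3 = -1.
 */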
1492 
1493 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1494 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1495 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1496 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1497 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1498 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1499 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1500 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1501 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1502 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1503 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1504 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
1505 GEN_VEXT_VV(vmulh_vv_b, 1, 1)
1506 GEN_VEXT_VV(vmulh_vv_h, 2, 2)
1507 GEN_VEXT_VV(vmulh_vv_w, 4, 4)
1508 GEN_VEXT_VV(vmulh_vv_d, 8, 8)
1509 GEN_VEXT_VV(vmulhu_vv_b, 1, 1)
1510 GEN_VEXT_VV(vmulhu_vv_h, 2, 2)
1511 GEN_VEXT_VV(vmulhu_vv_w, 4, 4)
1512 GEN_VEXT_VV(vmulhu_vv_d, 8, 8)
1513 GEN_VEXT_VV(vmulhsu_vv_b, 1, 1)
1514 GEN_VEXT_VV(vmulhsu_vv_h, 2, 2)
1515 GEN_VEXT_VV(vmulhsu_vv_w, 4, 4)
1516 GEN_VEXT_VV(vmulhsu_vv_d, 8, 8)
1517 
1518 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1519 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1520 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1521 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1522 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1523 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1524 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1525 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1526 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1527 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1528 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1529 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1530 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1531 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1532 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1533 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
1534 GEN_VEXT_VX(vmul_vx_b, 1, 1)
1535 GEN_VEXT_VX(vmul_vx_h, 2, 2)
1536 GEN_VEXT_VX(vmul_vx_w, 4, 4)
1537 GEN_VEXT_VX(vmul_vx_d, 8, 8)
1538 GEN_VEXT_VX(vmulh_vx_b, 1, 1)
1539 GEN_VEXT_VX(vmulh_vx_h, 2, 2)
1540 GEN_VEXT_VX(vmulh_vx_w, 4, 4)
1541 GEN_VEXT_VX(vmulh_vx_d, 8, 8)
1542 GEN_VEXT_VX(vmulhu_vx_b, 1, 1)
1543 GEN_VEXT_VX(vmulhu_vx_h, 2, 2)
1544 GEN_VEXT_VX(vmulhu_vx_w, 4, 4)
1545 GEN_VEXT_VX(vmulhu_vx_d, 8, 8)
1546 GEN_VEXT_VX(vmulhsu_vx_b, 1, 1)
1547 GEN_VEXT_VX(vmulhsu_vx_h, 2, 2)
1548 GEN_VEXT_VX(vmulhsu_vx_w, 4, 4)
1549 GEN_VEXT_VX(vmulhsu_vx_d, 8, 8)
1550 
1551 /* Vector Integer Divide Instructions */
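/*
 * Division follows the scalar RISC-V rules: dividing by zero yields an
 * all-ones quotient and returns the dividend as the remainder; signed
 * overflow (most-negative value divided by -1) returns the dividend as the
 * quotient and 0 as the remainder.  No traps are raised.  In DO_DIV/DO_REM,
 * (N == -N) is true for the most-negative value (and for 0, where either
 * branch gives the same result).
 */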
1552 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1553 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
1554 #define DO_DIV(N, M)  (unlikely(M == 0) ? (__typeof(N))(-1) :\
1555         unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
1556 #define DO_REM(N, M)  (unlikely(M == 0) ? N :\
1557         unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
1558 
1559 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1560 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1561 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1562 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1563 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1564 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1565 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1566 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1567 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1568 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1569 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1570 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1571 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1572 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1573 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1574 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
1575 GEN_VEXT_VV(vdivu_vv_b, 1, 1)
1576 GEN_VEXT_VV(vdivu_vv_h, 2, 2)
1577 GEN_VEXT_VV(vdivu_vv_w, 4, 4)
1578 GEN_VEXT_VV(vdivu_vv_d, 8, 8)
1579 GEN_VEXT_VV(vdiv_vv_b, 1, 1)
1580 GEN_VEXT_VV(vdiv_vv_h, 2, 2)
1581 GEN_VEXT_VV(vdiv_vv_w, 4, 4)
1582 GEN_VEXT_VV(vdiv_vv_d, 8, 8)
1583 GEN_VEXT_VV(vremu_vv_b, 1, 1)
1584 GEN_VEXT_VV(vremu_vv_h, 2, 2)
1585 GEN_VEXT_VV(vremu_vv_w, 4, 4)
1586 GEN_VEXT_VV(vremu_vv_d, 8, 8)
1587 GEN_VEXT_VV(vrem_vv_b, 1, 1)
1588 GEN_VEXT_VV(vrem_vv_h, 2, 2)
1589 GEN_VEXT_VV(vrem_vv_w, 4, 4)
1590 GEN_VEXT_VV(vrem_vv_d, 8, 8)
1591 
1592 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1593 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1594 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1595 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1596 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1597 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1598 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1599 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1600 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1601 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1602 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1603 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1604 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1605 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1606 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1607 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
1608 GEN_VEXT_VX(vdivu_vx_b, 1, 1)
1609 GEN_VEXT_VX(vdivu_vx_h, 2, 2)
1610 GEN_VEXT_VX(vdivu_vx_w, 4, 4)
1611 GEN_VEXT_VX(vdivu_vx_d, 8, 8)
1612 GEN_VEXT_VX(vdiv_vx_b, 1, 1)
1613 GEN_VEXT_VX(vdiv_vx_h, 2, 2)
1614 GEN_VEXT_VX(vdiv_vx_w, 4, 4)
1615 GEN_VEXT_VX(vdiv_vx_d, 8, 8)
1616 GEN_VEXT_VX(vremu_vx_b, 1, 1)
1617 GEN_VEXT_VX(vremu_vx_h, 2, 2)
1618 GEN_VEXT_VX(vremu_vx_w, 4, 4)
1619 GEN_VEXT_VX(vremu_vx_d, 8, 8)
1620 GEN_VEXT_VX(vrem_vx_b, 1, 1)
1621 GEN_VEXT_VX(vrem_vx_h, 2, 2)
1622 GEN_VEXT_VX(vrem_vx_w, 4, 4)
1623 GEN_VEXT_VX(vrem_vx_d, 8, 8)
1624 
1625 /* Vector Widening Integer Multiply Instructions */
1626 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1627 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1628 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1629 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1630 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1631 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1632 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1633 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1634 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
1635 GEN_VEXT_VV(vwmul_vv_b, 1, 2)
1636 GEN_VEXT_VV(vwmul_vv_h, 2, 4)
1637 GEN_VEXT_VV(vwmul_vv_w, 4, 8)
1638 GEN_VEXT_VV(vwmulu_vv_b, 1, 2)
1639 GEN_VEXT_VV(vwmulu_vv_h, 2, 4)
1640 GEN_VEXT_VV(vwmulu_vv_w, 4, 8)
1641 GEN_VEXT_VV(vwmulsu_vv_b, 1, 2)
1642 GEN_VEXT_VV(vwmulsu_vv_h, 2, 4)
1643 GEN_VEXT_VV(vwmulsu_vv_w, 4, 8)
1644 
1645 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1646 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1647 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1648 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1649 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1650 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1651 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1652 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1653 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
1654 GEN_VEXT_VX(vwmul_vx_b, 1, 2)
1655 GEN_VEXT_VX(vwmul_vx_h, 2, 4)
1656 GEN_VEXT_VX(vwmul_vx_w, 4, 8)
1657 GEN_VEXT_VX(vwmulu_vx_b, 1, 2)
1658 GEN_VEXT_VX(vwmulu_vx_h, 2, 4)
1659 GEN_VEXT_VX(vwmulu_vx_w, 4, 8)
1660 GEN_VEXT_VX(vwmulsu_vx_b, 1, 2)
1661 GEN_VEXT_VX(vwmulsu_vx_h, 2, 4)
1662 GEN_VEXT_VX(vwmulsu_vx_w, 4, 8)
1663 
1664 /* Vector Single-Width Integer Multiply-Add Instructions */
1665 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
1666 static void do_##NAME(void *vd, void *vs1, void *vs2, int i)       \
1667 {                                                                  \
1668     TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
1669     TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
1670     TD d = *((TD *)vd + HD(i));                                    \
1671     *((TD *)vd + HD(i)) = OP(s2, s1, d);                           \
1672 }
1673 
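/*
 * Unlike the two-operand ops above, multiply-add also reads the current
 * destination element: in the macros below N is the vs2 element, M is the
 * vs1 element (or the scalar rs1 converted to the operand type) and D is
 * the accumulator read from vd.
 */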
1674 #define DO_MACC(N, M, D) (M * N + D)
1675 #define DO_NMSAC(N, M, D) (-(M * N) + D)
1676 #define DO_MADD(N, M, D) (M * D + N)
1677 #define DO_NMSUB(N, M, D) (-(M * D) + N)
1678 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1679 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1680 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1681 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1682 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1683 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1684 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1685 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1686 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1687 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1688 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1689 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1690 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1691 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1692 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1693 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
1694 GEN_VEXT_VV(vmacc_vv_b, 1, 1)
1695 GEN_VEXT_VV(vmacc_vv_h, 2, 2)
1696 GEN_VEXT_VV(vmacc_vv_w, 4, 4)
1697 GEN_VEXT_VV(vmacc_vv_d, 8, 8)
1698 GEN_VEXT_VV(vnmsac_vv_b, 1, 1)
1699 GEN_VEXT_VV(vnmsac_vv_h, 2, 2)
1700 GEN_VEXT_VV(vnmsac_vv_w, 4, 4)
1701 GEN_VEXT_VV(vnmsac_vv_d, 8, 8)
1702 GEN_VEXT_VV(vmadd_vv_b, 1, 1)
1703 GEN_VEXT_VV(vmadd_vv_h, 2, 2)
1704 GEN_VEXT_VV(vmadd_vv_w, 4, 4)
1705 GEN_VEXT_VV(vmadd_vv_d, 8, 8)
1706 GEN_VEXT_VV(vnmsub_vv_b, 1, 1)
1707 GEN_VEXT_VV(vnmsub_vv_h, 2, 2)
1708 GEN_VEXT_VV(vnmsub_vv_w, 4, 4)
1709 GEN_VEXT_VV(vnmsub_vv_d, 8, 8)
1710 
1711 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
1712 static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
1713 {                                                                   \
1714     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
1715     TD d = *((TD *)vd + HD(i));                                     \
1716     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d);                   \
1717 }
1718 
1719 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1720 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1721 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1722 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1723 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1724 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1725 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1726 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1727 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1728 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1729 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1730 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1731 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1732 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1733 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1734 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
1735 GEN_VEXT_VX(vmacc_vx_b, 1, 1)
1736 GEN_VEXT_VX(vmacc_vx_h, 2, 2)
1737 GEN_VEXT_VX(vmacc_vx_w, 4, 4)
1738 GEN_VEXT_VX(vmacc_vx_d, 8, 8)
1739 GEN_VEXT_VX(vnmsac_vx_b, 1, 1)
1740 GEN_VEXT_VX(vnmsac_vx_h, 2, 2)
1741 GEN_VEXT_VX(vnmsac_vx_w, 4, 4)
1742 GEN_VEXT_VX(vnmsac_vx_d, 8, 8)
1743 GEN_VEXT_VX(vmadd_vx_b, 1, 1)
1744 GEN_VEXT_VX(vmadd_vx_h, 2, 2)
1745 GEN_VEXT_VX(vmadd_vx_w, 4, 4)
1746 GEN_VEXT_VX(vmadd_vx_d, 8, 8)
1747 GEN_VEXT_VX(vnmsub_vx_b, 1, 1)
1748 GEN_VEXT_VX(vnmsub_vx_h, 2, 2)
1749 GEN_VEXT_VX(vnmsub_vx_w, 4, 4)
1750 GEN_VEXT_VX(vnmsub_vx_d, 8, 8)
1751 
1752 /* Vector Widening Integer Multiply-Add Instructions */
1753 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1754 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1755 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1756 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1757 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1758 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1759 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1760 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1761 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
1762 GEN_VEXT_VV(vwmaccu_vv_b, 1, 2)
1763 GEN_VEXT_VV(vwmaccu_vv_h, 2, 4)
1764 GEN_VEXT_VV(vwmaccu_vv_w, 4, 8)
1765 GEN_VEXT_VV(vwmacc_vv_b, 1, 2)
1766 GEN_VEXT_VV(vwmacc_vv_h, 2, 4)
1767 GEN_VEXT_VV(vwmacc_vv_w, 4, 8)
1768 GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2)
1769 GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4)
1770 GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8)
1771 
1772 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
1773 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
1774 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
1775 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
1776 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
1777 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
1778 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
1779 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
1780 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
1781 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
1782 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
1783 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
1784 GEN_VEXT_VX(vwmaccu_vx_b, 1, 2)
1785 GEN_VEXT_VX(vwmaccu_vx_h, 2, 4)
1786 GEN_VEXT_VX(vwmaccu_vx_w, 4, 8)
1787 GEN_VEXT_VX(vwmacc_vx_b, 1, 2)
1788 GEN_VEXT_VX(vwmacc_vx_h, 2, 4)
1789 GEN_VEXT_VX(vwmacc_vx_w, 4, 8)
1790 GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2)
1791 GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4)
1792 GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8)
1793 GEN_VEXT_VX(vwmaccus_vx_b, 1, 2)
1794 GEN_VEXT_VX(vwmaccus_vx_h, 2, 4)
1795 GEN_VEXT_VX(vwmaccus_vx_w, 4, 8)
1796 
1797 /* Vector Integer Merge and Move Instructions */
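/*
 * vmv.v.v and vmv.v.x copy unconditionally and do not consult the mask
 * register; only vmerge below selects per element based on v0.
 */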
1798 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H)                              \
1799 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env,           \
1800                   uint32_t desc)                                     \
1801 {                                                                    \
1802     uint32_t vl = env->vl;                                           \
1803     uint32_t i;                                                      \
1804                                                                      \
1805     for (i = env->vstart; i < vl; i++) {                             \
1806         ETYPE s1 = *((ETYPE *)vs1 + H(i));                           \
1807         *((ETYPE *)vd + H(i)) = s1;                                  \
1808     }                                                                \
1809     env->vstart = 0;                                                 \
1810 }
1811 
1812 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t,  H1)
1813 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
1814 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
1815 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
1816 
1817 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H)                              \
1818 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env,         \
1819                   uint32_t desc)                                     \
1820 {                                                                    \
1821     uint32_t vl = env->vl;                                           \
1822     uint32_t i;                                                      \
1823                                                                      \
1824     for (i = env->vstart; i < vl; i++) {                             \
1825         *((ETYPE *)vd + H(i)) = (ETYPE)s1;                           \
1826     }                                                                \
1827     env->vstart = 0;                                                 \
1828 }
1829 
1830 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t,  H1)
1831 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
1832 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
1833 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
1834 
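/*
 * vmerge: each destination element comes from vs1 (or the scalar) when its
 * v0 mask bit is set, and from vs2 otherwise.
 */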
1835 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H)                           \
1836 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,          \
1837                   CPURISCVState *env, uint32_t desc)                 \
1838 {                                                                    \
1839     uint32_t vl = env->vl;                                           \
1840     uint32_t i;                                                      \
1841                                                                      \
1842     for (i = env->vstart; i < vl; i++) {                             \
1843         ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1);            \
1844         *((ETYPE *)vd + H(i)) = *(vt + H(i));                        \
1845     }                                                                \
1846     env->vstart = 0;                                                 \
1847 }
1848 
1849 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t,  H1)
1850 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
1851 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
1852 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
1853 
1854 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H)                           \
1855 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,               \
1856                   void *vs2, CPURISCVState *env, uint32_t desc)      \
1857 {                                                                    \
1858     uint32_t vl = env->vl;                                           \
1859     uint32_t i;                                                      \
1860                                                                      \
1861     for (i = env->vstart; i < vl; i++) {                             \
1862         ETYPE s2 = *((ETYPE *)vs2 + H(i));                           \
1863         ETYPE d = (!vext_elem_mask(v0, i) ? s2 :                     \
1864                    (ETYPE)(target_long)s1);                          \
1865         *((ETYPE *)vd + H(i)) = d;                                   \
1866     }                                                                \
1867     env->vstart = 0;                                                 \
1868 }
1869 
1870 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t,  H1)
1871 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
1872 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
1873 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
1874 
1875 /*
1876  *** Vector Fixed-Point Arithmetic Instructions
1877  */
1878 
1879 /* Vector Single-Width Saturating Add and Subtract */
1880 
1881 /*
1882  * Fixed-point instructions take a rounding mode and may saturate, so
1883  * define the common fixed-point helper macros here.
1884  */
1885 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
1886                           CPURISCVState *env, int vxrm);
1887 
1888 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)     \
1889 static inline void                                                  \
1890 do_##NAME(void *vd, void *vs1, void *vs2, int i,                    \
1891           CPURISCVState *env, int vxrm)                             \
1892 {                                                                   \
1893     TX1 s1 = *((T1 *)vs1 + HS1(i));                                 \
1894     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
1895     *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1);                    \
1896 }
1897 
1898 static inline void
1899 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
1900              CPURISCVState *env,
1901              uint32_t vl, uint32_t vm, int vxrm,
1902              opivv2_rm_fn *fn)
1903 {
1904     for (uint32_t i = env->vstart; i < vl; i++) {
1905         if (!vm && !vext_elem_mask(v0, i)) {
1906             continue;
1907         }
1908         fn(vd, vs1, vs2, i, env, vxrm);
1909     }
1910     env->vstart = 0;
1911 }
1912 
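/*
 * Dispatch on the rounding mode once, outside the element loop, and pass
 * the mode down as a plain integer constant.
 */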
1913 static inline void
1914 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
1915              CPURISCVState *env,
1916              uint32_t desc, uint32_t esz, uint32_t dsz,
1917              opivv2_rm_fn *fn)
1918 {
1919     uint32_t vm = vext_vm(desc);
1920     uint32_t vl = env->vl;
1921 
1922     switch (env->vxrm) {
1923     case 0: /* rnu */
1924         vext_vv_rm_1(vd, v0, vs1, vs2,
1925                      env, vl, vm, 0, fn);
1926         break;
1927     case 1: /* rne */
1928         vext_vv_rm_1(vd, v0, vs1, vs2,
1929                      env, vl, vm, 1, fn);
1930         break;
1931     case 2: /* rdn */
1932         vext_vv_rm_1(vd, v0, vs1, vs2,
1933                      env, vl, vm, 2, fn);
1934         break;
1935     default: /* rod */
1936         vext_vv_rm_1(vd, v0, vs1, vs2,
1937                      env, vl, vm, 3, fn);
1938         break;
1939     }
1940 }
1941 
1942 /* generate helpers for fixed point instructions with OPIVV format */
1943 #define GEN_VEXT_VV_RM(NAME, ESZ, DSZ)                          \
1944 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,     \
1945                   CPURISCVState *env, uint32_t desc)            \
1946 {                                                               \
1947     vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,         \
1948                  do_##NAME);                                    \
1949 }
1950 
1951 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
1952 {
1953     uint8_t res = a + b;
1954     if (res < a) {
1955         res = UINT8_MAX;
1956         env->vxsat = 0x1;
1957     }
1958     return res;
1959 }
1960 
1961 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
1962                                uint16_t b)
1963 {
1964     uint16_t res = a + b;
1965     if (res < a) {
1966         res = UINT16_MAX;
1967         env->vxsat = 0x1;
1968     }
1969     return res;
1970 }
1971 
1972 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
1973                                uint32_t b)
1974 {
1975     uint32_t res = a + b;
1976     if (res < a) {
1977         res = UINT32_MAX;
1978         env->vxsat = 0x1;
1979     }
1980     return res;
1981 }
1982 
1983 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
1984                                uint64_t b)
1985 {
1986     uint64_t res = a + b;
1987     if (res < a) {
1988         res = UINT64_MAX;
1989         env->vxsat = 0x1;
1990     }
1991     return res;
1992 }
1993 
1994 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
1995 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
1996 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
1997 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
1998 GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1)
1999 GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2)
2000 GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4)
2001 GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8)
2002 
2003 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
2004                           CPURISCVState *env, int vxrm);
2005 
2006 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)          \
2007 static inline void                                                  \
2008 do_##NAME(void *vd, target_long s1, void *vs2, int i,               \
2009           CPURISCVState *env, int vxrm)                             \
2010 {                                                                   \
2011     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
2012     *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1);           \
2013 }
2014 
2015 static inline void
2016 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
2017              CPURISCVState *env,
2018              uint32_t vl, uint32_t vm, int vxrm,
2019              opivx2_rm_fn *fn)
2020 {
2021     for (uint32_t i = env->vstart; i < vl; i++) {
2022         if (!vm && !vext_elem_mask(v0, i)) {
2023             continue;
2024         }
2025         fn(vd, s1, vs2, i, env, vxrm);
2026     }
2027     env->vstart = 0;
2028 }
2029 
2030 static inline void
2031 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2032              CPURISCVState *env,
2033              uint32_t desc, uint32_t esz, uint32_t dsz,
2034              opivx2_rm_fn *fn)
2035 {
2036     uint32_t vm = vext_vm(desc);
2037     uint32_t vl = env->vl;
2038 
2039     switch (env->vxrm) {
2040     case 0: /* rnu */
2041         vext_vx_rm_1(vd, v0, s1, vs2,
2042                      env, vl, vm, 0, fn);
2043         break;
2044     case 1: /* rne */
2045         vext_vx_rm_1(vd, v0, s1, vs2,
2046                      env, vl, vm, 1, fn);
2047         break;
2048     case 2: /* rdn */
2049         vext_vx_rm_1(vd, v0, s1, vs2,
2050                      env, vl, vm, 2, fn);
2051         break;
2052     default: /* rod */
2053         vext_vx_rm_1(vd, v0, s1, vs2,
2054                      env, vl, vm, 3, fn);
2055         break;
2056     }
2057 }
2058 
2059 /* generate helpers for fixed point instructions with OPIVX format */
2060 #define GEN_VEXT_VX_RM(NAME, ESZ, DSZ)                    \
2061 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
2062         void *vs2, CPURISCVState *env, uint32_t desc)     \
2063 {                                                         \
2064     vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ,    \
2065                  do_##NAME);                              \
2066 }
2067 
2068 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2069 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2070 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2071 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
2072 GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1)
2073 GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2)
2074 GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4)
2075 GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8)
2076 
2077 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2078 {
2079     int8_t res = a + b;
2080     if ((res ^ a) & (res ^ b) & INT8_MIN) {
2081         res = a > 0 ? INT8_MAX : INT8_MIN;
2082         env->vxsat = 0x1;
2083     }
2084     return res;
2085 }
2086 
2087 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2088 {
2089     int16_t res = a + b;
2090     if ((res ^ a) & (res ^ b) & INT16_MIN) {
2091         res = a > 0 ? INT16_MAX : INT16_MIN;
2092         env->vxsat = 0x1;
2093     }
2094     return res;
2095 }
2096 
2097 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2098 {
2099     int32_t res = a + b;
2100     if ((res ^ a) & (res ^ b) & INT32_MIN) {
2101         res = a > 0 ? INT32_MAX : INT32_MIN;
2102         env->vxsat = 0x1;
2103     }
2104     return res;
2105 }
2106 
2107 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2108 {
2109     int64_t res = a + b;
2110     if ((res ^ a) & (res ^ b) & INT64_MIN) {
2111         res = a > 0 ? INT64_MAX : INT64_MIN;
2112         env->vxsat = 0x1;
2113     }
2114     return res;
2115 }
2116 
2117 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2118 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2119 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2120 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
2121 GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1)
2122 GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2)
2123 GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4)
2124 GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8)
2125 
2126 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2127 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2128 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2129 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
2130 GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1)
2131 GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2)
2132 GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4)
2133 GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8)
2134 
2135 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2136 {
2137     uint8_t res = a - b;
2138     if (res > a) {
2139         res = 0;
2140         env->vxsat = 0x1;
2141     }
2142     return res;
2143 }
2144 
2145 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2146                                uint16_t b)
2147 {
2148     uint16_t res = a - b;
2149     if (res > a) {
2150         res = 0;
2151         env->vxsat = 0x1;
2152     }
2153     return res;
2154 }
2155 
2156 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2157                                uint32_t b)
2158 {
2159     uint32_t res = a - b;
2160     if (res > a) {
2161         res = 0;
2162         env->vxsat = 0x1;
2163     }
2164     return res;
2165 }
2166 
2167 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2168                                uint64_t b)
2169 {
2170     uint64_t res = a - b;
2171     if (res > a) {
2172         res = 0;
2173         env->vxsat = 0x1;
2174     }
2175     return res;
2176 }
2177 
2178 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2179 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2180 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2181 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
2182 GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1)
2183 GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2)
2184 GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4)
2185 GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8)
2186 
2187 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2188 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2189 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2190 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
2191 GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1)
2192 GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2)
2193 GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4)
2194 GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8)
2195 
2196 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2197 {
2198     int8_t res = a - b;
2199     if ((res ^ a) & (a ^ b) & INT8_MIN) {
2200         res = a >= 0 ? INT8_MAX : INT8_MIN;
2201         env->vxsat = 0x1;
2202     }
2203     return res;
2204 }
2205 
2206 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2207 {
2208     int16_t res = a - b;
2209     if ((res ^ a) & (a ^ b) & INT16_MIN) {
2210         res = a >= 0 ? INT16_MAX : INT16_MIN;
2211         env->vxsat = 0x1;
2212     }
2213     return res;
2214 }
2215 
2216 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2217 {
2218     int32_t res = a - b;
2219     if ((res ^ a) & (a ^ b) & INT32_MIN) {
2220         res = a >= 0 ? INT32_MAX : INT32_MIN;
2221         env->vxsat = 0x1;
2222     }
2223     return res;
2224 }
2225 
2226 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2227 {
2228     int64_t res = a - b;
2229     if ((res ^ a) & (a ^ b) & INT64_MIN) {
2230         res = a >= 0 ? INT64_MAX : INT64_MIN;
2231         env->vxsat = 0x1;
2232     }
2233     return res;
2234 }
2235 
2236 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2237 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2238 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2239 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
2240 GEN_VEXT_VV_RM(vssub_vv_b, 1, 1)
2241 GEN_VEXT_VV_RM(vssub_vv_h, 2, 2)
2242 GEN_VEXT_VV_RM(vssub_vv_w, 4, 4)
2243 GEN_VEXT_VV_RM(vssub_vv_d, 8, 8)
2244 
2245 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2246 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2247 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2248 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
2249 GEN_VEXT_VX_RM(vssub_vx_b, 1, 1)
2250 GEN_VEXT_VX_RM(vssub_vx_h, 2, 2)
2251 GEN_VEXT_VX_RM(vssub_vx_w, 4, 4)
2252 GEN_VEXT_VX_RM(vssub_vx_d, 8, 8)
2253 
2254 /* Vector Single-Width Averaging Add and Subtract */
2255 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2256 {
2257     uint8_t d = extract64(v, shift, 1);
2258     uint8_t d1;
2259     uint64_t D1, D2;
2260 
2261     if (shift == 0 || shift > 64) {
2262         return 0;
2263     }
2264 
2265     d1 = extract64(v, shift - 1, 1);
2266     D1 = extract64(v, 0, shift);
2267     if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2268         return d1;
2269     } else if (vxrm == 1) { /* round-to-nearest-even */
2270         if (shift > 1) {
2271             D2 = extract64(v, 0, shift - 1);
2272             return d1 & ((D2 != 0) | d);
2273         } else {
2274             return d1 & d;
2275         }
2276     } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2277         return !d & (D1 != 0);
2278     }
2279     return 0; /* round-down (truncate) */
2280 }
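/*
 * Worked example: v = 0b1011, shift = 2, so the truncated result is 0b10
 * and the discarded bits are 0b11:
 *   d  = bit 2 of v = 0   (LSB of the truncated result)
 *   d1 = bit 1 of v = 1   (most significant discarded bit)
 *   D1 = 0b11             (all discarded bits)
 *   rnu (0): round = d1 = 1                    -> 0b10 + 1 = 0b11
 *   rne (1): round = d1 & ((D2 != 0) | d) = 1  -> 0b11
 *   rdn (2): round = 0                         -> 0b10
 *   rod (3): round = !d & (D1 != 0) = 1        -> 0b11 (forces an odd LSB)
 */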
2281 
2282 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2283 {
2284     int64_t res = (int64_t)a + b;
2285     uint8_t round = get_round(vxrm, res, 1);
2286 
2287     return (res >> 1) + round;
2288 }
2289 
2290 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2291 {
2292     int64_t res = a + b;
2293     uint8_t round = get_round(vxrm, res, 1);
2294     int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2295 
2296     /* With signed overflow, bit 64 is inverse of bit 63. */
2297     return ((res >> 1) ^ over) + round;
2298 }
2299 
2300 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2301 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2302 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2303 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
2304 GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1)
2305 GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2)
2306 GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4)
2307 GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8)
2308 
2309 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2310 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2311 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2312 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
2313 GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1)
2314 GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2)
2315 GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4)
2316 GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8)
2317 
2318 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2319                                uint32_t a, uint32_t b)
2320 {
2321     uint64_t res = (uint64_t)a + b;
2322     uint8_t round = get_round(vxrm, res, 1);
2323 
2324     return (res >> 1) + round;
2325 }
2326 
2327 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
2328                                uint64_t a, uint64_t b)
2329 {
2330     uint64_t res = a + b;
2331     uint8_t round = get_round(vxrm, res, 1);
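    /* (res < a) detects the carry, which becomes bit 63 of the halved sum. */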
2332     uint64_t over = (uint64_t)(res < a) << 63;
2333 
2334     return ((res >> 1) | over) + round;
2335 }
2336 
2337 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
2338 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
2339 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
2340 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
2341 GEN_VEXT_VV_RM(vaaddu_vv_b, 1, 1)
2342 GEN_VEXT_VV_RM(vaaddu_vv_h, 2, 2)
2343 GEN_VEXT_VV_RM(vaaddu_vv_w, 4, 4)
2344 GEN_VEXT_VV_RM(vaaddu_vv_d, 8, 8)
2345 
2346 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
2347 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
2348 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
2349 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
2350 GEN_VEXT_VX_RM(vaaddu_vx_b, 1, 1)
2351 GEN_VEXT_VX_RM(vaaddu_vx_h, 2, 2)
2352 GEN_VEXT_VX_RM(vaaddu_vx_w, 4, 4)
2353 GEN_VEXT_VX_RM(vaaddu_vx_d, 8, 8)
2354 
2355 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2356 {
2357     int64_t res = (int64_t)a - b;
2358     uint8_t round = get_round(vxrm, res, 1);
2359 
2360     return (res >> 1) + round;
2361 }
2362 
2363 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2364 {
2365     int64_t res = (int64_t)a - b;
2366     uint8_t round = get_round(vxrm, res, 1);
2367     int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2368 
2369     /* With signed overflow, bit 64 is inverse of bit 63. */
2370     return ((res >> 1) ^ over) + round;
2371 }
2372 
2373 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2374 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2375 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2376 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
2377 GEN_VEXT_VV_RM(vasub_vv_b, 1, 1)
2378 GEN_VEXT_VV_RM(vasub_vv_h, 2, 2)
2379 GEN_VEXT_VV_RM(vasub_vv_w, 4, 4)
2380 GEN_VEXT_VV_RM(vasub_vv_d, 8, 8)
2381 
2382 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2383 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2384 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2385 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
2386 GEN_VEXT_VX_RM(vasub_vx_b, 1, 1)
2387 GEN_VEXT_VX_RM(vasub_vx_h, 2, 2)
2388 GEN_VEXT_VX_RM(vasub_vx_w, 4, 4)
2389 GEN_VEXT_VX_RM(vasub_vx_d, 8, 8)
2390 
2391 static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2392                                uint32_t a, uint32_t b)
2393 {
2394     int64_t res = (int64_t)a - b;
2395     uint8_t round = get_round(vxrm, res, 1);
2396 
2397     return (res >> 1) + round;
2398 }
2399 
2400 static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
2401                                uint64_t a, uint64_t b)
2402 {
2403     uint64_t res = (uint64_t)a - b;
2404     uint8_t round = get_round(vxrm, res, 1);
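    /* (res > a) detects the borrow; the 65-bit difference is then negative. */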
2405     uint64_t over = (uint64_t)(res > a) << 63;
2406 
2407     return ((res >> 1) | over) + round;
2408 }
2409 
2410 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
2411 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
2412 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
2413 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
2414 GEN_VEXT_VV_RM(vasubu_vv_b, 1, 1)
2415 GEN_VEXT_VV_RM(vasubu_vv_h, 2, 2)
2416 GEN_VEXT_VV_RM(vasubu_vv_w, 4, 4)
2417 GEN_VEXT_VV_RM(vasubu_vv_d, 8, 8)
2418 
2419 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
2420 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
2421 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
2422 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
2423 GEN_VEXT_VX_RM(vasubu_vx_b, 1, 1)
2424 GEN_VEXT_VX_RM(vasubu_vx_h, 2, 2)
2425 GEN_VEXT_VX_RM(vasubu_vx_w, 4, 4)
2426 GEN_VEXT_VX_RM(vasubu_vx_d, 8, 8)
2427 
2428 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */
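/*
 * vsmul returns (a * b) >> (SEW - 1) with rounding, i.e. a fractional
 * multiply of two signed fixed-point values.  (-2^(SEW-1)) * (-2^(SEW-1))
 * would produce 2^(SEW-1), which does not fit and saturates to the maximum.
 */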
2429 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2430 {
2431     uint8_t round;
2432     int16_t res;
2433 
2434     res = (int16_t)a * (int16_t)b;
2435     round = get_round(vxrm, res, 7);
2436     res   = (res >> 7) + round;
2437 
2438     if (res > INT8_MAX) {
2439         env->vxsat = 0x1;
2440         return INT8_MAX;
2441     } else if (res < INT8_MIN) {
2442         env->vxsat = 0x1;
2443         return INT8_MIN;
2444     } else {
2445         return res;
2446     }
2447 }
2448 
2449 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2450 {
2451     uint8_t round;
2452     int32_t res;
2453 
2454     res = (int32_t)a * (int32_t)b;
2455     round = get_round(vxrm, res, 15);
2456     res   = (res >> 15) + round;
2457 
2458     if (res > INT16_MAX) {
2459         env->vxsat = 0x1;
2460         return INT16_MAX;
2461     } else if (res < INT16_MIN) {
2462         env->vxsat = 0x1;
2463         return INT16_MIN;
2464     } else {
2465         return res;
2466     }
2467 }
2468 
2469 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2470 {
2471     uint8_t round;
2472     int64_t res;
2473 
2474     res = (int64_t)a * (int64_t)b;
2475     round = get_round(vxrm, res, 31);
2476     res   = (res >> 31) + round;
2477 
2478     if (res > INT32_MAX) {
2479         env->vxsat = 0x1;
2480         return INT32_MAX;
2481     } else if (res < INT32_MIN) {
2482         env->vxsat = 0x1;
2483         return INT32_MIN;
2484     } else {
2485         return res;
2486     }
2487 }
2488 
2489 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2490 {
2491     uint8_t round;
2492     uint64_t hi_64, lo_64;
2493     int64_t res;
2494 
2495     if (a == INT64_MIN && b == INT64_MIN) {
2496         env->vxsat = 1;
2497         return INT64_MAX;
2498     }
2499 
2500     muls64(&lo_64, &hi_64, a, b);
2501     round = get_round(vxrm, lo_64, 63);
2502     /*
2503      * Cannot overflow, as there are always
2504      * 2 sign bits after multiply.
2505      */
2506     res = (hi_64 << 1) | (lo_64 >> 63);
2507     if (round) {
2508         if (res == INT64_MAX) {
2509             env->vxsat = 1;
2510         } else {
2511             res += 1;
2512         }
2513     }
2514     return res;
2515 }
2516 
2517 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2518 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2519 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2520 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
2521 GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1)
2522 GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2)
2523 GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4)
2524 GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8)
2525 
2526 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2527 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2528 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2529 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
2530 GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1)
2531 GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2)
2532 GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4)
2533 GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8)
2534 
2535 /* Vector Single-Width Scaling Shift Instructions */
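/*
 * vssrl/vssra shift right by the low log2(SEW) bits of the shift operand
 * and round the shifted-out bits according to vxrm; no saturation occurs.
 */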
2536 static inline uint8_t
2537 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2538 {
2539     uint8_t round, shift = b & 0x7;
2540     uint8_t res;
2541 
2542     round = get_round(vxrm, a, shift);
2543     res   = (a >> shift)  + round;
2544     return res;
2545 }
2546 static inline uint16_t
2547 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2548 {
2549     uint8_t round, shift = b & 0xf;
2550     uint16_t res;
2551 
2552     round = get_round(vxrm, a, shift);
2553     res   = (a >> shift)  + round;
2554     return res;
2555 }
2556 static inline uint32_t
2557 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2558 {
2559     uint8_t round, shift = b & 0x1f;
2560     uint32_t res;
2561 
2562     round = get_round(vxrm, a, shift);
2563     res   = (a >> shift)  + round;
2564     return res;
2565 }
2566 static inline uint64_t
2567 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2568 {
2569     uint8_t round, shift = b & 0x3f;
2570     uint64_t res;
2571 
2572     round = get_round(vxrm, a, shift);
2573     res   = (a >> shift)  + round;
2574     return res;
2575 }
2576 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2577 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2578 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2579 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
2580 GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1)
2581 GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2)
2582 GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4)
2583 GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8)
2584 
2585 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2586 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2587 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2588 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
2589 GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1)
2590 GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2)
2591 GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4)
2592 GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8)
2593 
2594 static inline int8_t
2595 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2596 {
2597     uint8_t round, shift = b & 0x7;
2598     int8_t res;
2599 
2600     round = get_round(vxrm, a, shift);
2601     res   = (a >> shift)  + round;
2602     return res;
2603 }
2604 static inline int16_t
2605 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2606 {
2607     uint8_t round, shift = b & 0xf;
2608     int16_t res;
2609 
2610     round = get_round(vxrm, a, shift);
2611     res   = (a >> shift)  + round;
2612     return res;
2613 }
2614 static inline int32_t
2615 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2616 {
2617     uint8_t round, shift = b & 0x1f;
2618     int32_t res;
2619 
2620     round = get_round(vxrm, a, shift);
2621     res   = (a >> shift)  + round;
2622     return res;
2623 }
2624 static inline int64_t
2625 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2626 {
2627     uint8_t round, shift = b & 0x3f;
2628     int64_t res;
2629 
2630     round = get_round(vxrm, a, shift);
2631     res   = (a >> shift)  + round;
2632     return res;
2633 }
2634 
2635 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2636 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2637 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2638 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
2639 GEN_VEXT_VV_RM(vssra_vv_b, 1, 1)
2640 GEN_VEXT_VV_RM(vssra_vv_h, 2, 2)
2641 GEN_VEXT_VV_RM(vssra_vv_w, 4, 4)
2642 GEN_VEXT_VV_RM(vssra_vv_d, 8, 8)
2643 
2644 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2645 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2646 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2647 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
2648 GEN_VEXT_VX_RM(vssra_vx_b, 1, 1)
2649 GEN_VEXT_VX_RM(vssra_vx_h, 2, 2)
2650 GEN_VEXT_VX_RM(vssra_vx_w, 4, 4)
2651 GEN_VEXT_VX_RM(vssra_vx_d, 8, 8)
2652 
2653 /* Vector Narrowing Fixed-Point Clip Instructions */
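/*
 * vnclip/vnclipu shift a 2*SEW-wide source element right, round the
 * discarded bits, and saturate the result into a SEW-wide destination,
 * setting vxsat when clipping occurs.
 */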
2654 static inline int8_t
2655 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2656 {
2657     uint8_t round, shift = b & 0xf;
2658     int16_t res;
2659 
2660     round = get_round(vxrm, a, shift);
2661     res   = (a >> shift)  + round;
2662     if (res > INT8_MAX) {
2663         env->vxsat = 0x1;
2664         return INT8_MAX;
2665     } else if (res < INT8_MIN) {
2666         env->vxsat = 0x1;
2667         return INT8_MIN;
2668     } else {
2669         return res;
2670     }
2671 }
2672 
2673 static inline int16_t
2674 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2675 {
2676     uint8_t round, shift = b & 0x1f;
2677     int32_t res;
2678 
2679     round = get_round(vxrm, a, shift);
2680     res   = (a >> shift)  + round;
2681     if (res > INT16_MAX) {
2682         env->vxsat = 0x1;
2683         return INT16_MAX;
2684     } else if (res < INT16_MIN) {
2685         env->vxsat = 0x1;
2686         return INT16_MIN;
2687     } else {
2688         return res;
2689     }
2690 }
2691 
2692 static inline int32_t
2693 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2694 {
2695     uint8_t round, shift = b & 0x3f;
2696     int64_t res;
2697 
2698     round = get_round(vxrm, a, shift);
2699     res   = (a >> shift)  + round;
2700     if (res > INT32_MAX) {
2701         env->vxsat = 0x1;
2702         return INT32_MAX;
2703     } else if (res < INT32_MIN) {
2704         env->vxsat = 0x1;
2705         return INT32_MIN;
2706     } else {
2707         return res;
2708     }
2709 }
2710 
2711 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
2712 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
2713 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
2714 GEN_VEXT_VV_RM(vnclip_wv_b, 1, 1)
2715 GEN_VEXT_VV_RM(vnclip_wv_h, 2, 2)
2716 GEN_VEXT_VV_RM(vnclip_wv_w, 4, 4)
2717 
2718 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
2719 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
2720 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
2721 GEN_VEXT_VX_RM(vnclip_wx_b, 1, 1)
2722 GEN_VEXT_VX_RM(vnclip_wx_h, 2, 2)
2723 GEN_VEXT_VX_RM(vnclip_wx_w, 4, 4)
2724 
2725 static inline uint8_t
2726 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2727 {
2728     uint8_t round, shift = b & 0xf;
2729     uint16_t res;
2730 
2731     round = get_round(vxrm, a, shift);
2732     res   = (a >> shift)  + round;
2733     if (res > UINT8_MAX) {
2734         env->vxsat = 0x1;
2735         return UINT8_MAX;
2736     } else {
2737         return res;
2738     }
2739 }
2740 
2741 static inline uint16_t
2742 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
2743 {
2744     uint8_t round, shift = b & 0x1f;
2745     uint32_t res;
2746 
2747     round = get_round(vxrm, a, shift);
2748     res   = (a >> shift)  + round;
2749     if (res > UINT16_MAX) {
2750         env->vxsat = 0x1;
2751         return UINT16_MAX;
2752     } else {
2753         return res;
2754     }
2755 }
2756 
2757 static inline uint32_t
2758 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
2759 {
2760     uint8_t round, shift = b & 0x3f;
2761     uint64_t res;
2762 
2763     round = get_round(vxrm, a, shift);
2764     res   = (a >> shift)  + round;
2765     if (res > UINT32_MAX) {
2766         env->vxsat = 0x1;
2767         return UINT32_MAX;
2768     } else {
2769         return res;
2770     }
2771 }
2772 
2773 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
2774 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
2775 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
2776 GEN_VEXT_VV_RM(vnclipu_wv_b, 1, 1)
2777 GEN_VEXT_VV_RM(vnclipu_wv_h, 2, 2)
2778 GEN_VEXT_VV_RM(vnclipu_wv_w, 4, 4)
2779 
2780 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
2781 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
2782 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
2783 GEN_VEXT_VX_RM(vnclipu_wx_b, 1, 1)
2784 GEN_VEXT_VX_RM(vnclipu_wx_h, 2, 2)
2785 GEN_VEXT_VX_RM(vnclipu_wx_w, 4, 4)
2786 
2787 /*
2788  *** Vector Floating-Point Arithmetic Instructions
2789  */
2790 /* Vector Single-Width Floating-Point Add/Subtract Instructions */
2791 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
2792 static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
2793                       CPURISCVState *env)                      \
2794 {                                                              \
2795     TX1 s1 = *((T1 *)vs1 + HS1(i));                            \
2796     TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
2797     *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status);         \
2798 }
2799 
2800 #define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ)                   \
2801 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
2802                   void *vs2, CPURISCVState *env,          \
2803                   uint32_t desc)                          \
2804 {                                                         \
2805     uint32_t vm = vext_vm(desc);                          \
2806     uint32_t vl = env->vl;                                \
2807     uint32_t i;                                           \
2808                                                           \
2809     for (i = env->vstart; i < vl; i++) {                  \
2810         if (!vm && !vext_elem_mask(v0, i)) {              \
2811             continue;                                     \
2812         }                                                 \
2813         do_##NAME(vd, vs1, vs2, i, env);                  \
2814     }                                                     \
2815     env->vstart = 0;                                      \
2816 }
2817 
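/*
 * RVVCALL() simply forwards its arguments to the OPFVV2() macro above;
 * with OP_UUU_H supplying uint16_t for every type slot, the first line
 * below expands roughly to:
 *
 *     static void do_vfadd_vv_h(void *vd, void *vs1, void *vs2, int i,
 *                               CPURISCVState *env)
 *     {
 *         uint16_t s1 = *((uint16_t *)vs1 + H2(i));
 *         uint16_t s2 = *((uint16_t *)vs2 + H2(i));
 *         *((uint16_t *)vd + H2(i)) = float16_add(s2, s1, &env->fp_status);
 *     }
 *
 * and GEN_VEXT_VV_ENV() then provides the helper that loops this over the
 * active elements.
 */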
2818 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
2819 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
2820 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
2821 GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2)
2822 GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4)
2823 GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8)
2824 
2825 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)        \
2826 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
2827                       CPURISCVState *env)                      \
2828 {                                                              \
2829     TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
2830     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
2831 }
2832 
2833 #define GEN_VEXT_VF(NAME, ESZ, DSZ)                       \
2834 void HELPER(NAME)(void *vd, void *v0, uint64_t s1,        \
2835                   void *vs2, CPURISCVState *env,          \
2836                   uint32_t desc)                          \
2837 {                                                         \
2838     uint32_t vm = vext_vm(desc);                          \
2839     uint32_t vl = env->vl;                                \
2840     uint32_t i;                                           \
2841                                                           \
2842     for (i = env->vstart; i < vl; i++) {                  \
2843         if (!vm && !vext_elem_mask(v0, i)) {              \
2844             continue;                                     \
2845         }                                                 \
2846         do_##NAME(vd, s1, vs2, i, env);                   \
2847     }                                                     \
2848     env->vstart = 0;                                      \
2849 }
2850 
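/*
 * Vector-scalar forms: the scalar operand arrives as a uint64_t and is
 * narrowed to the element type by the (TX1)(T1)s1 cast in OPFVF2() before
 * being combined with every active element of vs2.
 */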
2851 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
2852 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
2853 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
2854 GEN_VEXT_VF(vfadd_vf_h, 2, 2)
2855 GEN_VEXT_VF(vfadd_vf_w, 4, 4)
2856 GEN_VEXT_VF(vfadd_vf_d, 8, 8)
2857 
2858 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
2859 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
2860 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
2861 GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2)
2862 GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4)
2863 GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8)
2864 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
2865 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
2866 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
2867 GEN_VEXT_VF(vfsub_vf_h, 2, 2)
2868 GEN_VEXT_VF(vfsub_vf_w, 4, 4)
2869 GEN_VEXT_VF(vfsub_vf_d, 8, 8)
2870 
2871 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
2872 {
2873     return float16_sub(b, a, s);
2874 }
2875 
2876 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
2877 {
2878     return float32_sub(b, a, s);
2879 }
2880 
2881 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
2882 {
2883     return float64_sub(b, a, s);
2884 }
2885 
2886 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
2887 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
2888 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
2889 GEN_VEXT_VF(vfrsub_vf_h, 2, 2)
2890 GEN_VEXT_VF(vfrsub_vf_w, 4, 4)
2891 GEN_VEXT_VF(vfrsub_vf_d, 8, 8)
2892 
2893 /* Vector Widening Floating-Point Add/Subtract Instructions */
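/*
 * The widening forms convert both narrow operands to the 2*SEW type and
 * perform the operation at the wider precision; the boolean passed to
 * float16_to_float32() is the softfloat 'ieee' flag, selecting IEEE rather
 * than the alternative half-precision format.
 */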
2894 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
2895 {
2896     return float32_add(float16_to_float32(a, true, s),
2897             float16_to_float32(b, true, s), s);
2898 }
2899 
2900 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
2901 {
2902     return float64_add(float32_to_float64(a, s),
2903             float32_to_float64(b, s), s);
2904 
2905 }
2906 
2907 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
2908 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
2909 GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4)
2910 GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8)
2911 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
2912 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
2913 GEN_VEXT_VF(vfwadd_vf_h, 2, 4)
2914 GEN_VEXT_VF(vfwadd_vf_w, 4, 8)
2915 
2916 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
2917 {
2918     return float32_sub(float16_to_float32(a, true, s),
2919             float16_to_float32(b, true, s), s);
2920 }
2921 
2922 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
2923 {
2924     return float64_sub(float32_to_float64(a, s),
2925             float32_to_float64(b, s), s);
2926 
2927 }
2928 
2929 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
2930 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
2931 GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4)
2932 GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8)
2933 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
2934 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
2935 GEN_VEXT_VF(vfwsub_vf_h, 2, 4)
2936 GEN_VEXT_VF(vfwsub_vf_w, 4, 8)
2937 
2938 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
2939 {
2940     return float32_add(a, float16_to_float32(b, true, s), s);
2941 }
2942 
2943 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
2944 {
2945     return float64_add(a, float32_to_float64(b, s), s);
2946 }
2947 
2948 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
2949 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
2950 GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4)
2951 GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8)
2952 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
2953 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
2954 GEN_VEXT_VF(vfwadd_wf_h, 2, 4)
2955 GEN_VEXT_VF(vfwadd_wf_w, 4, 8)
2956 
2957 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
2958 {
2959     return float32_sub(a, float16_to_float32(b, true, s), s);
2960 }
2961 
2962 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
2963 {
2964     return float64_sub(a, float32_to_float64(b, s), s);
2965 }
2966 
2967 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
2968 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
2969 GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4)
2970 GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8)
2971 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
2972 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
2973 GEN_VEXT_VF(vfwsub_wf_h, 2, 4)
2974 GEN_VEXT_VF(vfwsub_wf_w, 4, 8)
2975 
2976 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */
2977 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
2978 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
2979 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
2980 GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2)
2981 GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4)
2982 GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8)
2983 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
2984 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
2985 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
2986 GEN_VEXT_VF(vfmul_vf_h, 2, 2)
2987 GEN_VEXT_VF(vfmul_vf_w, 4, 4)
2988 GEN_VEXT_VF(vfmul_vf_d, 8, 8)
2989 
2990 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
2991 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
2992 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
2993 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2)
2994 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4)
2995 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8)
2996 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
2997 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
2998 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
2999 GEN_VEXT_VF(vfdiv_vf_h, 2, 2)
3000 GEN_VEXT_VF(vfdiv_vf_w, 4, 4)
3001 GEN_VEXT_VF(vfdiv_vf_d, 8, 8)
3002 
3003 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3004 {
3005     return float16_div(b, a, s);
3006 }
3007 
3008 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
3009 {
3010     return float32_div(b, a, s);
3011 }
3012 
3013 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
3014 {
3015     return float64_div(b, a, s);
3016 }
3017 
3018 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
3019 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
3020 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
3021 GEN_VEXT_VF(vfrdiv_vf_h, 2, 2)
3022 GEN_VEXT_VF(vfrdiv_vf_w, 4, 4)
3023 GEN_VEXT_VF(vfrdiv_vf_d, 8, 8)
3024 
3025 /* Vector Widening Floating-Point Multiply */
3026 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3027 {
3028     return float32_mul(float16_to_float32(a, true, s),
3029             float16_to_float32(b, true, s), s);
3030 }
3031 
3032 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3033 {
3034     return float64_mul(float32_to_float64(a, s),
3035             float32_to_float64(b, s), s);
3036 
3037 }
3038 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3039 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
3040 GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4)
3041 GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8)
3042 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3043 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
3044 GEN_VEXT_VF(vfwmul_vf_h, 2, 4)
3045 GEN_VEXT_VF(vfwmul_vf_w, 4, 8)
3046 
3047 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
3048 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)       \
3049 static void do_##NAME(void *vd, void *vs1, void *vs2, int i,       \
3050         CPURISCVState *env)                                        \
3051 {                                                                  \
3052     TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
3053     TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
3054     TD d = *((TD *)vd + HD(i));                                    \
3055     *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status);          \
3056 }
3057 
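/*
 * OPFVV3() reads the destination element as a third operand, so the
 * helpers below receive (vs2, vs1, vd); fmacc computes vs1 * vs2 + vd.
 */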
3058 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3059 {
3060     return float16_muladd(a, b, d, 0, s);
3061 }
3062 
3063 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3064 {
3065     return float32_muladd(a, b, d, 0, s);
3066 }
3067 
3068 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3069 {
3070     return float64_muladd(a, b, d, 0, s);
3071 }
3072 
3073 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3074 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3075 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
3076 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2)
3077 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4)
3078 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8)
3079 
3080 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)           \
3081 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i,    \
3082         CPURISCVState *env)                                       \
3083 {                                                                 \
3084     TX2 s2 = *((T2 *)vs2 + HS2(i));                               \
3085     TD d = *((TD *)vd + HD(i));                                   \
3086     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
3087 }
3088 
3089 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3090 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3091 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
3092 GEN_VEXT_VF(vfmacc_vf_h, 2, 2)
3093 GEN_VEXT_VF(vfmacc_vf_w, 4, 4)
3094 GEN_VEXT_VF(vfmacc_vf_d, 8, 8)
3095 
3096 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3097 {
3098     return float16_muladd(a, b, d,
3099             float_muladd_negate_c | float_muladd_negate_product, s);
3100 }
3101 
3102 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3103 {
3104     return float32_muladd(a, b, d,
3105             float_muladd_negate_c | float_muladd_negate_product, s);
3106 }
3107 
3108 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3109 {
3110     return float64_muladd(a, b, d,
3111             float_muladd_negate_c | float_muladd_negate_product, s);
3112 }
3113 
3114 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3115 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3116 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
3117 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2)
3118 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4)
3119 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8)
3120 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3121 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3122 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
3123 GEN_VEXT_VF(vfnmacc_vf_h, 2, 2)
3124 GEN_VEXT_VF(vfnmacc_vf_w, 4, 4)
3125 GEN_VEXT_VF(vfnmacc_vf_d, 8, 8)
3126 
3127 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3128 {
3129     return float16_muladd(a, b, d, float_muladd_negate_c, s);
3130 }
3131 
3132 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3133 {
3134     return float32_muladd(a, b, d, float_muladd_negate_c, s);
3135 }
3136 
3137 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3138 {
3139     return float64_muladd(a, b, d, float_muladd_negate_c, s);
3140 }
3141 
3142 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3143 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3144 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
3145 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2)
3146 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4)
3147 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8)
3148 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3149 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3150 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
3151 GEN_VEXT_VF(vfmsac_vf_h, 2, 2)
3152 GEN_VEXT_VF(vfmsac_vf_w, 4, 4)
3153 GEN_VEXT_VF(vfmsac_vf_d, 8, 8)
3154 
3155 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3156 {
3157     return float16_muladd(a, b, d, float_muladd_negate_product, s);
3158 }
3159 
3160 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3161 {
3162     return float32_muladd(a, b, d, float_muladd_negate_product, s);
3163 }
3164 
3165 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3166 {
3167     return float64_muladd(a, b, d, float_muladd_negate_product, s);
3168 }
3169 
3170 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3171 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3172 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
3173 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2)
3174 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4)
3175 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8)
3176 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3177 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3178 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
3179 GEN_VEXT_VF(vfnmsac_vf_h, 2, 2)
3180 GEN_VEXT_VF(vfnmsac_vf_w, 4, 4)
3181 GEN_VEXT_VF(vfnmsac_vf_d, 8, 8)
3182 
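/*
 * vfmadd multiplies by the destination register instead of accumulating
 * into it: fmadd computes d * b + a, i.e. vd = vd * vs1 + vs2, whereas
 * fmacc above computes a * b + d, i.e. vd = vs1 * vs2 + vd.
 */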
3183 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3184 {
3185     return float16_muladd(d, b, a, 0, s);
3186 }
3187 
3188 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3189 {
3190     return float32_muladd(d, b, a, 0, s);
3191 }
3192 
3193 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3194 {
3195     return float64_muladd(d, b, a, 0, s);
3196 }
3197 
3198 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3199 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3200 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
3201 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2)
3202 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4)
3203 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8)
3204 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3205 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3206 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
3207 GEN_VEXT_VF(vfmadd_vf_h, 2, 2)
3208 GEN_VEXT_VF(vfmadd_vf_w, 4, 4)
3209 GEN_VEXT_VF(vfmadd_vf_d, 8, 8)
3210 
3211 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3212 {
3213     return float16_muladd(d, b, a,
3214             float_muladd_negate_c | float_muladd_negate_product, s);
3215 }
3216 
3217 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3218 {
3219     return float32_muladd(d, b, a,
3220             float_muladd_negate_c | float_muladd_negate_product, s);
3221 }
3222 
3223 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3224 {
3225     return float64_muladd(d, b, a,
3226             float_muladd_negate_c | float_muladd_negate_product, s);
3227 }
3228 
3229 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3230 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3231 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
3232 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2)
3233 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4)
3234 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8)
3235 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3236 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3237 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
3238 GEN_VEXT_VF(vfnmadd_vf_h, 2, 2)
3239 GEN_VEXT_VF(vfnmadd_vf_w, 4, 4)
3240 GEN_VEXT_VF(vfnmadd_vf_d, 8, 8)
3241 
3242 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3243 {
3244     return float16_muladd(d, b, a, float_muladd_negate_c, s);
3245 }
3246 
3247 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3248 {
3249     return float32_muladd(d, b, a, float_muladd_negate_c, s);
3250 }
3251 
3252 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3253 {
3254     return float64_muladd(d, b, a, float_muladd_negate_c, s);
3255 }
3256 
3257 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3258 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3259 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
3260 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2)
3261 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4)
3262 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8)
3263 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3264 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3265 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
3266 GEN_VEXT_VF(vfmsub_vf_h, 2, 2)
3267 GEN_VEXT_VF(vfmsub_vf_w, 4, 4)
3268 GEN_VEXT_VF(vfmsub_vf_d, 8, 8)
3269 
3270 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3271 {
3272     return float16_muladd(d, b, a, float_muladd_negate_product, s);
3273 }
3274 
3275 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3276 {
3277     return float32_muladd(d, b, a, float_muladd_negate_product, s);
3278 }
3279 
3280 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3281 {
3282     return float64_muladd(d, b, a, float_muladd_negate_product, s);
3283 }
3284 
3285 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3286 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3287 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
3288 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2)
3289 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4)
3290 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8)
3291 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3292 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3293 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
3294 GEN_VEXT_VF(vfnmsub_vf_h, 2, 2)
3295 GEN_VEXT_VF(vfnmsub_vf_w, 4, 4)
3296 GEN_VEXT_VF(vfnmsub_vf_d, 8, 8)
3297 
3298 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3299 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3300 {
3301     return float32_muladd(float16_to_float32(a, true, s),
3302                         float16_to_float32(b, true, s), d, 0, s);
3303 }
3304 
3305 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3306 {
3307     return float64_muladd(float32_to_float64(a, s),
3308                         float32_to_float64(b, s), d, 0, s);
3309 }
3310 
3311 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3312 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
3313 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4)
3314 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8)
3315 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3316 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
3317 GEN_VEXT_VF(vfwmacc_vf_h, 2, 4)
3318 GEN_VEXT_VF(vfwmacc_vf_w, 4, 8)
3319 
3320 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3321 {
3322     return float32_muladd(float16_to_float32(a, true, s),
3323                         float16_to_float32(b, true, s), d,
3324                         float_muladd_negate_c | float_muladd_negate_product, s);
3325 }
3326 
3327 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3328 {
3329     return float64_muladd(float32_to_float64(a, s),
3330                         float32_to_float64(b, s), d,
3331                         float_muladd_negate_c | float_muladd_negate_product, s);
3332 }
3333 
3334 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3335 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
3336 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4)
3337 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8)
3338 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3339 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
3340 GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4)
3341 GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8)
3342 
3343 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3344 {
3345     return float32_muladd(float16_to_float32(a, true, s),
3346                         float16_to_float32(b, true, s), d,
3347                         float_muladd_negate_c, s);
3348 }
3349 
3350 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3351 {
3352     return float64_muladd(float32_to_float64(a, s),
3353                         float32_to_float64(b, s), d,
3354                         float_muladd_negate_c, s);
3355 }
3356 
3357 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3358 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
3359 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4)
3360 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8)
3361 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3362 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
3363 GEN_VEXT_VF(vfwmsac_vf_h, 2, 4)
3364 GEN_VEXT_VF(vfwmsac_vf_w, 4, 8)
3365 
3366 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3367 {
3368     return float32_muladd(float16_to_float32(a, true, s),
3369                         float16_to_float32(b, true, s), d,
3370                         float_muladd_negate_product, s);
3371 }
3372 
3373 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3374 {
3375     return float64_muladd(float32_to_float64(a, s),
3376                         float32_to_float64(b, s), d,
3377                         float_muladd_negate_product, s);
3378 }
3379 
3380 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3381 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
3382 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4)
3383 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8)
3384 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3385 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
3386 GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4)
3387 GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8)
3388 
3389 /* Vector Floating-Point Square-Root Instruction */
3390 /* (TD, T2, TX2) */
3391 #define OP_UU_H uint16_t, uint16_t, uint16_t
3392 #define OP_UU_W uint32_t, uint32_t, uint32_t
3393 #define OP_UU_D uint64_t, uint64_t, uint64_t
3394 
3395 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP)        \
3396 static void do_##NAME(void *vd, void *vs2, int i,      \
3397         CPURISCVState *env)                            \
3398 {                                                      \
3399     TX2 s2 = *((T2 *)vs2 + HS2(i));                    \
3400     *((TD *)vd + HD(i)) = OP(s2, &env->fp_status);     \
3401 }
3402 
3403 #define GEN_VEXT_V_ENV(NAME, ESZ, DSZ)                 \
3404 void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
3405         CPURISCVState *env, uint32_t desc)             \
3406 {                                                      \
3407     uint32_t vm = vext_vm(desc);                       \
3408     uint32_t vl = env->vl;                             \
3409     uint32_t i;                                        \
3410                                                        \
3411     if (vl == 0) {                                     \
3412         return;                                        \
3413     }                                                  \
3414     for (i = env->vstart; i < vl; i++) {               \
3415         if (!vm && !vext_elem_mask(v0, i)) {           \
3416             continue;                                  \
3417         }                                              \
3418         do_##NAME(vd, vs2, i, env);                    \
3419     }                                                  \
3420     env->vstart = 0;                                   \
3421 }
3422 
3423 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3424 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3425 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
3426 GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2)
3427 GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4)
3428 GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8)
3429 
3430 /*
3431  * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
3432  *
3433  * Adapted from riscv-v-spec recip.c:
3434  * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3435  */
3436 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
3437 {
3438     uint64_t sign = extract64(f, frac_size + exp_size, 1);
3439     uint64_t exp = extract64(f, frac_size, exp_size);
3440     uint64_t frac = extract64(f, 0, frac_size);
3441 
3442     const uint8_t lookup_table[] = {
3443         52, 51, 50, 48, 47, 46, 44, 43,
3444         42, 41, 40, 39, 38, 36, 35, 34,
3445         33, 32, 31, 30, 30, 29, 28, 27,
3446         26, 25, 24, 23, 23, 22, 21, 20,
3447         19, 19, 18, 17, 16, 16, 15, 14,
3448         14, 13, 12, 12, 11, 10, 10, 9,
3449         9, 8, 7, 7, 6, 6, 5, 4,
3450         4, 3, 3, 2, 2, 1, 1, 0,
3451         127, 125, 123, 121, 119, 118, 116, 114,
3452         113, 111, 109, 108, 106, 105, 103, 102,
3453         100, 99, 97, 96, 95, 93, 92, 91,
3454         90, 88, 87, 86, 85, 84, 83, 82,
3455         80, 79, 78, 77, 76, 75, 74, 73,
3456         72, 71, 70, 70, 69, 68, 67, 66,
3457         65, 64, 63, 63, 62, 61, 60, 59,
3458         59, 58, 57, 56, 56, 55, 54, 53
3459     };
3460     const int precision = 7;
3461 
3462     if (exp == 0 && frac != 0) { /* subnormal */
3463         /* Normalize the subnormal. */
3464         while (extract64(frac, frac_size - 1, 1) == 0) {
3465             exp--;
3466             frac <<= 1;
3467         }
3468 
3469         frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3470     }
3471 
3472     int idx = ((exp & 1) << (precision - 1)) |
3473                 (frac >> (frac_size - precision + 1));
3474     uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3475                             (frac_size - precision);
3476     uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;
3477 
3478     uint64_t val = 0;
3479     val = deposit64(val, 0, frac_size, out_frac);
3480     val = deposit64(val, frac_size, exp_size, out_exp);
3481     val = deposit64(val, frac_size + exp_size, 1, sign);
3482     return val;
3483 }
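/*
 * Worked example, float32 input 4.0 (sign = 0, exp = 129, frac = 0):
 * idx = (1 << 6) | 0 = 64, lookup_table[64] = 127, and since ~exp acts
 * as -(exp + 1) under the unsigned arithmetic, out_exp = (381 - 130) / 2
 * = 125.  The result therefore encodes 2^-2 * (1 + 127/128) ~= 0.498,
 * a 7-bit estimate of 1/sqrt(4) = 0.5.
 */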
3484 
3485 static float16 frsqrt7_h(float16 f, float_status *s)
3486 {
3487     int exp_size = 5, frac_size = 10;
3488     bool sign = float16_is_neg(f);
3489 
3490     /*
3491      * frsqrt7(sNaN) = canonical NaN
3492      * frsqrt7(-inf) = canonical NaN
3493      * frsqrt7(-normal) = canonical NaN
3494      * frsqrt7(-subnormal) = canonical NaN
3495      */
3496     if (float16_is_signaling_nan(f, s) ||
3497             (float16_is_infinity(f) && sign) ||
3498             (float16_is_normal(f) && sign) ||
3499             (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
3500         s->float_exception_flags |= float_flag_invalid;
3501         return float16_default_nan(s);
3502     }
3503 
3504     /* frsqrt7(qNaN) = canonical NaN */
3505     if (float16_is_quiet_nan(f, s)) {
3506         return float16_default_nan(s);
3507     }
3508 
3509     /* frsqrt7(+-0) = +-inf */
3510     if (float16_is_zero(f)) {
3511         s->float_exception_flags |= float_flag_divbyzero;
3512         return float16_set_sign(float16_infinity, sign);
3513     }
3514 
3515     /* frsqrt7(+inf) = +0 */
3516     if (float16_is_infinity(f) && !sign) {
3517         return float16_set_sign(float16_zero, sign);
3518     }
3519 
3520     /* +normal, +subnormal */
3521     uint64_t val = frsqrt7(f, exp_size, frac_size);
3522     return make_float16(val);
3523 }
3524 
3525 static float32 frsqrt7_s(float32 f, float_status *s)
3526 {
3527     int exp_size = 8, frac_size = 23;
3528     bool sign = float32_is_neg(f);
3529 
3530     /*
3531      * frsqrt7(sNaN) = canonical NaN
3532      * frsqrt7(-inf) = canonical NaN
3533      * frsqrt7(-normal) = canonical NaN
3534      * frsqrt7(-subnormal) = canonical NaN
3535      */
3536     if (float32_is_signaling_nan(f, s) ||
3537             (float32_is_infinity(f) && sign) ||
3538             (float32_is_normal(f) && sign) ||
3539             (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
3540         s->float_exception_flags |= float_flag_invalid;
3541         return float32_default_nan(s);
3542     }
3543 
3544     /* frsqrt7(qNaN) = canonical NaN */
3545     if (float32_is_quiet_nan(f, s)) {
3546         return float32_default_nan(s);
3547     }
3548 
3549     /* frsqrt7(+-0) = +-inf */
3550     if (float32_is_zero(f)) {
3551         s->float_exception_flags |= float_flag_divbyzero;
3552         return float32_set_sign(float32_infinity, sign);
3553     }
3554 
3555     /* frsqrt7(+inf) = +0 */
3556     if (float32_is_infinity(f) && !sign) {
3557         return float32_set_sign(float32_zero, sign);
3558     }
3559 
3560     /* +normal, +subnormal */
3561     uint64_t val = frsqrt7(f, exp_size, frac_size);
3562     return make_float32(val);
3563 }
3564 
3565 static float64 frsqrt7_d(float64 f, float_status *s)
3566 {
3567     int exp_size = 11, frac_size = 52;
3568     bool sign = float64_is_neg(f);
3569 
3570     /*
3571      * frsqrt7(sNaN) = canonical NaN
3572      * frsqrt7(-inf) = canonical NaN
3573      * frsqrt7(-normal) = canonical NaN
3574      * frsqrt7(-subnormal) = canonical NaN
3575      */
3576     if (float64_is_signaling_nan(f, s) ||
3577             (float64_is_infinity(f) && sign) ||
3578             (float64_is_normal(f) && sign) ||
3579             (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
3580         s->float_exception_flags |= float_flag_invalid;
3581         return float64_default_nan(s);
3582     }
3583 
3584     /* frsqrt7(qNaN) = canonical NaN */
3585     if (float64_is_quiet_nan(f, s)) {
3586         return float64_default_nan(s);
3587     }
3588 
3589     /* frsqrt7(+-0) = +-inf */
3590     if (float64_is_zero(f)) {
3591         s->float_exception_flags |= float_flag_divbyzero;
3592         return float64_set_sign(float64_infinity, sign);
3593     }
3594 
3595     /* frsqrt7(+inf) = +0 */
3596     if (float64_is_infinity(f) && !sign) {
3597         return float64_set_sign(float64_zero, sign);
3598     }
3599 
3600     /* +normal, +subnormal */
3601     uint64_t val = frsqrt7(f, exp_size, frac_size);
3602     return make_float64(val);
3603 }
3604 
3605 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
3606 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
3607 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
3608 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2, 2)
3609 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4, 4)
3610 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8, 8)
3611 
3612 /*
3613  * Vector Floating-Point Reciprocal Estimate Instruction
3614  *
3615  * Adapted from riscv-v-spec recip.c:
3616  * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3617  */
3618 static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
3619                       float_status *s)
3620 {
3621     uint64_t sign = extract64(f, frac_size + exp_size, 1);
3622     uint64_t exp = extract64(f, frac_size, exp_size);
3623     uint64_t frac = extract64(f, 0, frac_size);
3624 
3625     const uint8_t lookup_table[] = {
3626         127, 125, 123, 121, 119, 117, 116, 114,
3627         112, 110, 109, 107, 105, 104, 102, 100,
3628         99, 97, 96, 94, 93, 91, 90, 88,
3629         87, 85, 84, 83, 81, 80, 79, 77,
3630         76, 75, 74, 72, 71, 70, 69, 68,
3631         66, 65, 64, 63, 62, 61, 60, 59,
3632         58, 57, 56, 55, 54, 53, 52, 51,
3633         50, 49, 48, 47, 46, 45, 44, 43,
3634         42, 41, 40, 40, 39, 38, 37, 36,
3635         35, 35, 34, 33, 32, 31, 31, 30,
3636         29, 28, 28, 27, 26, 25, 25, 24,
3637         23, 23, 22, 21, 21, 20, 19, 19,
3638         18, 17, 17, 16, 15, 15, 14, 14,
3639         13, 12, 12, 11, 11, 10, 9, 9,
3640         8, 8, 7, 7, 6, 5, 5, 4,
3641         4, 3, 3, 2, 2, 1, 1, 0
3642     };
3643     const int precision = 7;
3644 
3645     if (exp == 0 && frac != 0) { /* subnormal */
3646         /* Normalize the subnormal. */
3647         while (extract64(frac, frac_size - 1, 1) == 0) {
3648             exp--;
3649             frac <<= 1;
3650         }
3651 
3652         frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3653 
3654         if (exp != 0 && exp != UINT64_MAX) {
3655             /*
3656              * Overflow to infinity or to the largest finite value of
3657              * the same sign, depending on the sign and rounding mode.
3658              */
3659             s->float_exception_flags |= (float_flag_inexact |
3660                                          float_flag_overflow);
3661 
3662             if ((s->float_rounding_mode == float_round_to_zero) ||
3663                 ((s->float_rounding_mode == float_round_down) && !sign) ||
3664                 ((s->float_rounding_mode == float_round_up) && sign)) {
3665                 /* Return greatest/negative finite value. */
3666                 return (sign << (exp_size + frac_size)) |
3667                     (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
3668             } else {
3669                 /* Return +-inf. */
3670                 return (sign << (exp_size + frac_size)) |
3671                     MAKE_64BIT_MASK(frac_size, exp_size);
3672             }
3673         }
3674     }
3675 
3676     int idx = frac >> (frac_size - precision);
3677     uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3678                             (frac_size - precision);
3679     uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;
3680 
3681     if (out_exp == 0 || out_exp == UINT64_MAX) {
3682         /*
3683          * The result is subnormal, but don't raise the underflow exception,
3684          * because there's no additional loss of precision.
3685          */
3686         out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
3687         if (out_exp == UINT64_MAX) {
3688             out_frac >>= 1;
3689             out_exp = 0;
3690         }
3691     }
3692 
3693     uint64_t val = 0;
3694     val = deposit64(val, 0, frac_size, out_frac);
3695     val = deposit64(val, frac_size, exp_size, out_exp);
3696     val = deposit64(val, frac_size + exp_size, 1, sign);
3697     return val;
3698 }
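/*
 * Worked example, float32 input 3.0 (sign = 0, exp = 128, frac = 0x400000):
 * idx = 0x400000 >> 16 = 64, lookup_table[64] = 42, and
 * out_exp = 2 * 127 - 129 = 125, so the result encodes
 * 2^-2 * (1 + 42/128) ~= 0.332, a 7-bit estimate of 1/3.
 */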
3699 
3700 static float16 frec7_h(float16 f, float_status *s)
3701 {
3702     int exp_size = 5, frac_size = 10;
3703     bool sign = float16_is_neg(f);
3704 
3705     /* frec7(+-inf) = +-0 */
3706     if (float16_is_infinity(f)) {
3707         return float16_set_sign(float16_zero, sign);
3708     }
3709 
3710     /* frec7(+-0) = +-inf */
3711     if (float16_is_zero(f)) {
3712         s->float_exception_flags |= float_flag_divbyzero;
3713         return float16_set_sign(float16_infinity, sign);
3714     }
3715 
3716     /* frec7(sNaN) = canonical NaN */
3717     if (float16_is_signaling_nan(f, s)) {
3718         s->float_exception_flags |= float_flag_invalid;
3719         return float16_default_nan(s);
3720     }
3721 
3722     /* frec7(qNaN) = canonical NaN */
3723     if (float16_is_quiet_nan(f, s)) {
3724         return float16_default_nan(s);
3725     }
3726 
3727     /* +-normal, +-subnormal */
3728     uint64_t val = frec7(f, exp_size, frac_size, s);
3729     return make_float16(val);
3730 }
3731 
3732 static float32 frec7_s(float32 f, float_status *s)
3733 {
3734     int exp_size = 8, frac_size = 23;
3735     bool sign = float32_is_neg(f);
3736 
3737     /* frec7(+-inf) = +-0 */
3738     if (float32_is_infinity(f)) {
3739         return float32_set_sign(float32_zero, sign);
3740     }
3741 
3742     /* frec7(+-0) = +-inf */
3743     if (float32_is_zero(f)) {
3744         s->float_exception_flags |= float_flag_divbyzero;
3745         return float32_set_sign(float32_infinity, sign);
3746     }
3747 
3748     /* frec7(sNaN) = canonical NaN */
3749     if (float32_is_signaling_nan(f, s)) {
3750         s->float_exception_flags |= float_flag_invalid;
3751         return float32_default_nan(s);
3752     }
3753 
3754     /* frec7(qNaN) = canonical NaN */
3755     if (float32_is_quiet_nan(f, s)) {
3756         return float32_default_nan(s);
3757     }
3758 
3759     /* +-normal, +-subnormal */
3760     uint64_t val = frec7(f, exp_size, frac_size, s);
3761     return make_float32(val);
3762 }
3763 
3764 static float64 frec7_d(float64 f, float_status *s)
3765 {
3766     int exp_size = 11, frac_size = 52;
3767     bool sign = float64_is_neg(f);
3768 
3769     /* frec7(+-inf) = +-0 */
3770     if (float64_is_infinity(f)) {
3771         return float64_set_sign(float64_zero, sign);
3772     }
3773 
3774     /* frec7(+-0) = +-inf */
3775     if (float64_is_zero(f)) {
3776         s->float_exception_flags |= float_flag_divbyzero;
3777         return float64_set_sign(float64_infinity, sign);
3778     }
3779 
3780     /* frec7(sNaN) = canonical NaN */
3781     if (float64_is_signaling_nan(f, s)) {
3782         s->float_exception_flags |= float_flag_invalid;
3783         return float64_default_nan(s);
3784     }
3785 
3786     /* frec7(qNaN) = canonical NaN */
3787     if (float64_is_quiet_nan(f, s)) {
3788         return float64_default_nan(s);
3789     }
3790 
3791     /* +-normal, +-subnormal */
3792     uint64_t val = frec7(f, exp_size, frac_size, s);
3793     return make_float64(val);
3794 }
3795 
3796 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
3797 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
3798 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
3799 GEN_VEXT_V_ENV(vfrec7_v_h, 2, 2)
3800 GEN_VEXT_V_ENV(vfrec7_v_w, 4, 4)
3801 GEN_VEXT_V_ENV(vfrec7_v_d, 8, 8)
3802 
3803 /* Vector Floating-Point MIN/MAX Instructions */
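/*
 * float*_minimum_number/maximum_number provide the IEEE 754-2019
 * minimumNumber/maximumNumber semantics required by vfmin/vfmax: when
 * exactly one operand is a NaN, the non-NaN operand is returned.
 */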
3804 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
3805 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
3806 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
3807 GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2)
3808 GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4)
3809 GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8)
3810 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
3811 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
3812 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
3813 GEN_VEXT_VF(vfmin_vf_h, 2, 2)
3814 GEN_VEXT_VF(vfmin_vf_w, 4, 4)
3815 GEN_VEXT_VF(vfmin_vf_d, 8, 8)
3816 
3817 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
3818 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
3819 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
3820 GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2)
3821 GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4)
3822 GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8)
3823 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
3824 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
3825 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
3826 GEN_VEXT_VF(vfmax_vf_h, 2, 2)
3827 GEN_VEXT_VF(vfmax_vf_w, 4, 4)
3828 GEN_VEXT_VF(vfmax_vf_d, 8, 8)
3829 
3830 /* Vector Floating-Point Sign-Injection Instructions */
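/*
 * The helpers receive (a = vs2, b = vs1/rs1): deposit64() keeps only the
 * sign bit of b and overwrites the exponent and mantissa bits with those
 * of a, so e.g. fsgnj32(a, b) yields |a| with the sign of b.
 */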
3831 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
3832 {
3833     return deposit64(b, 0, 15, a);
3834 }
3835 
3836 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
3837 {
3838     return deposit64(b, 0, 31, a);
3839 }
3840 
3841 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
3842 {
3843     return deposit64(b, 0, 63, a);
3844 }
3845 
3846 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
3847 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
3848 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
3849 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2)
3850 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4)
3851 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8)
3852 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
3853 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
3854 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
3855 GEN_VEXT_VF(vfsgnj_vf_h, 2, 2)
3856 GEN_VEXT_VF(vfsgnj_vf_w, 4, 4)
3857 GEN_VEXT_VF(vfsgnj_vf_d, 8, 8)
3858 
3859 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
3860 {
3861     return deposit64(~b, 0, 15, a);
3862 }
3863 
3864 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
3865 {
3866     return deposit64(~b, 0, 31, a);
3867 }
3868 
3869 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
3870 {
3871     return deposit64(~b, 0, 63, a);
3872 }
3873 
3874 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
3875 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
3876 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
3877 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2)
3878 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4)
3879 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8)
3880 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
3881 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
3882 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
3883 GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2)
3884 GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4)
3885 GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8)
3886 
3887 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
3888 {
3889     return deposit64(b ^ a, 0, 15, a);
3890 }
3891 
3892 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
3893 {
3894     return deposit64(b ^ a, 0, 31, a);
3895 }
3896 
3897 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
3898 {
3899     return deposit64(b ^ a, 0, 63, a);
3900 }
3901 
3902 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
3903 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
3904 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
3905 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2)
3906 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4)
3907 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8)
3908 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
3909 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
3910 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
3911 GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2)
3912 GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4)
3913 GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8)
3914 
3915 /* Vector Floating-Point Compare Instructions */
3916 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP)            \
3917 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
3918                   CPURISCVState *env, uint32_t desc)          \
3919 {                                                             \
3920     uint32_t vm = vext_vm(desc);                              \
3921     uint32_t vl = env->vl;                                    \
3922     uint32_t i;                                               \
3923                                                               \
3924     for (i = env->vstart; i < vl; i++) {                      \
3925         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
3926         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
3927         if (!vm && !vext_elem_mask(v0, i)) {                  \
3928             continue;                                         \
3929         }                                                     \
3930         vext_set_elem_mask(vd, i,                             \
3931                            DO_OP(s2, s1, &env->fp_status));   \
3932     }                                                         \
3933     env->vstart = 0;                                          \
3934 }
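/*
 * The compare helpers produce a mask: one bit per element is written with
 * vext_set_elem_mask(); elements masked off by v0 are skipped and keep
 * their previous mask value in vd.
 */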
3935 
3936 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
3937 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
3938 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
3939 
3940 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP)                      \
3941 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,       \
3942                   CPURISCVState *env, uint32_t desc)                \
3943 {                                                                   \
3944     uint32_t vm = vext_vm(desc);                                    \
3945     uint32_t vl = env->vl;                                          \
3946     uint32_t i;                                                     \
3947                                                                     \
3948     for (i = env->vstart; i < vl; i++) {                            \
3949         ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
3950         if (!vm && !vext_elem_mask(v0, i)) {                        \
3951             continue;                                               \
3952         }                                                           \
3953         vext_set_elem_mask(vd, i,                                   \
3954                            DO_OP(s2, (ETYPE)s1, &env->fp_status));  \
3955     }                                                               \
3956     env->vstart = 0;                                                \
3957 }
3958 
3959 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
3960 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
3961 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
3962 
3963 static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
3964 {
3965     FloatRelation compare = float16_compare_quiet(a, b, s);
3966     return compare != float_relation_equal;
3967 }
3968 
3969 static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
3970 {
3971     FloatRelation compare = float32_compare_quiet(a, b, s);
3972     return compare != float_relation_equal;
3973 }
3974 
3975 static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
3976 {
3977     FloatRelation compare = float64_compare_quiet(a, b, s);
3978     return compare != float_relation_equal;
3979 }
3980 
3981 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
3982 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
3983 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
3984 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
3985 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
3986 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
3987 
3988 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
3989 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
3990 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
3991 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
3992 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
3993 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
3994 
3995 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
3996 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
3997 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
3998 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
3999 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
4000 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
4001 
4002 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
4003 {
4004     FloatRelation compare = float16_compare(a, b, s);
4005     return compare == float_relation_greater;
4006 }
4007 
4008 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
4009 {
4010     FloatRelation compare = float32_compare(a, b, s);
4011     return compare == float_relation_greater;
4012 }
4013 
4014 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
4015 {
4016     FloatRelation compare = float64_compare(a, b, s);
4017     return compare == float_relation_greater;
4018 }
4019 
4020 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
4021 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
4022 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4023 
4024 static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4025 {
4026     FloatRelation compare = float16_compare(a, b, s);
4027     return compare == float_relation_greater ||
4028            compare == float_relation_equal;
4029 }
4030 
4031 static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
4032 {
4033     FloatRelation compare = float32_compare(a, b, s);
4034     return compare == float_relation_greater ||
4035            compare == float_relation_equal;
4036 }
4037 
4038 static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
4039 {
4040     FloatRelation compare = float64_compare(a, b, s);
4041     return compare == float_relation_greater ||
4042            compare == float_relation_equal;
4043 }
4044 
4045 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
4046 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
4047 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
4048 
4049 /* Vector Floating-Point Classify Instruction */
4050 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP)         \
4051 static void do_##NAME(void *vd, void *vs2, int i)      \
4052 {                                                      \
4053     TX2 s2 = *((T2 *)vs2 + HS2(i));                    \
4054     *((TD *)vd + HD(i)) = OP(s2);                      \
4055 }
4056 
4057 #define GEN_VEXT_V(NAME, ESZ, DSZ)                     \
4058 void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
4059                   CPURISCVState *env, uint32_t desc)   \
4060 {                                                      \
4061     uint32_t vm = vext_vm(desc);                       \
4062     uint32_t vl = env->vl;                             \
4063     uint32_t i;                                        \
4064                                                        \
4065     for (i = env->vstart; i < vl; i++) {               \
4066         if (!vm && !vext_elem_mask(v0, i)) {           \
4067             continue;                                  \
4068         }                                              \
4069         do_##NAME(vd, vs2, i);                         \
4070     }                                                  \
4071     env->vstart = 0;                                   \
4072 }
4073 
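/*
 * fclass result bits (one-hot), as defined by the RISC-V F/D extensions:
 *   0: -infinity        1: negative normal     2: negative subnormal
 *   3: -0               4: +0                  5: positive subnormal
 *   6: positive normal  7: +infinity           8: signaling NaN
 *   9: quiet NaN
 */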
4074 target_ulong fclass_h(uint64_t frs1)
4075 {
4076     float16 f = frs1;
4077     bool sign = float16_is_neg(f);
4078 
4079     if (float16_is_infinity(f)) {
4080         return sign ? 1 << 0 : 1 << 7;
4081     } else if (float16_is_zero(f)) {
4082         return sign ? 1 << 3 : 1 << 4;
4083     } else if (float16_is_zero_or_denormal(f)) {
4084         return sign ? 1 << 2 : 1 << 5;
4085     } else if (float16_is_any_nan(f)) {
4086         float_status s = { }; /* for snan_bit_is_one */
4087         return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4088     } else {
4089         return sign ? 1 << 1 : 1 << 6;
4090     }
4091 }
4092 
4093 target_ulong fclass_s(uint64_t frs1)
4094 {
4095     float32 f = frs1;
4096     bool sign = float32_is_neg(f);
4097 
4098     if (float32_is_infinity(f)) {
4099         return sign ? 1 << 0 : 1 << 7;
4100     } else if (float32_is_zero(f)) {
4101         return sign ? 1 << 3 : 1 << 4;
4102     } else if (float32_is_zero_or_denormal(f)) {
4103         return sign ? 1 << 2 : 1 << 5;
4104     } else if (float32_is_any_nan(f)) {
4105         float_status s = { }; /* for snan_bit_is_one */
4106         return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4107     } else {
4108         return sign ? 1 << 1 : 1 << 6;
4109     }
4110 }
4111 
4112 target_ulong fclass_d(uint64_t frs1)
4113 {
4114     float64 f = frs1;
4115     bool sign = float64_is_neg(f);
4116 
4117     if (float64_is_infinity(f)) {
4118         return sign ? 1 << 0 : 1 << 7;
4119     } else if (float64_is_zero(f)) {
4120         return sign ? 1 << 3 : 1 << 4;
4121     } else if (float64_is_zero_or_denormal(f)) {
4122         return sign ? 1 << 2 : 1 << 5;
4123     } else if (float64_is_any_nan(f)) {
4124         float_status s = { }; /* for snan_bit_is_one */
4125         return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4126     } else {
4127         return sign ? 1 << 1 : 1 << 6;
4128     }
4129 }
4130 
4131 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
4132 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
4133 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
4134 GEN_VEXT_V(vfclass_v_h, 2, 2)
4135 GEN_VEXT_V(vfclass_v_w, 4, 4)
4136 GEN_VEXT_V(vfclass_v_d, 8, 8)
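     /*
      * The fclass helpers above return the standard RISC-V classify mask,
      * exactly one bit set per input:
      *     bit 0: negative infinity     bit 5: positive subnormal
      *     bit 1: negative normal       bit 6: positive normal
      *     bit 2: negative subnormal    bit 7: positive infinity
      *     bit 3: negative zero         bit 8: signaling NaN
      *     bit 4: positive zero         bit 9: quiet NaN
      * e.g. fclass_s(0xff800000), i.e. -inf, returns 1 << 0.
      */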
4137 
4138 /* Vector Floating-Point Merge Instruction */
4139 #define GEN_VFMERGE_VF(NAME, ETYPE, H)                        \
4140 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4141                   CPURISCVState *env, uint32_t desc)          \
4142 {                                                             \
4143     uint32_t vm = vext_vm(desc);                              \
4144     uint32_t vl = env->vl;                                    \
4145     uint32_t i;                                               \
4146                                                               \
4147     for (i = env->vstart; i < vl; i++) {                      \
4148         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
4149         *((ETYPE *)vd + H(i))                                 \
4150           = (!vm && !vext_elem_mask(v0, i) ? s2 : s1);        \
4151     }                                                         \
4152     env->vstart = 0;                                          \
4153 }
4154 
4155 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
4156 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
4157 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
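     /*
      * vfmerge.vfm vd, vs2, rs1, v0 selects per element:
      *     vd[i] = v0.mask[i] ? f[rs1] : vs2[i]
      * e.g. with v0.mask = 0101b and vl = 4, elements 0 and 2 of vd receive
      * the scalar f[rs1] while elements 1 and 3 are copied from vs2.
      */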
4158 
4159 /* Single-Width Floating-Point/Integer Type-Convert Instructions */
4160 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
4161 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
4162 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
4163 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
4164 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2)
4165 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4)
4166 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8)
4167 
4168 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
4169 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
4170 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
4171 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
4172 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2)
4173 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4)
4174 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8)
4175 
4176 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
4177 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
4178 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
4179 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
4180 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2)
4181 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4)
4182 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8)
4183 
4184 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
4185 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
4186 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
4187 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
4188 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2)
4189 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4)
4190 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8)
4191 
4192 /* Widening Floating-Point/Integer Type-Convert Instructions */
4193 /* (TD, T2, TX2) */
4194 #define WOP_UU_B uint16_t, uint8_t,  uint8_t
4195 #define WOP_UU_H uint32_t, uint16_t, uint16_t
4196 #define WOP_UU_W uint64_t, uint32_t, uint32_t
4197 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. */
4198 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
4199 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
4200 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4)
4201 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8)
4202 
4203 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
4204 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
4205 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
4206 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4)
4207 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8)
4208 
4209 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float. */
4210 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
4211 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
4212 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
4213 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 1, 2)
4214 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4)
4215 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8)
4216 
4217 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
4218 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
4219 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
4220 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
4221 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 1, 2)
4222 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4)
4223 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8)
4224 
4225 /*
4226  * vfwcvt.f.f.v vd, vs2, vm
4227  * Convert single-width float to double-width float.
4228  */
4229 static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4230 {
4231     return float16_to_float32(a, true, s);
4232 }
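     /*
      * float16_to_float32() takes an extra "ieee" flag (true selects IEEE
      * half-precision rather than the Arm alternative format) on top of the
      * operand and float_status that the OPFVV1 callback provides, hence
      * this thin wrapper.  vfncvtffv16() below exists for the same reason.
      */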
4233 
4234 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
4235 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
4236 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4)
4237 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8)
4238 
4239 /* Narrowing Floating-Point/Integer Type-Convert Instructions */
4240 /* (TD, T2, TX2) */
4241 #define NOP_UU_B uint8_t,  uint16_t, uint32_t
4242 #define NOP_UU_H uint16_t, uint32_t, uint32_t
4243 #define NOP_UU_W uint32_t, uint64_t, uint64_t
4244 /* vfncvt.xu.f.w vd, vs2, vm # Convert double-width float to unsigned integer. */
4245 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
4246 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
4247 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
4248 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1, 1)
4249 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2, 2)
4250 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4, 4)
4251 
4252 /* vfncvt.x.f.w vd, vs2, vm # Convert double-width float to signed integer. */
4253 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
4254 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
4255 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
4256 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1, 1)
4257 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2, 2)
4258 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4, 4)
4259 
4260 /* vfncvt.f.xu.w vd, vs2, vm # Convert double-width unsigned integer to float. */
4261 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
4262 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
4263 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2, 2)
4264 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4, 4)
4265 
4266 /* vfncvt.f.x.w vd, vs2, vm # Convert double-width integer to float. */
4267 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
4268 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
4269 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2, 2)
4270 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4, 4)
4271 
4272 /* vfncvt.f.f.w vd, vs2, vm # Convert double-width float to single-width float. */
4273 static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4274 {
4275     return float32_to_float16(a, true, s);
4276 }
4277 
4278 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
4279 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
4280 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2, 2)
4281 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4, 4)
4282 
4283 /*
4284  *** Vector Reduction Operations
4285  */
4286 /* Vector Single-Width Integer Reduction Instructions */
4287 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP)          \
4288 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
4289         void *vs2, CPURISCVState *env, uint32_t desc)     \
4290 {                                                         \
4291     uint32_t vm = vext_vm(desc);                          \
4292     uint32_t vl = env->vl;                                \
4293     uint32_t i;                                           \
4294     TD s1 =  *((TD *)vs1 + HD(0));                        \
4295                                                           \
4296     for (i = env->vstart; i < vl; i++) {                  \
4297         TS2 s2 = *((TS2 *)vs2 + HS2(i));                  \
4298         if (!vm && !vext_elem_mask(v0, i)) {              \
4299             continue;                                     \
4300         }                                                 \
4301         s1 = OP(s1, (TD)s2);                              \
4302     }                                                     \
4303     *((TD *)vd + HD(0)) = s1;                             \
4304     env->vstart = 0;                                      \
4305 }
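     /*
      * For illustration, the GEN_VEXT_RED(vredsum_vs_b, ...) instantiation
      * below expands roughly to a helper that folds every active element of
      * vs2 into a scalar seeded from vs1[0]:
      *
      *     s1 = vs1[0];
      *     for (each active i < vl) s1 += vs2[i];
      *     vd[0] = s1;
      *
      * e.g. vs1[0] = 10, vs2 = {1, 2, 3, 4}, vl = 4, all active => vd[0] = 20.
      */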
4306 
4307 /* vd[0] = sum(vs1[0], vs2[*]) */
4308 GEN_VEXT_RED(vredsum_vs_b, int8_t,  int8_t,  H1, H1, DO_ADD)
4309 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
4310 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
4311 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)
4312 
4313 /* vd[0] = maxu(vs1[0], vs2[*]) */
4314 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t,  uint8_t,  H1, H1, DO_MAX)
4315 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
4316 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
4317 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)
4318 
4319 /* vd[0] = max(vs1[0], vs2[*]) */
4320 GEN_VEXT_RED(vredmax_vs_b, int8_t,  int8_t,  H1, H1, DO_MAX)
4321 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
4322 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
4323 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)
4324 
4325 /* vd[0] = minu(vs1[0], vs2[*]) */
4326 GEN_VEXT_RED(vredminu_vs_b, uint8_t,  uint8_t,  H1, H1, DO_MIN)
4327 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
4328 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
4329 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)
4330 
4331 /* vd[0] = min(vs1[0], vs2[*]) */
4332 GEN_VEXT_RED(vredmin_vs_b, int8_t,  int8_t,  H1, H1, DO_MIN)
4333 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
4334 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
4335 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)
4336 
4337 /* vd[0] = and(vs1[0], vs2[*]) */
4338 GEN_VEXT_RED(vredand_vs_b, int8_t,  int8_t,  H1, H1, DO_AND)
4339 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
4340 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
4341 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)
4342 
4343 /* vd[0] = or(vs1[0], vs2[*]) */
4344 GEN_VEXT_RED(vredor_vs_b, int8_t,  int8_t,  H1, H1, DO_OR)
4345 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
4346 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
4347 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)
4348 
4349 /* vd[0] = xor(vs1[0], vs2[*]) */
4350 GEN_VEXT_RED(vredxor_vs_b, int8_t,  int8_t,  H1, H1, DO_XOR)
4351 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
4352 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
4353 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)
4354 
4355 /* Vector Widening Integer Reduction Instructions */
4356 /* Signed sum reduction into double-width accumulator */
4357 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t,  H2, H1, DO_ADD)
4358 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
4359 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)
4360 
4361 /* Unsigned sum reduction into double-width accumulator */
4362 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t,  H2, H1, DO_ADD)
4363 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
4364 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
4365 
4366 /* Vector Single-Width Floating-Point Reduction Instructions */
4367 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP)          \
4368 void HELPER(NAME)(void *vd, void *v0, void *vs1,           \
4369                   void *vs2, CPURISCVState *env,           \
4370                   uint32_t desc)                           \
4371 {                                                          \
4372     uint32_t vm = vext_vm(desc);                           \
4373     uint32_t vl = env->vl;                                 \
4374     uint32_t i;                                            \
4375     TD s1 =  *((TD *)vs1 + HD(0));                         \
4376                                                            \
4377     for (i = env->vstart; i < vl; i++) {                   \
4378         TS2 s2 = *((TS2 *)vs2 + HS2(i));                   \
4379         if (!vm && !vext_elem_mask(v0, i)) {               \
4380             continue;                                      \
4381         }                                                  \
4382         s1 = OP(s1, (TD)s2, &env->fp_status);              \
4383     }                                                      \
4384     *((TD *)vd + HD(0)) = s1;                              \
4385     env->vstart = 0;                                       \
4386 }
4387 
4388 /* Unordered sum */
4389 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4390 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4391 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
4392 
4393 /* Maximum value */
4394 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number)
4395 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number)
4396 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number)
4397 
4398 /* Minimum value */
4399 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number)
4400 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number)
4401 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number)
4402 
4403 /* Vector Widening Floating-Point Reduction Instructions */
4404 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
4405 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
4406                             void *vs2, CPURISCVState *env, uint32_t desc)
4407 {
4408     uint32_t vm = vext_vm(desc);
4409     uint32_t vl = env->vl;
4410     uint32_t i;
4411     uint32_t s1 =  *((uint32_t *)vs1 + H4(0));
4412 
4413     for (i = env->vstart; i < vl; i++) {
4414         uint16_t s2 = *((uint16_t *)vs2 + H2(i));
4415         if (!vm && !vext_elem_mask(v0, i)) {
4416             continue;
4417         }
4418         s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status),
4419                          &env->fp_status);
4420     }
4421     *((uint32_t *)vd + H4(0)) = s1;
4422     env->vstart = 0;
4423 }
4424 
4425 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
4426                             void *vs2, CPURISCVState *env, uint32_t desc)
4427 {
4428     uint32_t vm = vext_vm(desc);
4429     uint32_t vl = env->vl;
4430     uint32_t i;
4431     uint64_t s1 =  *((uint64_t *)vs1);
4432 
4433     for (i = env->vstart; i < vl; i++) {
4434         uint32_t s2 = *((uint32_t *)vs2 + H4(i));
4435         if (!vm && !vext_elem_mask(v0, i)) {
4436             continue;
4437         }
4438         s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status),
4439                          &env->fp_status);
4440     }
4441     *((uint64_t *)vd) = s1;
4442     env->vstart = 0;
4443 }
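     /*
      * Although the spec leaves the reduction order of the unordered form
      * unspecified, these helpers simply accumulate in element order:
      *     vd[0] (2*SEW) = vs1[0] + widen(vs2[0]) + widen(vs2[1]) + ...
      * skipping inactive elements, with every conversion and addition done
      * under env->fp_status (current rounding mode, accrued flags).
      */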
4444 
4445 /*
4446  *** Vector Mask Operations
4447  */
4448 /* Vector Mask-Register Logical Instructions */
4449 #define GEN_VEXT_MASK_VV(NAME, OP)                        \
4450 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
4451                   void *vs2, CPURISCVState *env,          \
4452                   uint32_t desc)                          \
4453 {                                                         \
4454     uint32_t vl = env->vl;                                \
4455     uint32_t i;                                           \
4456     int a, b;                                             \
4457                                                           \
4458     for (i = env->vstart; i < vl; i++) {                  \
4459         a = vext_elem_mask(vs1, i);                       \
4460         b = vext_elem_mask(vs2, i);                       \
4461         vext_set_elem_mask(vd, i, OP(b, a));              \
4462     }                                                     \
4463     env->vstart = 0;                                      \
4464 }
4465 
4466 #define DO_NAND(N, M)  (!(N & M))
4467 #define DO_ANDNOT(N, M)  (N & !M)
4468 #define DO_NOR(N, M)  (!(N | M))
4469 #define DO_ORNOT(N, M)  (N | !M)
4470 #define DO_XNOR(N, M)  (!(N ^ M))
4471 
4472 GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
4473 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
4474 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
4475 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
4476 GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
4477 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
4478 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
4479 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
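     /*
      * Worked example on four mask bits, all elements active,
      * vs2 = 1100b and vs1 = 1010b:
      *     vmand.mm  -> 1000b        vmxor.mm  -> 0110b
      *     vmandn.mm -> 0100b        vmxnor.mm -> 1001b
      * Note that vmandn/vmorn complement vs1 (the OP(b, a) argument order
      * above), not vs2.
      */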
4480 
4481 /* Vector count population in mask vcpop */
4482 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
4483                              uint32_t desc)
4484 {
4485     target_ulong cnt = 0;
4486     uint32_t vm = vext_vm(desc);
4487     uint32_t vl = env->vl;
4488     int i;
4489 
4490     for (i = env->vstart; i < vl; i++) {
4491         if (vm || vext_elem_mask(v0, i)) {
4492             if (vext_elem_mask(vs2, i)) {
4493                 cnt++;
4494             }
4495         }
4496     }
4497     env->vstart = 0;
4498     return cnt;
4499 }
4500 
4501 /* vfirst find-first-set mask bit */
4502 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4503                               uint32_t desc)
4504 {
4505     uint32_t vm = vext_vm(desc);
4506     uint32_t vl = env->vl;
4507     int i;
4508 
4509     for (i = env->vstart; i < vl; i++) {
4510         if (vm || vext_elem_mask(v0, i)) {
4511             if (vext_elem_mask(vs2, i)) {
4512                 return i;
4513             }
4514         }
4515     }
4516     env->vstart = 0;
4517     return -1LL;
4518 }
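     /*
      * e.g. with all elements active and vs2 = ...00101000b, vfirst.m
      * returns 3 (the index of the lowest set bit); if no active bit of
      * vs2 is set it returns -1, i.e. all-ones in the destination register.
      */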
4519 
4520 enum set_mask_type {
4521     ONLY_FIRST = 1,
4522     INCLUDE_FIRST,
4523     BEFORE_FIRST,
4524 };
4525 
4526 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
4527                    uint32_t desc, enum set_mask_type type)
4528 {
4529     uint32_t vm = vext_vm(desc);
4530     uint32_t vl = env->vl;
4531     int i;
4532     bool first_mask_bit = false;
4533 
4534     for (i = env->vstart; i < vl; i++) {
4535         if (!vm && !vext_elem_mask(v0, i)) {
4536             continue;
4537         }
4538         /* write a zero to all following active elements */
4539         if (first_mask_bit) {
4540             vext_set_elem_mask(vd, i, 0);
4541             continue;
4542         }
4543         if (vext_elem_mask(vs2, i)) {
4544             first_mask_bit = true;
4545             if (type == BEFORE_FIRST) {
4546                 vext_set_elem_mask(vd, i, 0);
4547             } else {
4548                 vext_set_elem_mask(vd, i, 1);
4549             }
4550         } else {
4551             if (type == ONLY_FIRST) {
4552                 vext_set_elem_mask(vd, i, 0);
4553             } else {
4554                 vext_set_elem_mask(vd, i, 1);
4555             }
4556         }
4557     }
4558     env->vstart = 0;
4559 }
4560 
4561 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4562                      uint32_t desc)
4563 {
4564     vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
4565 }
4566 
4567 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4568                      uint32_t desc)
4569 {
4570     vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
4571 }
4572 
4573 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4574                      uint32_t desc)
4575 {
4576     vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
4577 }
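     /*
      * Worked example, all elements active, first set bit of vs2 at index 3:
      *     vs2     = ...01001000b
      *     vmsbf.m = ...00000111b   (set before first)
      *     vmsif.m = ...00001111b   (set including first)
      *     vmsof.m = ...00001000b   (set only first)
      * Inactive destination bits are left untouched.
      */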
4578 
4579 /* Vector Iota Instruction */
4580 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H)                                  \
4581 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env,      \
4582                   uint32_t desc)                                          \
4583 {                                                                         \
4584     uint32_t vm = vext_vm(desc);                                          \
4585     uint32_t vl = env->vl;                                                \
4586     uint32_t sum = 0;                                                     \
4587     int i;                                                                \
4588                                                                           \
4589     for (i = env->vstart; i < vl; i++) {                                  \
4590         if (!vm && !vext_elem_mask(v0, i)) {                              \
4591             continue;                                                     \
4592         }                                                                 \
4593         *((ETYPE *)vd + H(i)) = sum;                                      \
4594         if (vext_elem_mask(vs2, i)) {                                     \
4595             sum++;                                                        \
4596         }                                                                 \
4597     }                                                                     \
4598     env->vstart = 0;                                                      \
4599 }
4600 
4601 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t,  H1)
4602 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
4603 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
4604 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
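     /*
      * viota.m writes into each active destination element the count of set
      * vs2 bits at lower-numbered active positions (a parallel prefix sum).
      * For example, with every element active:
      *     vs2 mask = 1 0 0 1 0 1   (element 0 first)
      *     vd       = 0 1 1 1 2 2
      */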
4605 
4606 /* Vector Element Index Instruction */
4607 #define GEN_VEXT_VID_V(NAME, ETYPE, H)                                    \
4608 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc)  \
4609 {                                                                         \
4610     uint32_t vm = vext_vm(desc);                                          \
4611     uint32_t vl = env->vl;                                                \
4612     int i;                                                                \
4613                                                                           \
4614     for (i = env->vstart; i < vl; i++) {                                  \
4615         if (!vm && !vext_elem_mask(v0, i)) {                              \
4616             continue;                                                     \
4617         }                                                                 \
4618         *((ETYPE *)vd + H(i)) = i;                                        \
4619     }                                                                     \
4620     env->vstart = 0;                                                      \
4621 }
4622 
4623 GEN_VEXT_VID_V(vid_v_b, uint8_t,  H1)
4624 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
4625 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
4626 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
4627 
4628 /*
4629  *** Vector Permutation Instructions
4630  */
4631 
4632 /* Vector Slide Instructions */
4633 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                              \
4634 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4635                   CPURISCVState *env, uint32_t desc)                      \
4636 {                                                                         \
4637     uint32_t vm = vext_vm(desc);                                          \
4638     uint32_t vl = env->vl;                                                \
4639     target_ulong offset = s1, i_min, i;                                   \
4640                                                                           \
4641     i_min = MAX(env->vstart, offset);                                     \
4642     for (i = i_min; i < vl; i++) {                                        \
4643         if (!vm && !vext_elem_mask(v0, i)) {                              \
4644             continue;                                                     \
4645         }                                                                 \
4646         *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
4647     }                                                                     \
         env->vstart = 0;                                                      \
4648 }
4649 
4650 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
4651 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t,  H1)
4652 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
4653 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
4654 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
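     /*
      * e.g. vslideup.vx with x[rs1] = 2 and vl = 8 copies vs2[0..5] into
      * vd[2..7]; vd[0..1] (and any masked-off element) keep their old value.
      */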
4655 
4656 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                            \
4657 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4658                   CPURISCVState *env, uint32_t desc)                      \
4659 {                                                                         \
4660     uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
4661     uint32_t vm = vext_vm(desc);                                          \
4662     uint32_t vl = env->vl;                                                \
4663     target_ulong i_max, i;                                                \
4664                                                                           \
4665     i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart);       \
4666     for (i = env->vstart; i < i_max; ++i) {                               \
4667         if (vm || vext_elem_mask(v0, i)) {                                \
4668             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));          \
4669         }                                                                 \
4670     }                                                                     \
4671                                                                           \
4672     for (i = i_max; i < vl; ++i) {                                        \
4673         if (vm || vext_elem_mask(v0, i)) {                                \
4674             *((ETYPE *)vd + H(i)) = 0;                                    \
4675         }                                                                 \
4676     }                                                                     \
4677                                                                           \
4678     env->vstart = 0;                                                      \
4679 }
4680 
4681 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
4682 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t,  H1)
4683 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
4684 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
4685 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
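     /*
      * For vslidedown the source index i + x[rs1] can run past VLMAX, so the
      * helper clamps the copy at i_max and zeroes the remaining active
      * elements: e.g. x[rs1] = 2 with vl = VLMAX = 8 gives
      * vd[0..5] = vs2[2..7] and vd[6..7] = 0.
      */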
4686 
4687 #define GEN_VEXT_VSLIDE1UP(ESZ, H)                                          \
4688 static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
4689                      CPURISCVState *env, uint32_t desc)                     \
4690 {                                                                           \
4691     typedef uint##ESZ##_t ETYPE;                                            \
4692     uint32_t vm = vext_vm(desc);                                            \
4693     uint32_t vl = env->vl;                                                  \
4694     uint32_t i;                                                             \
4695                                                                             \
4696     for (i = env->vstart; i < vl; i++) {                                    \
4697         if (!vm && !vext_elem_mask(v0, i)) {                                \
4698             continue;                                                       \
4699         }                                                                   \
4700         if (i == 0) {                                                       \
4701             *((ETYPE *)vd + H(i)) = s1;                                     \
4702         } else {                                                            \
4703             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));             \
4704         }                                                                   \
4705     }                                                                       \
4706     env->vstart = 0;                                                        \
4707 }
4708 
4709 GEN_VEXT_VSLIDE1UP(8,  H1)
4710 GEN_VEXT_VSLIDE1UP(16, H2)
4711 GEN_VEXT_VSLIDE1UP(32, H4)
4712 GEN_VEXT_VSLIDE1UP(64, H8)
4713 
4714 #define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ)                          \
4715 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4716                   CPURISCVState *env, uint32_t desc)              \
4717 {                                                                 \
4718     vslide1up_##ESZ(vd, v0, s1, vs2, env, desc);                  \
4719 }
4720 
4721 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
4722 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
4723 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
4724 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
4725 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
4726 
4727 #define GEN_VEXT_VSLIDE1DOWN(ESZ, H)                                          \
4728 static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
4729                        CPURISCVState *env, uint32_t desc)                     \
4730 {                                                                             \
4731     typedef uint##ESZ##_t ETYPE;                                              \
4732     uint32_t vm = vext_vm(desc);                                              \
4733     uint32_t vl = env->vl;                                                    \
4734     uint32_t i;                                                               \
4735                                                                               \
4736     for (i = env->vstart; i < vl; i++) {                                      \
4737         if (!vm && !vext_elem_mask(v0, i)) {                                  \
4738             continue;                                                         \
4739         }                                                                     \
4740         if (i == vl - 1) {                                                    \
4741             *((ETYPE *)vd + H(i)) = s1;                                       \
4742         } else {                                                              \
4743             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));               \
4744         }                                                                     \
4745     }                                                                         \
4746     env->vstart = 0;                                                          \
4747 }
4748 
4749 GEN_VEXT_VSLIDE1DOWN(8,  H1)
4750 GEN_VEXT_VSLIDE1DOWN(16, H2)
4751 GEN_VEXT_VSLIDE1DOWN(32, H4)
4752 GEN_VEXT_VSLIDE1DOWN(64, H8)
4753 
4754 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ)                        \
4755 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4756                   CPURISCVState *env, uint32_t desc)              \
4757 {                                                                 \
4758     vslide1down_##ESZ(vd, v0, s1, vs2, env, desc);                \
4759 }
4760 
4761 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
4762 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
4763 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
4764 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
4765 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
4766 
4767 /* Vector Floating-Point Slide Instructions */
4768 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ)                     \
4769 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4770                   CPURISCVState *env, uint32_t desc)          \
4771 {                                                             \
4772     vslide1up_##ESZ(vd, v0, s1, vs2, env, desc);              \
4773 }
4774 
4775 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
4776 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
4777 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
4778 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)
4779 
4780 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ)                   \
4781 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4782                   CPURISCVState *env, uint32_t desc)          \
4783 {                                                             \
4784     vslide1down_##ESZ(vd, v0, s1, vs2, env, desc);            \
4785 }
4786 
4787 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
4788 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
4789 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
4790 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
4791 
4792 /* Vector Register Gather Instruction */
4793 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2)                    \
4794 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
4795                   CPURISCVState *env, uint32_t desc)                      \
4796 {                                                                         \
4797     uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2)));             \
4798     uint32_t vm = vext_vm(desc);                                          \
4799     uint32_t vl = env->vl;                                                \
4800     uint64_t index;                                                       \
4801     uint32_t i;                                                           \
4802                                                                           \
4803     for (i = env->vstart; i < vl; i++) {                                  \
4804         if (!vm && !vext_elem_mask(v0, i)) {                              \
4805             continue;                                                     \
4806         }                                                                 \
4807         index = *((TS1 *)vs1 + HS1(i));                                   \
4808         if (index >= vlmax) {                                             \
4809             *((TS2 *)vd + HS2(i)) = 0;                                    \
4810         } else {                                                          \
4811             *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index));           \
4812         }                                                                 \
4813     }                                                                     \
4814     env->vstart = 0;                                                      \
4815 }
4816 
4817 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
4818 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t,  uint8_t,  H1, H1)
4819 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
4820 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
4821 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)
4822 
4823 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t,  H2, H1)
4824 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
4825 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
4826 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
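     /*
      * e.g. vrgather.vv with vs1 = {3, 0, 9, 1} and VLMAX = 4 yields
      * vd = {vs2[3], vs2[0], 0, vs2[1]}: any index >= VLMAX selects zero.
      * The vrgatherei16 variants read their indices as 16-bit values (TS1,
      * HS1) regardless of the data SEW.
      */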
4827 
4828 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                              \
4829 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4830                   CPURISCVState *env, uint32_t desc)                      \
4831 {                                                                         \
4832     uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
4833     uint32_t vm = vext_vm(desc);                                          \
4834     uint32_t vl = env->vl;                                                \
4835     uint64_t index = s1;                                                  \
4836     uint32_t i;                                                           \
4837                                                                           \
4838     for (i = env->vstart; i < vl; i++) {                                  \
4839         if (!vm && !vext_elem_mask(v0, i)) {                              \
4840             continue;                                                     \
4841         }                                                                 \
4842         if (index >= vlmax) {                                             \
4843             *((ETYPE *)vd + H(i)) = 0;                                    \
4844         } else {                                                          \
4845             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
4846         }                                                                 \
4847     }                                                                     \
4848     env->vstart = 0;                                                      \
4849 }
4850 
4851 /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
4852 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t,  H1)
4853 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
4854 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
4855 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
4856 
4857 /* Vector Compress Instruction */
4858 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
4859 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
4860                   CPURISCVState *env, uint32_t desc)                      \
4861 {                                                                         \
4862     uint32_t vl = env->vl;                                                \
4863     uint32_t num = 0, i;                                                  \
4864                                                                           \
4865     for (i = env->vstart; i < vl; i++) {                                  \
4866         if (!vext_elem_mask(vs1, i)) {                                    \
4867             continue;                                                     \
4868         }                                                                 \
4869         *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
4870         num++;                                                            \
4871     }                                                                     \
4872     env->vstart = 0;                                                      \
4873 }
4874 
4875 /* Compress into vd elements of vs2 where vs1 is enabled */
4876 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t,  H1)
4877 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
4878 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
4879 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
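     /*
      * e.g. with vs2 = {a, b, c, d} and vs1 mask = 1010b (elements 1 and 3
      * selected), vcompress.vm packs vd[0] = b, vd[1] = d and leaves the
      * remaining elements of vd unchanged.
      */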
4880 
4881 /* Vector Whole Register Move */
4882 #define GEN_VEXT_VMV_WHOLE(NAME, LEN)                      \
4883 void HELPER(NAME)(void *vd, void *vs2, CPURISCVState *env, \
4884                   uint32_t desc)                           \
4885 {                                                          \
4886     /* EEW = 8 */                                          \
4887     uint32_t maxsz = simd_maxsz(desc);                     \
4888     uint32_t i = env->vstart;                              \
4889                                                            \
4890     memcpy((uint8_t *)vd + H1(i),                          \
4891            (uint8_t *)vs2 + H1(i),                         \
4892            maxsz - env->vstart);                           \
4893                                                            \
4894     env->vstart = 0;                                       \
4895 }
4896 
4897 GEN_VEXT_VMV_WHOLE(vmv1r_v, 1)
4898 GEN_VEXT_VMV_WHOLE(vmv2r_v, 2)
4899 GEN_VEXT_VMV_WHOLE(vmv4r_v, 4)
4900 GEN_VEXT_VMV_WHOLE(vmv8r_v, 8)
4901 
4902 /* Vector Integer Extension */
4903 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)            \
4904 void HELPER(NAME)(void *vd, void *v0, void *vs2,                 \
4905                   CPURISCVState *env, uint32_t desc)             \
4906 {                                                                \
4907     uint32_t vl = env->vl;                                       \
4908     uint32_t vm = vext_vm(desc);                                 \
4909     uint32_t i;                                                  \
4910                                                                  \
4911     for (i = env->vstart; i < vl; i++) {                         \
4912         if (!vm && !vext_elem_mask(v0, i)) {                     \
4913             continue;                                            \
4914         }                                                        \
4915         *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));       \
4916     }                                                            \
4917     env->vstart = 0;                                             \
4918 }
4919 
4920 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t,  H2, H1)
4921 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
4922 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
4923 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t,  H4, H1)
4924 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
4925 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t,  H8, H1)
4926 
4927 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t,  H2, H1)
4928 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
4929 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
4930 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t,  H4, H1)
4931 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
4932 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t,  H8, H1)
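     /*
      * e.g. vzext.vf2 with destination SEW = 16 zero-extends each 8-bit
      * source element (0xff -> 0x00ff), while vsext.vf2 sign-extends it
      * (0xff -> 0xffff); the vf4/vf8 forms widen by 4x and 8x respectively.
      */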
4933