xref: /openbmc/qemu/target/riscv/vector_helper.c (revision 8a085fb2)
1 /*
2  * RISC-V Vector Extension Helpers for QEMU.
3  *
4  * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2 or later, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License along with
16  * this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include "qemu/osdep.h"
20 #include "qemu/host-utils.h"
21 #include "qemu/bitops.h"
22 #include "cpu.h"
23 #include "exec/memop.h"
24 #include "exec/exec-all.h"
25 #include "exec/helper-proto.h"
26 #include "fpu/softfloat.h"
27 #include "tcg/tcg-gvec-desc.h"
28 #include "internals.h"
29 #include <math.h>
30 
31 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
32                             target_ulong s2)
33 {
34     int vlmax, vl;
35     RISCVCPU *cpu = env_archcpu(env);
36     uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
37     uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
38     uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
39     int xlen = riscv_cpu_xlen(env);
40     bool vill = (s2 >> (xlen - 1)) & 0x1;
41     target_ulong reserved = s2 &
42                             MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
43                                             xlen - 1 - R_VTYPE_RESERVED_SHIFT);
44 
45     if (lmul & 4) {
46         /* Fractional LMUL. */
47         if (lmul == 4 ||
48             cpu->cfg.elen >> (8 - lmul) < sew) {
49             vill = true;
50         }
51     }
52 
53     if ((sew > cpu->cfg.elen)
54         || vill
55         || (ediv != 0)
56         || (reserved != 0)) {
57         /* only set vill bit. */
58         env->vill = 1;
59         env->vtype = 0;
60         env->vl = 0;
61         env->vstart = 0;
62         return 0;
63     }
64 
65     vlmax = vext_get_vlmax(cpu, s2);
66     if (s1 <= vlmax) {
67         vl = s1;
68     } else {
69         vl = vlmax;
70     }
71     env->vl = vl;
72     env->vtype = s2;
73     env->vstart = 0;
74     env->vill = 0;
75     return vl;
76 }
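/*
 * Worked example (illustrative): with VLEN = 128 and a requested vtype of
 * SEW = 16 (vsew = 001) and LMUL = 2 (vlmul = 001), VLMAX = LMUL * VLEN / SEW
 * = 16.  A vsetvli asking for AVL = 20 therefore returns vl = 16, while
 * AVL = 10 returns vl = 10.
 */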
77 
78 /*
79  * Note that vector data is stored in host-endian 64-bit chunks,
80  * so addressing units smaller than that needs a host-endian fixup.
81  */
82 #if HOST_BIG_ENDIAN
83 #define H1(x)   ((x) ^ 7)
84 #define H1_2(x) ((x) ^ 6)
85 #define H1_4(x) ((x) ^ 4)
86 #define H2(x)   ((x) ^ 3)
87 #define H4(x)   ((x) ^ 1)
88 #define H8(x)   ((x))
89 #else
90 #define H1(x)   (x)
91 #define H1_2(x) (x)
92 #define H1_4(x) (x)
93 #define H2(x)   (x)
94 #define H4(x)   (x)
95 #define H8(x)   (x)
96 #endif
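/*
 * Illustrative example of the fixup above: on a big-endian host the byte
 * element with guest index 0 lives at byte offset 7 of the first 64-bit
 * chunk, so H1(0) == 7 and H1(7) == 0, while a 32-bit element uses
 * H4(0) == 1.  On little-endian hosts the index is used unchanged.
 */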
97 
98 static inline uint32_t vext_nf(uint32_t desc)
99 {
100     return FIELD_EX32(simd_data(desc), VDATA, NF);
101 }
102 
103 static inline uint32_t vext_vm(uint32_t desc)
104 {
105     return FIELD_EX32(simd_data(desc), VDATA, VM);
106 }
107 
108 /*
109  * Encode LMUL to lmul as following:
110  *     LMUL    vlmul    lmul
111  *      1       000       0
112  *      2       001       1
113  *      4       010       2
114  *      8       011       3
115  *      -       100       -
116  *     1/8      101      -3
117  *     1/4      110      -2
118  *     1/2      111      -1
119  */
120 static inline int32_t vext_lmul(uint32_t desc)
121 {
122     return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
123 }
124 
125 /*
126  * Get the maximum number of elements that can be operated on.
127  *
128  * esz: log2 of element size in bytes.
129  */
130 static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
131 {
132     /*
133      * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
134      * so vlen in bytes (vlenb) is encoded as maxsz.
135      */
136     uint32_t vlenb = simd_maxsz(desc);
137 
138     /* Return VLMAX */
139     int scale = vext_lmul(desc) - esz;
140     return scale < 0 ? vlenb >> -scale : vlenb << scale;
141 }
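/*
 * Worked example (illustrative, assuming VLEN = 128, i.e. vlenb = 16):
 *   SEW = 16 (esz = 1), LMUL = 2   (lmul =  1): scale =  0, VLMAX = 16
 *   SEW = 32 (esz = 2), LMUL = 1/2 (lmul = -1): scale = -3, VLMAX = 16 >> 3 = 2
 * both of which match VLMAX = LMUL * VLEN / SEW.
 */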
142 
143 static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
144 {
145     return (addr & env->cur_pmmask) | env->cur_pmbase;
146 }
147 
148 /*
149  * This function checks watchpoints before the real load operation.
150  *
151  * In softmmu mode, the TLB API probe_access is enough for the watchpoint check.
152  * In user mode, there is no watchpoint support now.
153  *
154  * It will trigger an exception if there is no mapping in the TLB
155  * and the page table walk can't fill the TLB entry. Then the guest
156  * software can return here after processing the exception, or never return.
157  */
158 static void probe_pages(CPURISCVState *env, target_ulong addr,
159                         target_ulong len, uintptr_t ra,
160                         MMUAccessType access_type)
161 {
162     target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
163     target_ulong curlen = MIN(pagelen, len);
164 
165     probe_access(env, adjust_addr(env, addr), curlen, access_type,
166                  cpu_mmu_index(env, false), ra);
167     if (len > curlen) {
168         addr += curlen;
169         curlen = len - curlen;
170         probe_access(env, adjust_addr(env, addr), curlen, access_type,
171                      cpu_mmu_index(env, false), ra);
172     }
173 }
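/*
 * Illustrative example: with 4 KiB pages, probing addr = 0xffc with len = 8
 * gives pagelen = 4, so the first probe_access() covers the 4 bytes left on
 * the current page and the second covers the remaining 4 bytes starting at
 * 0x1000 on the next page.
 */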
174 
175 static inline void vext_set_elem_mask(void *v0, int index,
176                                       uint8_t value)
177 {
178     int idx = index / 64;
179     int pos = index % 64;
180     uint64_t old = ((uint64_t *)v0)[idx];
181     ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
182 }
183 
184 /*
185  * Earlier designs (pre-0.9) had a varying number of bits
186  * per mask value (MLEN). In the 0.9 design, MLEN=1.
187  * (Section 4.5)
188  */
189 static inline int vext_elem_mask(void *v0, int index)
190 {
191     int idx = index / 64;
192     int pos = index % 64;
193     return (((uint64_t *)v0)[idx] >> pos) & 1;
194 }
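/*
 * Illustrative example: the mask bit for element 70 is bit 6 of the second
 * 64-bit word of v0 (idx = 70 / 64 = 1, pos = 70 % 64 = 6).
 */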
195 
196 /* element operations for load and store */
197 typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
198                                uint32_t idx, void *vd, uintptr_t retaddr);
199 
200 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
201 static void NAME(CPURISCVState *env, abi_ptr addr,         \
202                  uint32_t idx, void *vd, uintptr_t retaddr)\
203 {                                                          \
204     ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
205     *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
206 }
207 
208 GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
209 GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
210 GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
211 GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)
212 
213 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
214 static void NAME(CPURISCVState *env, abi_ptr addr,         \
215                  uint32_t idx, void *vd, uintptr_t retaddr)\
216 {                                                          \
217     ETYPE data = *((ETYPE *)vd + H(idx));                  \
218     cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
219 }
220 
221 GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
222 GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
223 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
224 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
225 
226 /*
227  *** stride: access vector element from strided memory
228  */
229 static void
230 vext_ldst_stride(void *vd, void *v0, target_ulong base,
231                  target_ulong stride, CPURISCVState *env,
232                  uint32_t desc, uint32_t vm,
233                  vext_ldst_elem_fn *ldst_elem,
234                  uint32_t esz, uintptr_t ra, MMUAccessType access_type)
235 {
236     uint32_t i, k;
237     uint32_t nf = vext_nf(desc);
238     uint32_t max_elems = vext_max_elems(desc, esz);
239 
240     for (i = env->vstart; i < env->vl; i++, env->vstart++) {
241         if (!vm && !vext_elem_mask(v0, i)) {
242             continue;
243         }
244 
245         k = 0;
246         while (k < nf) {
247             target_ulong addr = base + stride * i + (k << esz);
248             ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
249             k++;
250         }
251     }
252     env->vstart = 0;
253 }
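/*
 * Illustrative example: a vlse32.v with stride = 12 loads element i from
 * base + 12 * i.  For a strided segment access with nf > 1, field k of
 * element i is accessed at base + stride * i + k * (1 << esz) and placed in
 * element i of the k-th register group (index i + k * max_elems above).
 */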
254 
255 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
256 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
257                   target_ulong stride, CPURISCVState *env,              \
258                   uint32_t desc)                                        \
259 {                                                                       \
260     uint32_t vm = vext_vm(desc);                                        \
261     vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
262                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);      \
263 }
264 
265 GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b)
266 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
267 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
268 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)
269 
270 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                       \
271 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
272                   target_ulong stride, CPURISCVState *env,              \
273                   uint32_t desc)                                        \
274 {                                                                       \
275     uint32_t vm = vext_vm(desc);                                        \
276     vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
277                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);     \
278 }
279 
280 GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b)
281 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
282 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
283 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
284 
285 /*
286  *** unit-stride: access elements stored contiguously in memory
287  */
288 
289 /* unmasked unit-stride load and store operation */
290 static void
291 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
292              vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl,
293              uintptr_t ra, MMUAccessType access_type)
294 {
295     uint32_t i, k;
296     uint32_t nf = vext_nf(desc);
297     uint32_t max_elems = vext_max_elems(desc, esz);
298 
299     /* load/store elements from/to guest memory */
300     for (i = env->vstart; i < evl; i++, env->vstart++) {
301         k = 0;
302         while (k < nf) {
303             target_ulong addr = base + ((i * nf + k) << esz);
304             ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
305             k++;
306         }
307     }
308     env->vstart = 0;
309 }
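/*
 * Illustrative example: for a unit-stride segment access with nf = 3 and
 * esz = 0 (byte elements), the fields are interleaved in memory as
 * f0 f1 f2 f0 f1 f2 ..., i.e. field k of element i sits at
 * base + i * 3 + k, matching the (i * nf + k) << esz computation above.
 */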
310 
311 /*
312  * A masked unit-stride load or store operation is a special case of the strided
313  * operation, with stride = NF * sizeof(MTYPE).
314  */
315 
316 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN)                            \
317 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
318                          CPURISCVState *env, uint32_t desc)             \
319 {                                                                       \
320     uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
321     vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
322                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);      \
323 }                                                                       \
324                                                                         \
325 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
326                   CPURISCVState *env, uint32_t desc)                    \
327 {                                                                       \
328     vext_ldst_us(vd, base, env, desc, LOAD_FN,                          \
329                  ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_LOAD); \
330 }
331 
332 GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b)
333 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
334 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
335 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)
336 
337 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN)                            \
338 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,          \
339                          CPURISCVState *env, uint32_t desc)              \
340 {                                                                        \
341     uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));              \
342     vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,   \
343                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);      \
344 }                                                                        \
345                                                                          \
346 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                 \
347                   CPURISCVState *env, uint32_t desc)                     \
348 {                                                                        \
349     vext_ldst_us(vd, base, env, desc, STORE_FN,                          \
350                  ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_STORE); \
351 }
352 
353 GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b)
354 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
355 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
356 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)
357 
358 /*
359  *** unit-stride mask load and store, EEW = 1
360  */
361 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
362                     CPURISCVState *env, uint32_t desc)
363 {
364     /* evl = ceil(vl/8) */
365     uint8_t evl = (env->vl + 7) >> 3;
366     vext_ldst_us(vd, base, env, desc, lde_b,
367                  0, evl, GETPC(), MMU_DATA_LOAD);
368 }
369 
370 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
371                     CPURISCVState *env, uint32_t desc)
372 {
373     /* evl = ceil(vl/8) */
374     uint8_t evl = (env->vl + 7) >> 3;
375     vext_ldst_us(vd, base, env, desc, ste_b,
376                  0, evl, GETPC(), MMU_DATA_STORE);
377 }
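/*
 * Illustrative example: with vl = 17, evl = ceil(17 / 8) = 3, so vlm.v/vsm.v
 * transfer exactly 3 bytes of the mask register regardless of SEW.
 */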
378 
379 /*
380  *** index: access vector element from indexed memory
381  */
382 typedef target_ulong vext_get_index_addr(target_ulong base,
383         uint32_t idx, void *vs2);
384 
385 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
386 static target_ulong NAME(target_ulong base,            \
387                          uint32_t idx, void *vs2)      \
388 {                                                      \
389     return (base + *((ETYPE *)vs2 + H(idx)));          \
390 }
391 
392 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t,  H1)
393 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
394 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
395 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
396 
397 static inline void
398 vext_ldst_index(void *vd, void *v0, target_ulong base,
399                 void *vs2, CPURISCVState *env, uint32_t desc,
400                 vext_get_index_addr get_index_addr,
401                 vext_ldst_elem_fn *ldst_elem,
402                 uint32_t esz, uintptr_t ra, MMUAccessType access_type)
403 {
404     uint32_t i, k;
405     uint32_t nf = vext_nf(desc);
406     uint32_t vm = vext_vm(desc);
407     uint32_t max_elems = vext_max_elems(desc, esz);
408 
409     /* load/store elements from/to guest memory */
410     for (i = env->vstart; i < env->vl; i++, env->vstart++) {
411         if (!vm && !vext_elem_mask(v0, i)) {
412             continue;
413         }
414 
415         k = 0;
416         while (k < nf) {
417             abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz);
418             ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
419             k++;
420         }
421     }
422     env->vstart = 0;
423 }
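/*
 * Illustrative example: for an indexed access with 16-bit indices and 32-bit
 * data elements (e.g. the vlxei16_32_v helper below), data element i is
 * accessed at base + zero-extended vs2[i]; the index EEW and the data EEW
 * are independent of each other.
 */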
424 
425 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)                  \
426 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                   \
427                   void *vs2, CPURISCVState *env, uint32_t desc)            \
428 {                                                                          \
429     vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,                \
430                     LOAD_FN, ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
431 }
432 
433 GEN_VEXT_LD_INDEX(vlxei8_8_v,   int8_t,  idx_b, lde_b)
434 GEN_VEXT_LD_INDEX(vlxei8_16_v,  int16_t, idx_b, lde_h)
435 GEN_VEXT_LD_INDEX(vlxei8_32_v,  int32_t, idx_b, lde_w)
436 GEN_VEXT_LD_INDEX(vlxei8_64_v,  int64_t, idx_b, lde_d)
437 GEN_VEXT_LD_INDEX(vlxei16_8_v,  int8_t,  idx_h, lde_b)
438 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
439 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
440 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
441 GEN_VEXT_LD_INDEX(vlxei32_8_v,  int8_t,  idx_w, lde_b)
442 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
443 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
444 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
445 GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b)
446 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
447 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
448 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)
449 
450 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)       \
451 void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
452                   void *vs2, CPURISCVState *env, uint32_t desc)  \
453 {                                                                \
454     vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
455                     STORE_FN, ctzl(sizeof(ETYPE)),               \
456                     GETPC(), MMU_DATA_STORE);                    \
457 }
458 
459 GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
460 GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
461 GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
462 GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
463 GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
464 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
465 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
466 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
467 GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
468 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
469 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
470 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
471 GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
472 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
473 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
474 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)
475 
476 /*
477  *** unit-stride fault-only-first load instructions
478  */
479 static inline void
480 vext_ldff(void *vd, void *v0, target_ulong base,
481           CPURISCVState *env, uint32_t desc,
482           vext_ldst_elem_fn *ldst_elem,
483           uint32_t esz, uintptr_t ra)
484 {
485     void *host;
486     uint32_t i, k, vl = 0;
487     uint32_t nf = vext_nf(desc);
488     uint32_t vm = vext_vm(desc);
489     uint32_t max_elems = vext_max_elems(desc, esz);
490     target_ulong addr, offset, remain;
491 
492     /* probe every access */
493     for (i = env->vstart; i < env->vl; i++) {
494         if (!vm && !vext_elem_mask(v0, i)) {
495             continue;
496         }
497         addr = adjust_addr(env, base + i * (nf << esz));
498         if (i == 0) {
499             probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
500         } else {
501             /* if it triggers an exception, no need to check watchpoint */
502             remain = nf << esz;
503             while (remain > 0) {
504                 offset = -(addr | TARGET_PAGE_MASK);
505                 host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
506                                          cpu_mmu_index(env, false));
507                 if (host) {
508 #ifdef CONFIG_USER_ONLY
509                     if (page_check_range(addr, offset, PAGE_READ) < 0) {
510                         vl = i;
511                         goto ProbeSuccess;
512                     }
513 #else
514                     probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
515 #endif
516                 } else {
517                     vl = i;
518                     goto ProbeSuccess;
519                 }
520                 if (remain <= offset) {
521                     break;
522                 }
523                 remain -= offset;
524                 addr = adjust_addr(env, addr + offset);
525             }
526         }
527     }
528 ProbeSuccess:
529     /* load bytes from guest memory */
530     if (vl != 0) {
531         env->vl = vl;
532     }
533     for (i = env->vstart; i < env->vl; i++) {
534         k = 0;
535         if (!vm && !vext_elem_mask(v0, i)) {
536             continue;
537         }
538         while (k < nf) {
539             target_ulong addr = base + ((i * nf + k) << esz);
540             ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
541             k++;
542         }
543     }
544     env->vstart = 0;
545 }
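/*
 * Illustrative example of the fault-only-first behaviour implemented above:
 * the first element is probed and may trap as usual, but if, say, the page
 * holding element 5 is unmapped, vl is reduced to 5 and elements 0..4 are
 * loaded without raising an exception.
 */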
546 
547 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
548 void HELPER(NAME)(void *vd, void *v0, target_ulong base,  \
549                   CPURISCVState *env, uint32_t desc)      \
550 {                                                         \
551     vext_ldff(vd, v0, base, env, desc, LOAD_FN,           \
552               ctzl(sizeof(ETYPE)), GETPC());              \
553 }
554 
555 GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b)
556 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
557 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
558 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
559 
560 #define DO_SWAP(N, M) (M)
561 #define DO_AND(N, M)  (N & M)
562 #define DO_XOR(N, M)  (N ^ M)
563 #define DO_OR(N, M)   (N | M)
564 #define DO_ADD(N, M)  (N + M)
565 
566 /* Signed min/max */
567 #define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
568 #define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))
569 
570 /* Unsigned min/max */
571 #define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
572 #define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)
573 
574 /*
575  *** load and store whole register instructions
576  */
577 static void
578 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
579                 vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra,
580                 MMUAccessType access_type)
581 {
582     uint32_t i, k, off, pos;
583     uint32_t nf = vext_nf(desc);
584     uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
585     uint32_t max_elems = vlenb >> esz;
586 
587     k = env->vstart / max_elems;
588     off = env->vstart % max_elems;
589 
590     if (off) {
591         /* load/store rest of elements of the current segment pointed to by vstart */
592         for (pos = off; pos < max_elems; pos++, env->vstart++) {
593             target_ulong addr = base + ((pos + k * max_elems) << esz);
594             ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra);
595         }
596         k++;
597     }
598 
599     /* load/store elements for rest of segments */
600     for (; k < nf; k++) {
601         for (i = 0; i < max_elems; i++, env->vstart++) {
602             target_ulong addr = base + ((i + k * max_elems) << esz);
603             ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
604         }
605     }
606 
607     env->vstart = 0;
608 }
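/*
 * Illustrative example: vl2re32.v always transfers 2 * vlenb bytes (two whole
 * vector registers) regardless of vl; with a non-zero vstart the code above
 * first finishes the partially transferred register (segment k) and then
 * continues with the remaining segments.
 */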
609 
610 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN)      \
611 void HELPER(NAME)(void *vd, target_ulong base,       \
612                   CPURISCVState *env, uint32_t desc) \
613 {                                                    \
614     vext_ldst_whole(vd, base, env, desc, LOAD_FN,    \
615                     ctzl(sizeof(ETYPE)), GETPC(),    \
616                     MMU_DATA_LOAD);                  \
617 }
618 
619 GEN_VEXT_LD_WHOLE(vl1re8_v,  int8_t,  lde_b)
620 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
621 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
622 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
623 GEN_VEXT_LD_WHOLE(vl2re8_v,  int8_t,  lde_b)
624 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
625 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
626 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
627 GEN_VEXT_LD_WHOLE(vl4re8_v,  int8_t,  lde_b)
628 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
629 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
630 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
631 GEN_VEXT_LD_WHOLE(vl8re8_v,  int8_t,  lde_b)
632 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
633 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
634 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)
635 
636 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN)     \
637 void HELPER(NAME)(void *vd, target_ulong base,       \
638                   CPURISCVState *env, uint32_t desc) \
639 {                                                    \
640     vext_ldst_whole(vd, base, env, desc, STORE_FN,   \
641                     ctzl(sizeof(ETYPE)), GETPC(),    \
642                     MMU_DATA_STORE);                 \
643 }
644 
645 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
646 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
647 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
648 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
649 
650 /*
651  *** Vector Integer Arithmetic Instructions
652  */
653 
654 /* expand macro args before macro */
655 #define RVVCALL(macro, ...)  macro(__VA_ARGS__)
656 
657 /* (TD, T1, T2, TX1, TX2) */
658 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
659 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
660 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
661 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
662 #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
663 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
664 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
665 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
666 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
667 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
668 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
669 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
670 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
671 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
672 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
673 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
674 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
675 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
676 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
677 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
678 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
679 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
680 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
681 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
682 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
683 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
684 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
685 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
686 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
687 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
688 
689 /* operation of two vector elements */
690 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
691 
692 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
693 static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
694 {                                                               \
695     TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
696     TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
697     *((TD *)vd + HD(i)) = OP(s2, s1);                           \
698 }
699 #define DO_SUB(N, M) (N - M)
700 #define DO_RSUB(N, M) (M - N)
701 
702 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
703 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
704 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
705 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
706 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
707 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
708 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
709 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
710 
711 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
712                        CPURISCVState *env, uint32_t desc,
713                        opivv2_fn *fn)
714 {
715     uint32_t vm = vext_vm(desc);
716     uint32_t vl = env->vl;
717     uint32_t i;
718 
719     for (i = env->vstart; i < vl; i++) {
720         if (!vm && !vext_elem_mask(v0, i)) {
721             continue;
722         }
723         fn(vd, vs1, vs2, i);
724     }
725     env->vstart = 0;
726 }
727 
728 /* generate the helpers for OPIVV */
729 #define GEN_VEXT_VV(NAME)                                 \
730 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
731                   void *vs2, CPURISCVState *env,          \
732                   uint32_t desc)                          \
733 {                                                         \
734     do_vext_vv(vd, v0, vs1, vs2, env, desc,               \
735                do_##NAME);                                \
736 }
737 
738 GEN_VEXT_VV(vadd_vv_b)
739 GEN_VEXT_VV(vadd_vv_h)
740 GEN_VEXT_VV(vadd_vv_w)
741 GEN_VEXT_VV(vadd_vv_d)
742 GEN_VEXT_VV(vsub_vv_b)
743 GEN_VEXT_VV(vsub_vv_h)
744 GEN_VEXT_VV(vsub_vv_w)
745 GEN_VEXT_VV(vsub_vv_d)
746 
747 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
748 
749 /*
750  * (T1)s1 gives the real operand type.
751  * (TX1)(T1)s1 expands the operand type for widening or narrowing operations.
752  */
753 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
754 static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
755 {                                                                   \
756     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
757     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
758 }
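/*
 * Illustrative example: for vwadd.vx with byte sources (WOP_SSS_B: TD = int16_t,
 * T1 = int8_t, TX1 = int16_t), (TX1)(T1)s1 first truncates the x-register value
 * to 8 bits and then sign-extends it to 16 bits before the widening add.
 */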
759 
760 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
761 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
762 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
763 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
764 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
765 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
766 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
767 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
768 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
769 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
770 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
771 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
772 
773 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
774                        CPURISCVState *env, uint32_t desc,
775                        opivx2_fn fn)
776 {
777     uint32_t vm = vext_vm(desc);
778     uint32_t vl = env->vl;
779     uint32_t i;
780 
781     for (i = env->vstart; i < vl; i++) {
782         if (!vm && !vext_elem_mask(v0, i)) {
783             continue;
784         }
785         fn(vd, s1, vs2, i);
786     }
787     env->vstart = 0;
788 }
789 
790 /* generate the helpers for OPIVX */
791 #define GEN_VEXT_VX(NAME)                                 \
792 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
793                   void *vs2, CPURISCVState *env,          \
794                   uint32_t desc)                          \
795 {                                                         \
796     do_vext_vx(vd, v0, s1, vs2, env, desc,                \
797                do_##NAME);                                \
798 }
799 
800 GEN_VEXT_VX(vadd_vx_b)
801 GEN_VEXT_VX(vadd_vx_h)
802 GEN_VEXT_VX(vadd_vx_w)
803 GEN_VEXT_VX(vadd_vx_d)
804 GEN_VEXT_VX(vsub_vx_b)
805 GEN_VEXT_VX(vsub_vx_h)
806 GEN_VEXT_VX(vsub_vx_w)
807 GEN_VEXT_VX(vsub_vx_d)
808 GEN_VEXT_VX(vrsub_vx_b)
809 GEN_VEXT_VX(vrsub_vx_h)
810 GEN_VEXT_VX(vrsub_vx_w)
811 GEN_VEXT_VX(vrsub_vx_d)
812 
813 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
814 {
815     intptr_t oprsz = simd_oprsz(desc);
816     intptr_t i;
817 
818     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
819         *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
820     }
821 }
822 
823 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
824 {
825     intptr_t oprsz = simd_oprsz(desc);
826     intptr_t i;
827 
828     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
829         *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
830     }
831 }
832 
833 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
834 {
835     intptr_t oprsz = simd_oprsz(desc);
836     intptr_t i;
837 
838     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
839         *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
840     }
841 }
842 
843 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
844 {
845     intptr_t oprsz = simd_oprsz(desc);
846     intptr_t i;
847 
848     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
849         *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
850     }
851 }
852 
853 /* Vector Widening Integer Add/Subtract */
854 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
855 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
856 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
857 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
858 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
859 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
860 #define WOP_WUUU_B  uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
861 #define WOP_WUUU_H  uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
862 #define WOP_WUUU_W  uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
863 #define WOP_WSSS_B  int16_t, int8_t, int16_t, int16_t, int16_t
864 #define WOP_WSSS_H  int32_t, int16_t, int32_t, int32_t, int32_t
865 #define WOP_WSSS_W  int64_t, int32_t, int64_t, int64_t, int64_t
866 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
867 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
868 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
869 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
870 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
871 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
872 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
873 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
874 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
875 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
876 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
877 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
878 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
879 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
880 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
881 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
882 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
883 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
884 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
885 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
886 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
887 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
888 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
889 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
890 GEN_VEXT_VV(vwaddu_vv_b)
891 GEN_VEXT_VV(vwaddu_vv_h)
892 GEN_VEXT_VV(vwaddu_vv_w)
893 GEN_VEXT_VV(vwsubu_vv_b)
894 GEN_VEXT_VV(vwsubu_vv_h)
895 GEN_VEXT_VV(vwsubu_vv_w)
896 GEN_VEXT_VV(vwadd_vv_b)
897 GEN_VEXT_VV(vwadd_vv_h)
898 GEN_VEXT_VV(vwadd_vv_w)
899 GEN_VEXT_VV(vwsub_vv_b)
900 GEN_VEXT_VV(vwsub_vv_h)
901 GEN_VEXT_VV(vwsub_vv_w)
902 GEN_VEXT_VV(vwaddu_wv_b)
903 GEN_VEXT_VV(vwaddu_wv_h)
904 GEN_VEXT_VV(vwaddu_wv_w)
905 GEN_VEXT_VV(vwsubu_wv_b)
906 GEN_VEXT_VV(vwsubu_wv_h)
907 GEN_VEXT_VV(vwsubu_wv_w)
908 GEN_VEXT_VV(vwadd_wv_b)
909 GEN_VEXT_VV(vwadd_wv_h)
910 GEN_VEXT_VV(vwadd_wv_w)
911 GEN_VEXT_VV(vwsub_wv_b)
912 GEN_VEXT_VV(vwsub_wv_h)
913 GEN_VEXT_VV(vwsub_wv_w)
914 
915 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
916 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
917 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
918 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
919 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
920 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
921 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
922 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
923 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
924 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
925 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
926 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
927 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
928 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
929 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
930 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
931 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
932 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
933 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
934 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
935 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
936 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
937 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
938 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
939 GEN_VEXT_VX(vwaddu_vx_b)
940 GEN_VEXT_VX(vwaddu_vx_h)
941 GEN_VEXT_VX(vwaddu_vx_w)
942 GEN_VEXT_VX(vwsubu_vx_b)
943 GEN_VEXT_VX(vwsubu_vx_h)
944 GEN_VEXT_VX(vwsubu_vx_w)
945 GEN_VEXT_VX(vwadd_vx_b)
946 GEN_VEXT_VX(vwadd_vx_h)
947 GEN_VEXT_VX(vwadd_vx_w)
948 GEN_VEXT_VX(vwsub_vx_b)
949 GEN_VEXT_VX(vwsub_vx_h)
950 GEN_VEXT_VX(vwsub_vx_w)
951 GEN_VEXT_VX(vwaddu_wx_b)
952 GEN_VEXT_VX(vwaddu_wx_h)
953 GEN_VEXT_VX(vwaddu_wx_w)
954 GEN_VEXT_VX(vwsubu_wx_b)
955 GEN_VEXT_VX(vwsubu_wx_h)
956 GEN_VEXT_VX(vwsubu_wx_w)
957 GEN_VEXT_VX(vwadd_wx_b)
958 GEN_VEXT_VX(vwadd_wx_h)
959 GEN_VEXT_VX(vwadd_wx_w)
960 GEN_VEXT_VX(vwsub_wx_b)
961 GEN_VEXT_VX(vwsub_wx_h)
962 GEN_VEXT_VX(vwsub_wx_w)
963 
964 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
965 #define DO_VADC(N, M, C) (N + M + C)
966 #define DO_VSBC(N, M, C) (N - M - C)
967 
968 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP)              \
969 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
970                   CPURISCVState *env, uint32_t desc)          \
971 {                                                             \
972     uint32_t vl = env->vl;                                    \
973     uint32_t i;                                               \
974                                                               \
975     for (i = env->vstart; i < vl; i++) {                      \
976         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
977         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
978         ETYPE carry = vext_elem_mask(v0, i);                  \
979                                                               \
980         *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry);         \
981     }                                                         \
982     env->vstart = 0;                                          \
983 }
984 
985 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t,  H1, DO_VADC)
986 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
987 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
988 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)
989 
990 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t,  H1, DO_VSBC)
991 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
992 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
993 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
994 
995 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP)                         \
996 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
997                   CPURISCVState *env, uint32_t desc)                     \
998 {                                                                        \
999     uint32_t vl = env->vl;                                               \
1000     uint32_t i;                                                          \
1001                                                                          \
1002     for (i = env->vstart; i < vl; i++) {                                 \
1003         ETYPE s2 = *((ETYPE *)vs2 + H(i));                               \
1004         ETYPE carry = vext_elem_mask(v0, i);                             \
1005                                                                          \
1006         *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
1007     }                                                                    \
1008     env->vstart = 0;                                          \
1009 }
1010 
1011 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t,  H1, DO_VADC)
1012 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
1013 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
1014 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)
1015 
1016 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t,  H1, DO_VSBC)
1017 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
1018 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
1019 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
1020 
1021 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N :           \
1022                           (__typeof(N))(N + M) < N)
1023 #define DO_MSBC(N, M, C) (C ? N <= M : N < M)
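/*
 * Illustrative example: DO_MADC computes the carry-out of N + M + C using
 * unsigned wrap-around, e.g. for uint8_t N = 200, M = 100, C = 0:
 * (uint8_t)(N + M) == 44 < 200, so the carry bit is 1.
 */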
1024 
1025 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
1026 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
1027                   CPURISCVState *env, uint32_t desc)          \
1028 {                                                             \
1029     uint32_t vl = env->vl;                                    \
1030     uint32_t vm = vext_vm(desc);                              \
1031     uint32_t i;                                               \
1032                                                               \
1033     for (i = env->vstart; i < vl; i++) {                      \
1034         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
1035         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
1036         ETYPE carry = !vm && vext_elem_mask(v0, i);           \
1037         vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry));      \
1038     }                                                         \
1039     env->vstart = 0;                                          \
1040 }
1041 
1042 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t,  H1, DO_MADC)
1043 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
1044 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
1045 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)
1046 
1047 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t,  H1, DO_MSBC)
1048 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
1049 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
1050 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
1051 
1052 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)               \
1053 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
1054                   void *vs2, CPURISCVState *env, uint32_t desc) \
1055 {                                                               \
1056     uint32_t vl = env->vl;                                      \
1057     uint32_t vm = vext_vm(desc);                                \
1058     uint32_t i;                                                 \
1059                                                                 \
1060     for (i = env->vstart; i < vl; i++) {                        \
1061         ETYPE s2 = *((ETYPE *)vs2 + H(i));                      \
1062         ETYPE carry = !vm && vext_elem_mask(v0, i);             \
1063         vext_set_elem_mask(vd, i,                               \
1064                 DO_OP(s2, (ETYPE)(target_long)s1, carry));      \
1065     }                                                           \
1066     env->vstart = 0;                                            \
1067 }
1068 
1069 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t,  H1, DO_MADC)
1070 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
1071 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
1072 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)
1073 
1074 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t,  H1, DO_MSBC)
1075 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
1076 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
1077 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
1078 
1079 /* Vector Bitwise Logical Instructions */
1080 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
1081 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
1082 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
1083 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
1084 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
1085 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
1086 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
1087 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
1088 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
1089 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
1090 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
1091 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
1092 GEN_VEXT_VV(vand_vv_b)
1093 GEN_VEXT_VV(vand_vv_h)
1094 GEN_VEXT_VV(vand_vv_w)
1095 GEN_VEXT_VV(vand_vv_d)
1096 GEN_VEXT_VV(vor_vv_b)
1097 GEN_VEXT_VV(vor_vv_h)
1098 GEN_VEXT_VV(vor_vv_w)
1099 GEN_VEXT_VV(vor_vv_d)
1100 GEN_VEXT_VV(vxor_vv_b)
1101 GEN_VEXT_VV(vxor_vv_h)
1102 GEN_VEXT_VV(vxor_vv_w)
1103 GEN_VEXT_VV(vxor_vv_d)
1104 
1105 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
1106 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
1107 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
1108 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
1109 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
1110 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
1111 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
1112 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
1113 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
1114 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
1115 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
1116 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
1117 GEN_VEXT_VX(vand_vx_b)
1118 GEN_VEXT_VX(vand_vx_h)
1119 GEN_VEXT_VX(vand_vx_w)
1120 GEN_VEXT_VX(vand_vx_d)
1121 GEN_VEXT_VX(vor_vx_b)
1122 GEN_VEXT_VX(vor_vx_h)
1123 GEN_VEXT_VX(vor_vx_w)
1124 GEN_VEXT_VX(vor_vx_d)
1125 GEN_VEXT_VX(vxor_vx_b)
1126 GEN_VEXT_VX(vxor_vx_h)
1127 GEN_VEXT_VX(vxor_vx_w)
1128 GEN_VEXT_VX(vxor_vx_d)
1129 
1130 /* Vector Single-Width Bit Shift Instructions */
1131 #define DO_SLL(N, M)  (N << (M))
1132 #define DO_SRL(N, M)  (N >> (M))
1133 
1134 /* generate the helpers for shift instructions with two vector operands */
1135 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK)             \
1136 void HELPER(NAME)(void *vd, void *v0, void *vs1,                          \
1137                   void *vs2, CPURISCVState *env, uint32_t desc)           \
1138 {                                                                         \
1139     uint32_t vm = vext_vm(desc);                                          \
1140     uint32_t vl = env->vl;                                                \
1141     uint32_t i;                                                           \
1142                                                                           \
1143     for (i = env->vstart; i < vl; i++) {                                  \
1144         if (!vm && !vext_elem_mask(v0, i)) {                              \
1145             continue;                                                     \
1146         }                                                                 \
1147         TS1 s1 = *((TS1 *)vs1 + HS1(i));                                  \
1148         TS2 s2 = *((TS2 *)vs2 + HS2(i));                                  \
1149         *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);                        \
1150     }                                                                     \
1151     env->vstart = 0;                                                      \
1152 }
1153 
1154 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t,  uint8_t, H1, H1, DO_SLL, 0x7)
1155 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
1156 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
1157 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)
1158 
1159 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1160 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1161 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1162 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1163 
1164 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t,  int8_t, H1, H1, DO_SRL, 0x7)
1165 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
1166 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1167 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
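/*
 * Note (illustrative): vsra re-uses DO_SRL but instantiates it with a signed
 * source type (e.g. int8_t for vsra_vv_b), so the C ">>" is an arithmetic
 * right shift on the hosts QEMU supports; the MASK argument limits the shift
 * amount to log2(SEW) bits as required.
 */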
1168 
1169 /* generate the helpers for shift instructions with one vector and one scalar */
1170 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
1171 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,      \
1172         void *vs2, CPURISCVState *env, uint32_t desc)       \
1173 {                                                           \
1174     uint32_t vm = vext_vm(desc);                            \
1175     uint32_t vl = env->vl;                                  \
1176     uint32_t i;                                             \
1177                                                             \
1178     for (i = env->vstart; i < vl; i++) {                    \
1179         if (!vm && !vext_elem_mask(v0, i)) {                \
1180             continue;                                       \
1181         }                                                   \
1182         TS2 s2 = *((TS2 *)vs2 + HS2(i));                    \
1183         *((TD *)vd + HD(i)) = OP(s2, s1 & MASK);            \
1184     }                                                       \
1185     env->vstart = 0;                                        \
1186 }
1187 
1188 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
1189 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
1190 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
1191 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)
1192 
1193 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1194 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1195 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1196 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1197 
1198 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
1199 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
1200 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1201 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1202 
1203 /* Vector Narrowing Integer Right Shift Instructions */
1204 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t,  uint16_t, H1, H2, DO_SRL, 0xf)
1205 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1206 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1207 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t,  int16_t, H1, H2, DO_SRL, 0xf)
1208 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1209 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1210 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1211 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1212 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1213 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
1214 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1215 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1216 
1217 /* Vector Integer Comparison Instructions */
1218 #define DO_MSEQ(N, M) (N == M)
1219 #define DO_MSNE(N, M) (N != M)
1220 #define DO_MSLT(N, M) (N < M)
1221 #define DO_MSLE(N, M) (N <= M)
1222 #define DO_MSGT(N, M) (N > M)
1223 
1224 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                \
1225 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
1226                   CPURISCVState *env, uint32_t desc)          \
1227 {                                                             \
1228     uint32_t vm = vext_vm(desc);                              \
1229     uint32_t vl = env->vl;                                    \
1230     uint32_t i;                                               \
1231                                                               \
1232     for (i = env->vstart; i < vl; i++) {                      \
1233         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
1234         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
1235         if (!vm && !vext_elem_mask(v0, i)) {                  \
1236             continue;                                         \
1237         }                                                     \
1238         vext_set_elem_mask(vd, i, DO_OP(s2, s1));             \
1239     }                                                         \
1240     env->vstart = 0;                                          \
1241 }
1242 
1243 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
1244 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
1245 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
1246 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
1247 
1248 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t,  H1, DO_MSNE)
1249 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
1250 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
1251 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
1252 
1253 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t,  H1, DO_MSLT)
1254 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
1255 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
1256 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
1257 
1258 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t,  H1, DO_MSLT)
1259 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
1260 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
1261 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
1262 
1263 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t,  H1, DO_MSLE)
1264 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
1265 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
1266 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
1267 
1268 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t,  H1, DO_MSLE)
1269 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
1270 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
1271 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1272 
1273 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP)                      \
1274 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
1275                   CPURISCVState *env, uint32_t desc)                \
1276 {                                                                   \
1277     uint32_t vm = vext_vm(desc);                                    \
1278     uint32_t vl = env->vl;                                          \
1279     uint32_t i;                                                     \
1280                                                                     \
1281     for (i = env->vstart; i < vl; i++) {                            \
1282         ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
1283         if (!vm && !vext_elem_mask(v0, i)) {                        \
1284             continue;                                               \
1285         }                                                           \
1286         vext_set_elem_mask(vd, i,                                   \
1287                 DO_OP(s2, (ETYPE)(target_long)s1));                 \
1288     }                                                               \
1289     env->vstart = 0;                                                \
1290 }
1291 
1292 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t,  H1, DO_MSEQ)
1293 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
1294 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
1295 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
1296 
1297 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t,  H1, DO_MSNE)
1298 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
1299 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
1300 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
1301 
1302 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t,  H1, DO_MSLT)
1303 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
1304 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
1305 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
1306 
1307 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t,  H1, DO_MSLT)
1308 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
1309 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
1310 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
1311 
1312 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t,  H1, DO_MSLE)
1313 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
1314 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
1315 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
1316 
1317 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t,  H1, DO_MSLE)
1318 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
1319 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
1320 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
1321 
1322 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t,  H1, DO_MSGT)
1323 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
1324 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
1325 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
1326 
1327 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t,  H1, DO_MSGT)
1328 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
1329 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
1330 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
1331 
1332 /* Vector Integer Min/Max Instructions */
1333 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1334 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1335 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1336 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1337 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1338 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1339 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1340 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1341 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1342 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1343 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1344 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1345 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1346 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1347 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1348 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
1349 GEN_VEXT_VV(vminu_vv_b)
1350 GEN_VEXT_VV(vminu_vv_h)
1351 GEN_VEXT_VV(vminu_vv_w)
1352 GEN_VEXT_VV(vminu_vv_d)
1353 GEN_VEXT_VV(vmin_vv_b)
1354 GEN_VEXT_VV(vmin_vv_h)
1355 GEN_VEXT_VV(vmin_vv_w)
1356 GEN_VEXT_VV(vmin_vv_d)
1357 GEN_VEXT_VV(vmaxu_vv_b)
1358 GEN_VEXT_VV(vmaxu_vv_h)
1359 GEN_VEXT_VV(vmaxu_vv_w)
1360 GEN_VEXT_VV(vmaxu_vv_d)
1361 GEN_VEXT_VV(vmax_vv_b)
1362 GEN_VEXT_VV(vmax_vv_h)
1363 GEN_VEXT_VV(vmax_vv_w)
1364 GEN_VEXT_VV(vmax_vv_d)
1365 
1366 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1367 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1368 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1369 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1370 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1371 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1372 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1373 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1374 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1375 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1376 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1377 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1378 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1379 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1380 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1381 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
1382 GEN_VEXT_VX(vminu_vx_b)
1383 GEN_VEXT_VX(vminu_vx_h)
1384 GEN_VEXT_VX(vminu_vx_w)
1385 GEN_VEXT_VX(vminu_vx_d)
1386 GEN_VEXT_VX(vmin_vx_b)
1387 GEN_VEXT_VX(vmin_vx_h)
1388 GEN_VEXT_VX(vmin_vx_w)
1389 GEN_VEXT_VX(vmin_vx_d)
1390 GEN_VEXT_VX(vmaxu_vx_b)
1391 GEN_VEXT_VX(vmaxu_vx_h)
1392 GEN_VEXT_VX(vmaxu_vx_w)
1393 GEN_VEXT_VX(vmaxu_vx_d)
1394 GEN_VEXT_VX(vmax_vx_b)
1395 GEN_VEXT_VX(vmax_vx_h)
1396 GEN_VEXT_VX(vmax_vx_w)
1397 GEN_VEXT_VX(vmax_vx_d)
1398 
1399 /* Vector Single-Width Integer Multiply Instructions */
1400 #define DO_MUL(N, M) (N * M)
1401 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1402 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1403 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1404 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
1405 GEN_VEXT_VV(vmul_vv_b)
1406 GEN_VEXT_VV(vmul_vv_h)
1407 GEN_VEXT_VV(vmul_vv_w)
1408 GEN_VEXT_VV(vmul_vv_d)
1409 
1410 static int8_t do_mulh_b(int8_t s2, int8_t s1)
1411 {
1412     return (int16_t)s2 * (int16_t)s1 >> 8;
1413 }
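/*
 * E.g. do_mulh_b(-128, 127): the product is widened to int16_t
 * (-16256 = 0xc080) and shifted right arithmetically by 8, so the
 * returned high byte is 0xc0 = -64.
 */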
1414 
1415 static int16_t do_mulh_h(int16_t s2, int16_t s1)
1416 {
1417     return (int32_t)s2 * (int32_t)s1 >> 16;
1418 }
1419 
1420 static int32_t do_mulh_w(int32_t s2, int32_t s1)
1421 {
1422     return (int64_t)s2 * (int64_t)s1 >> 32;
1423 }
1424 
1425 static int64_t do_mulh_d(int64_t s2, int64_t s1)
1426 {
1427     uint64_t hi_64, lo_64;
1428 
1429     muls64(&lo_64, &hi_64, s1, s2);
1430     return hi_64;
1431 }
1432 
1433 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1434 {
1435     return (uint16_t)s2 * (uint16_t)s1 >> 8;
1436 }
1437 
1438 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1439 {
1440     return (uint32_t)s2 * (uint32_t)s1 >> 16;
1441 }
1442 
1443 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1444 {
1445     return (uint64_t)s2 * (uint64_t)s1 >> 32;
1446 }
1447 
1448 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1449 {
1450     uint64_t hi_64, lo_64;
1451 
1452     mulu64(&lo_64, &hi_64, s2, s1);
1453     return hi_64;
1454 }
1455 
1456 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1457 {
1458     return (int16_t)s2 * (uint16_t)s1 >> 8;
1459 }
1460 
1461 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1462 {
1463     return (int32_t)s2 * (uint32_t)s1 >> 16;
1464 }
1465 
1466 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1467 {
1468     return (int64_t)s2 * (uint64_t)s1 >> 32;
1469 }
1470 
1471 /*
1472  * Let  A = signed operand (s2),
1473  *      B = unsigned operand (s1),
1474  *      P = mulu64(A, B), the unsigned product of the bit patterns,
1475  *      SP = A * B, the desired signed x unsigned product.
1476  *
1477  * IF A < 0, mulu64() sees A's bit pattern as A + 2 ** 64, so
1478  *      P  = (A + 2 ** 64) * B
1479  *         = A * B + 2 ** 64 * B
1480  *      SP = A * B
1481  *         = P - 2 ** 64 * B
1482  * ELSE
1483  *      SP = P
1484  *
1485  * Since the 2 ** 64 * B term only affects the upper half of the
1486  * 128-bit product, the fixup is
1487  *      HI_P -= (A < 0 ? B : 0)
1488  */
1489 
1490 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1491 {
1492     uint64_t hi_64, lo_64;
1493 
1494     mulu64(&lo_64, &hi_64, s2, s1);
1495 
1496     hi_64 -= s2 < 0 ? s1 : 0;
1497     return hi_64;
1498 }
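/*
 * Worked example, scaled down to an 8-bit x 8-bit -> 16-bit multiply
 * for readability: s2 = -2 (bit pattern 0xfe), s1 = 3.  The unsigned
 * product of the bit patterns is 0xfe * 3 = 0x02fa, so the
 * uncorrected high byte is 0x02.  The true signed x unsigned product
 * is -6 = 0xfffa, whose high byte is 0xff, i.e. exactly
 * 0x02 - s1 = 0x02 - 3 (mod 2 ** 8).
 */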
1499 
1500 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1501 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1502 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1503 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1504 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1505 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1506 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1507 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1508 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1509 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1510 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1511 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
1512 GEN_VEXT_VV(vmulh_vv_b)
1513 GEN_VEXT_VV(vmulh_vv_h)
1514 GEN_VEXT_VV(vmulh_vv_w)
1515 GEN_VEXT_VV(vmulh_vv_d)
1516 GEN_VEXT_VV(vmulhu_vv_b)
1517 GEN_VEXT_VV(vmulhu_vv_h)
1518 GEN_VEXT_VV(vmulhu_vv_w)
1519 GEN_VEXT_VV(vmulhu_vv_d)
1520 GEN_VEXT_VV(vmulhsu_vv_b)
1521 GEN_VEXT_VV(vmulhsu_vv_h)
1522 GEN_VEXT_VV(vmulhsu_vv_w)
1523 GEN_VEXT_VV(vmulhsu_vv_d)
1524 
1525 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1526 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1527 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1528 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1529 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1530 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1531 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1532 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1533 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1534 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1535 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1536 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1537 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1538 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1539 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1540 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
1541 GEN_VEXT_VX(vmul_vx_b)
1542 GEN_VEXT_VX(vmul_vx_h)
1543 GEN_VEXT_VX(vmul_vx_w)
1544 GEN_VEXT_VX(vmul_vx_d)
1545 GEN_VEXT_VX(vmulh_vx_b)
1546 GEN_VEXT_VX(vmulh_vx_h)
1547 GEN_VEXT_VX(vmulh_vx_w)
1548 GEN_VEXT_VX(vmulh_vx_d)
1549 GEN_VEXT_VX(vmulhu_vx_b)
1550 GEN_VEXT_VX(vmulhu_vx_h)
1551 GEN_VEXT_VX(vmulhu_vx_w)
1552 GEN_VEXT_VX(vmulhu_vx_d)
1553 GEN_VEXT_VX(vmulhsu_vx_b)
1554 GEN_VEXT_VX(vmulhsu_vx_h)
1555 GEN_VEXT_VX(vmulhsu_vx_w)
1556 GEN_VEXT_VX(vmulhsu_vx_d)
1557 
1558 /* Vector Integer Divide Instructions */
1559 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1560 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
1561 #define DO_DIV(N, M)  (unlikely(M == 0) ? (__typeof(N))(-1) :\
1562         unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
1563 #define DO_REM(N, M)  (unlikely(M == 0) ? N :\
1564         unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
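/*
 * (N == -N) holds only for 0 and for the most negative value of N's
 * type (negating INT_MIN wraps back to itself).  Together with
 * M == -1 this catches the one signed case whose quotient overflows:
 * the spec defines vdiv to return the dividend and vrem to return 0
 * there (which is also what 0 / -1 and 0 % -1 produce, so matching
 * N == 0 is harmless).  Division by zero returns all ones (-1) for
 * vdiv[u] and the unchanged dividend for vrem[u].
 */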
1565 
1566 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1567 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1568 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1569 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1570 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1571 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1572 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1573 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1574 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1575 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1576 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1577 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1578 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1579 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1580 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1581 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
1582 GEN_VEXT_VV(vdivu_vv_b)
1583 GEN_VEXT_VV(vdivu_vv_h)
1584 GEN_VEXT_VV(vdivu_vv_w)
1585 GEN_VEXT_VV(vdivu_vv_d)
1586 GEN_VEXT_VV(vdiv_vv_b)
1587 GEN_VEXT_VV(vdiv_vv_h)
1588 GEN_VEXT_VV(vdiv_vv_w)
1589 GEN_VEXT_VV(vdiv_vv_d)
1590 GEN_VEXT_VV(vremu_vv_b)
1591 GEN_VEXT_VV(vremu_vv_h)
1592 GEN_VEXT_VV(vremu_vv_w)
1593 GEN_VEXT_VV(vremu_vv_d)
1594 GEN_VEXT_VV(vrem_vv_b)
1595 GEN_VEXT_VV(vrem_vv_h)
1596 GEN_VEXT_VV(vrem_vv_w)
1597 GEN_VEXT_VV(vrem_vv_d)
1598 
1599 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1600 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1601 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1602 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1603 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1604 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1605 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1606 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1607 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1608 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1609 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1610 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1611 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1612 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1613 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1614 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
1615 GEN_VEXT_VX(vdivu_vx_b)
1616 GEN_VEXT_VX(vdivu_vx_h)
1617 GEN_VEXT_VX(vdivu_vx_w)
1618 GEN_VEXT_VX(vdivu_vx_d)
1619 GEN_VEXT_VX(vdiv_vx_b)
1620 GEN_VEXT_VX(vdiv_vx_h)
1621 GEN_VEXT_VX(vdiv_vx_w)
1622 GEN_VEXT_VX(vdiv_vx_d)
1623 GEN_VEXT_VX(vremu_vx_b)
1624 GEN_VEXT_VX(vremu_vx_h)
1625 GEN_VEXT_VX(vremu_vx_w)
1626 GEN_VEXT_VX(vremu_vx_d)
1627 GEN_VEXT_VX(vrem_vx_b)
1628 GEN_VEXT_VX(vrem_vx_h)
1629 GEN_VEXT_VX(vrem_vx_w)
1630 GEN_VEXT_VX(vrem_vx_d)
1631 
1632 /* Vector Widening Integer Multiply Instructions */
1633 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1634 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1635 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1636 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1637 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1638 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1639 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1640 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1641 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
1642 GEN_VEXT_VV(vwmul_vv_b)
1643 GEN_VEXT_VV(vwmul_vv_h)
1644 GEN_VEXT_VV(vwmul_vv_w)
1645 GEN_VEXT_VV(vwmulu_vv_b)
1646 GEN_VEXT_VV(vwmulu_vv_h)
1647 GEN_VEXT_VV(vwmulu_vv_w)
1648 GEN_VEXT_VV(vwmulsu_vv_b)
1649 GEN_VEXT_VV(vwmulsu_vv_h)
1650 GEN_VEXT_VV(vwmulsu_vv_w)
1651 
1652 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1653 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1654 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1655 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1656 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1657 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1658 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1659 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1660 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
1661 GEN_VEXT_VX(vwmul_vx_b)
1662 GEN_VEXT_VX(vwmul_vx_h)
1663 GEN_VEXT_VX(vwmul_vx_w)
1664 GEN_VEXT_VX(vwmulu_vx_b)
1665 GEN_VEXT_VX(vwmulu_vx_h)
1666 GEN_VEXT_VX(vwmulu_vx_w)
1667 GEN_VEXT_VX(vwmulsu_vx_b)
1668 GEN_VEXT_VX(vwmulsu_vx_h)
1669 GEN_VEXT_VX(vwmulsu_vx_w)
1670 
1671 /* Vector Single-Width Integer Multiply-Add Instructions */
1672 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
1673 static void do_##NAME(void *vd, void *vs1, void *vs2, int i)       \
1674 {                                                                  \
1675     TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
1676     TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
1677     TD d = *((TD *)vd + HD(i));                                    \
1678     *((TD *)vd + HD(i)) = OP(s2, s1, d);                           \
1679 }
1680 
1681 #define DO_MACC(N, M, D) (M * N + D)
1682 #define DO_NMSAC(N, M, D) (-(M * N) + D)
1683 #define DO_MADD(N, M, D) (M * D + N)
1684 #define DO_NMSUB(N, M, D) (-(M * D) + N)
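/*
 * With the OP(s2, s1, d) call order these expand to the spec
 * semantics:
 *   vmacc:  vd = (vs1 * vs2) + vd     vnmsac: vd = -(vs1 * vs2) + vd
 *   vmadd:  vd = (vs1 * vd) + vs2     vnmsub: vd = -(vs1 * vd) + vs2
 * i.e. the MACC forms accumulate into vd, while the MADD forms use
 * vd as a multiplicand and add vs2.
 */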
1685 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1686 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1687 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1688 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1689 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1690 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1691 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1692 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1693 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1694 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1695 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1696 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1697 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1698 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1699 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1700 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
1701 GEN_VEXT_VV(vmacc_vv_b)
1702 GEN_VEXT_VV(vmacc_vv_h)
1703 GEN_VEXT_VV(vmacc_vv_w)
1704 GEN_VEXT_VV(vmacc_vv_d)
1705 GEN_VEXT_VV(vnmsac_vv_b)
1706 GEN_VEXT_VV(vnmsac_vv_h)
1707 GEN_VEXT_VV(vnmsac_vv_w)
1708 GEN_VEXT_VV(vnmsac_vv_d)
1709 GEN_VEXT_VV(vmadd_vv_b)
1710 GEN_VEXT_VV(vmadd_vv_h)
1711 GEN_VEXT_VV(vmadd_vv_w)
1712 GEN_VEXT_VV(vmadd_vv_d)
1713 GEN_VEXT_VV(vnmsub_vv_b)
1714 GEN_VEXT_VV(vnmsub_vv_h)
1715 GEN_VEXT_VV(vnmsub_vv_w)
1716 GEN_VEXT_VV(vnmsub_vv_d)
1717 
1718 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
1719 static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
1720 {                                                                   \
1721     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
1722     TD d = *((TD *)vd + HD(i));                                     \
1723     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d);                   \
1724 }
1725 
1726 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1727 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1728 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1729 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1730 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1731 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1732 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1733 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1734 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1735 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1736 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1737 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1738 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1739 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1740 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1741 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
1742 GEN_VEXT_VX(vmacc_vx_b)
1743 GEN_VEXT_VX(vmacc_vx_h)
1744 GEN_VEXT_VX(vmacc_vx_w)
1745 GEN_VEXT_VX(vmacc_vx_d)
1746 GEN_VEXT_VX(vnmsac_vx_b)
1747 GEN_VEXT_VX(vnmsac_vx_h)
1748 GEN_VEXT_VX(vnmsac_vx_w)
1749 GEN_VEXT_VX(vnmsac_vx_d)
1750 GEN_VEXT_VX(vmadd_vx_b)
1751 GEN_VEXT_VX(vmadd_vx_h)
1752 GEN_VEXT_VX(vmadd_vx_w)
1753 GEN_VEXT_VX(vmadd_vx_d)
1754 GEN_VEXT_VX(vnmsub_vx_b)
1755 GEN_VEXT_VX(vnmsub_vx_h)
1756 GEN_VEXT_VX(vnmsub_vx_w)
1757 GEN_VEXT_VX(vnmsub_vx_d)
1758 
1759 /* Vector Widening Integer Multiply-Add Instructions */
1760 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1761 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1762 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1763 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1764 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1765 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1766 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1767 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1768 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
1769 GEN_VEXT_VV(vwmaccu_vv_b)
1770 GEN_VEXT_VV(vwmaccu_vv_h)
1771 GEN_VEXT_VV(vwmaccu_vv_w)
1772 GEN_VEXT_VV(vwmacc_vv_b)
1773 GEN_VEXT_VV(vwmacc_vv_h)
1774 GEN_VEXT_VV(vwmacc_vv_w)
1775 GEN_VEXT_VV(vwmaccsu_vv_b)
1776 GEN_VEXT_VV(vwmaccsu_vv_h)
1777 GEN_VEXT_VV(vwmaccsu_vv_w)
1778 
1779 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
1780 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
1781 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
1782 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
1783 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
1784 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
1785 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
1786 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
1787 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
1788 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
1789 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
1790 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
1791 GEN_VEXT_VX(vwmaccu_vx_b)
1792 GEN_VEXT_VX(vwmaccu_vx_h)
1793 GEN_VEXT_VX(vwmaccu_vx_w)
1794 GEN_VEXT_VX(vwmacc_vx_b)
1795 GEN_VEXT_VX(vwmacc_vx_h)
1796 GEN_VEXT_VX(vwmacc_vx_w)
1797 GEN_VEXT_VX(vwmaccsu_vx_b)
1798 GEN_VEXT_VX(vwmaccsu_vx_h)
1799 GEN_VEXT_VX(vwmaccsu_vx_w)
1800 GEN_VEXT_VX(vwmaccus_vx_b)
1801 GEN_VEXT_VX(vwmaccus_vx_h)
1802 GEN_VEXT_VX(vwmaccus_vx_w)
1803 
1804 /* Vector Integer Merge and Move Instructions */
1805 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H)                              \
1806 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env,           \
1807                   uint32_t desc)                                     \
1808 {                                                                    \
1809     uint32_t vl = env->vl;                                           \
1810     uint32_t i;                                                      \
1811                                                                      \
1812     for (i = env->vstart; i < vl; i++) {                             \
1813         ETYPE s1 = *((ETYPE *)vs1 + H(i));                           \
1814         *((ETYPE *)vd + H(i)) = s1;                                  \
1815     }                                                                \
1816     env->vstart = 0;                                                 \
1817 }
1818 
1819 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t,  H1)
1820 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
1821 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
1822 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
1823 
1824 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H)                              \
1825 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env,         \
1826                   uint32_t desc)                                     \
1827 {                                                                    \
1828     uint32_t vl = env->vl;                                           \
1829     uint32_t i;                                                      \
1830                                                                      \
1831     for (i = env->vstart; i < vl; i++) {                             \
1832         *((ETYPE *)vd + H(i)) = (ETYPE)s1;                           \
1833     }                                                                \
1834     env->vstart = 0;                                                 \
1835 }
1836 
1837 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t,  H1)
1838 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
1839 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
1840 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
1841 
1842 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H)                           \
1843 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,          \
1844                   CPURISCVState *env, uint32_t desc)                 \
1845 {                                                                    \
1846     uint32_t vl = env->vl;                                           \
1847     uint32_t i;                                                      \
1848                                                                      \
1849     for (i = env->vstart; i < vl; i++) {                             \
1850         ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1);            \
1851         *((ETYPE *)vd + H(i)) = *(vt + H(i));                        \
1852     }                                                                \
1853     env->vstart = 0;                                                 \
1854 }
1855 
1856 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t,  H1)
1857 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
1858 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
1859 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
1860 
1861 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H)                           \
1862 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,               \
1863                   void *vs2, CPURISCVState *env, uint32_t desc)      \
1864 {                                                                    \
1865     uint32_t vl = env->vl;                                           \
1866     uint32_t i;                                                      \
1867                                                                      \
1868     for (i = env->vstart; i < vl; i++) {                             \
1869         ETYPE s2 = *((ETYPE *)vs2 + H(i));                           \
1870         ETYPE d = (!vext_elem_mask(v0, i) ? s2 :                     \
1871                    (ETYPE)(target_long)s1);                          \
1872         *((ETYPE *)vd + H(i)) = d;                                   \
1873     }                                                                \
1874     env->vstart = 0;                                                 \
1875 }
1876 
1877 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t,  H1)
1878 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
1879 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
1880 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
1881 
1882 /*
1883  *** Vector Fixed-Point Arithmetic Instructions
1884  */
1885 
1886 /* Vector Single-Width Saturating Add and Subtract */
1887 
1888 /*
1889  * Fixed-point instructions have a rounding mode and can saturate, so
1890  * define the common macros for fixed-point helpers here.
1891  */
1892 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
1893                           CPURISCVState *env, int vxrm);
1894 
1895 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)     \
1896 static inline void                                                  \
1897 do_##NAME(void *vd, void *vs1, void *vs2, int i,                    \
1898           CPURISCVState *env, int vxrm)                             \
1899 {                                                                   \
1900     TX1 s1 = *((T1 *)vs1 + HS1(i));                                 \
1901     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
1902     *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1);                    \
1903 }
1904 
1905 static inline void
1906 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
1907              CPURISCVState *env,
1908              uint32_t vl, uint32_t vm, int vxrm,
1909              opivv2_rm_fn *fn)
1910 {
1911     for (uint32_t i = env->vstart; i < vl; i++) {
1912         if (!vm && !vext_elem_mask(v0, i)) {
1913             continue;
1914         }
1915         fn(vd, vs1, vs2, i, env, vxrm);
1916     }
1917     env->vstart = 0;
1918 }
1919 
1920 static inline void
1921 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
1922              CPURISCVState *env,
1923              uint32_t desc,
1924              opivv2_rm_fn *fn)
1925 {
1926     uint32_t vm = vext_vm(desc);
1927     uint32_t vl = env->vl;
1928 
1929     switch (env->vxrm) {
1930     case 0: /* rnu */
1931         vext_vv_rm_1(vd, v0, vs1, vs2,
1932                      env, vl, vm, 0, fn);
1933         break;
1934     case 1: /* rne */
1935         vext_vv_rm_1(vd, v0, vs1, vs2,
1936                      env, vl, vm, 1, fn);
1937         break;
1938     case 2: /* rdn */
1939         vext_vv_rm_1(vd, v0, vs1, vs2,
1940                      env, vl, vm, 2, fn);
1941         break;
1942     default: /* rod */
1943         vext_vv_rm_1(vd, v0, vs1, vs2,
1944                      env, vl, vm, 3, fn);
1945         break;
1946     }
1947 }
1948 
1949 /* generate helpers for fixed point instructions with OPIVV format */
1950 #define GEN_VEXT_VV_RM(NAME)                                    \
1951 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,     \
1952                   CPURISCVState *env, uint32_t desc)            \
1953 {                                                               \
1954     vext_vv_rm_2(vd, v0, vs1, vs2, env, desc,                   \
1955                  do_##NAME);                                    \
1956 }
1957 
1958 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
1959 {
1960     uint8_t res = a + b;
1961     if (res < a) {
1962         res = UINT8_MAX;
1963         env->vxsat = 0x1;
1964     }
1965     return res;
1966 }
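/*
 * Unsigned wraparound check: e.g. saddu8() with a = 200, b = 100
 * wraps to 44, which is less than the first operand, so the result
 * saturates to UINT8_MAX and vxsat is set.
 */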
1967 
1968 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
1969                                uint16_t b)
1970 {
1971     uint16_t res = a + b;
1972     if (res < a) {
1973         res = UINT16_MAX;
1974         env->vxsat = 0x1;
1975     }
1976     return res;
1977 }
1978 
1979 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
1980                                uint32_t b)
1981 {
1982     uint32_t res = a + b;
1983     if (res < a) {
1984         res = UINT32_MAX;
1985         env->vxsat = 0x1;
1986     }
1987     return res;
1988 }
1989 
1990 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
1991                                uint64_t b)
1992 {
1993     uint64_t res = a + b;
1994     if (res < a) {
1995         res = UINT64_MAX;
1996         env->vxsat = 0x1;
1997     }
1998     return res;
1999 }
2000 
2001 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
2002 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
2003 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
2004 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
2005 GEN_VEXT_VV_RM(vsaddu_vv_b)
2006 GEN_VEXT_VV_RM(vsaddu_vv_h)
2007 GEN_VEXT_VV_RM(vsaddu_vv_w)
2008 GEN_VEXT_VV_RM(vsaddu_vv_d)
2009 
2010 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
2011                           CPURISCVState *env, int vxrm);
2012 
2013 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)          \
2014 static inline void                                                  \
2015 do_##NAME(void *vd, target_long s1, void *vs2, int i,               \
2016           CPURISCVState *env, int vxrm)                             \
2017 {                                                                   \
2018     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
2019     *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1);           \
2020 }
2021 
2022 static inline void
2023 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
2024              CPURISCVState *env,
2025              uint32_t vl, uint32_t vm, int vxrm,
2026              opivx2_rm_fn *fn)
2027 {
2028     for (uint32_t i = env->vstart; i < vl; i++) {
2029         if (!vm && !vext_elem_mask(v0, i)) {
2030             continue;
2031         }
2032         fn(vd, s1, vs2, i, env, vxrm);
2033     }
2034     env->vstart = 0;
2035 }
2036 
2037 static inline void
2038 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2039              CPURISCVState *env,
2040              uint32_t desc,
2041              opivx2_rm_fn *fn)
2042 {
2043     uint32_t vm = vext_vm(desc);
2044     uint32_t vl = env->vl;
2045 
2046     switch (env->vxrm) {
2047     case 0: /* rnu */
2048         vext_vx_rm_1(vd, v0, s1, vs2,
2049                      env, vl, vm, 0, fn);
2050         break;
2051     case 1: /* rne */
2052         vext_vx_rm_1(vd, v0, s1, vs2,
2053                      env, vl, vm, 1, fn);
2054         break;
2055     case 2: /* rdn */
2056         vext_vx_rm_1(vd, v0, s1, vs2,
2057                      env, vl, vm, 2, fn);
2058         break;
2059     default: /* rod */
2060         vext_vx_rm_1(vd, v0, s1, vs2,
2061                      env, vl, vm, 3, fn);
2062         break;
2063     }
2064 }
2065 
2066 /* generate helpers for fixed point instructions with OPIVX format */
2067 #define GEN_VEXT_VX_RM(NAME)                              \
2068 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
2069         void *vs2, CPURISCVState *env, uint32_t desc)     \
2070 {                                                         \
2071     vext_vx_rm_2(vd, v0, s1, vs2, env, desc,              \
2072                  do_##NAME);                              \
2073 }
2074 
2075 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2076 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2077 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2078 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
2079 GEN_VEXT_VX_RM(vsaddu_vx_b)
2080 GEN_VEXT_VX_RM(vsaddu_vx_h)
2081 GEN_VEXT_VX_RM(vsaddu_vx_w)
2082 GEN_VEXT_VX_RM(vsaddu_vx_d)
2083 
2084 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2085 {
2086     int8_t res = a + b;
2087     if ((res ^ a) & (res ^ b) & INT8_MIN) {
2088         res = a > 0 ? INT8_MAX : INT8_MIN;
2089         env->vxsat = 0x1;
2090     }
2091     return res;
2092 }
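/*
 * (res ^ a) & (res ^ b) has the sign bit set iff a and b share a
 * sign and res does not, i.e. the addition overflowed.  E.g. sadd8()
 * with a = b = 100: res wraps to -56, both XOR terms have bit 7 set,
 * and the result saturates to INT8_MAX.
 */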
2093 
2094 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2095 {
2096     int16_t res = a + b;
2097     if ((res ^ a) & (res ^ b) & INT16_MIN) {
2098         res = a > 0 ? INT16_MAX : INT16_MIN;
2099         env->vxsat = 0x1;
2100     }
2101     return res;
2102 }
2103 
2104 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2105 {
2106     int32_t res = a + b;
2107     if ((res ^ a) & (res ^ b) & INT32_MIN) {
2108         res = a > 0 ? INT32_MAX : INT32_MIN;
2109         env->vxsat = 0x1;
2110     }
2111     return res;
2112 }
2113 
2114 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2115 {
2116     int64_t res = a + b;
2117     if ((res ^ a) & (res ^ b) & INT64_MIN) {
2118         res = a > 0 ? INT64_MAX : INT64_MIN;
2119         env->vxsat = 0x1;
2120     }
2121     return res;
2122 }
2123 
2124 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2125 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2126 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2127 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
2128 GEN_VEXT_VV_RM(vsadd_vv_b)
2129 GEN_VEXT_VV_RM(vsadd_vv_h)
2130 GEN_VEXT_VV_RM(vsadd_vv_w)
2131 GEN_VEXT_VV_RM(vsadd_vv_d)
2132 
2133 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2134 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2135 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2136 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
2137 GEN_VEXT_VX_RM(vsadd_vx_b)
2138 GEN_VEXT_VX_RM(vsadd_vx_h)
2139 GEN_VEXT_VX_RM(vsadd_vx_w)
2140 GEN_VEXT_VX_RM(vsadd_vx_d)
2141 
2142 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2143 {
2144     uint8_t res = a - b;
2145     if (res > a) {
2146         res = 0;
2147         env->vxsat = 0x1;
2148     }
2149     return res;
2150 }
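/*
 * An unsigned borrow makes the difference wrap upwards: e.g. ssubu8()
 * with a = 10, b = 20 wraps to 246 > 10, so the result saturates to 0
 * and vxsat is set.
 */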
2151 
2152 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2153                                uint16_t b)
2154 {
2155     uint16_t res = a - b;
2156     if (res > a) {
2157         res = 0;
2158         env->vxsat = 0x1;
2159     }
2160     return res;
2161 }
2162 
2163 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2164                                uint32_t b)
2165 {
2166     uint32_t res = a - b;
2167     if (res > a) {
2168         res = 0;
2169         env->vxsat = 0x1;
2170     }
2171     return res;
2172 }
2173 
2174 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2175                                uint64_t b)
2176 {
2177     uint64_t res = a - b;
2178     if (res > a) {
2179         res = 0;
2180         env->vxsat = 0x1;
2181     }
2182     return res;
2183 }
2184 
2185 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2186 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2187 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2188 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
2189 GEN_VEXT_VV_RM(vssubu_vv_b)
2190 GEN_VEXT_VV_RM(vssubu_vv_h)
2191 GEN_VEXT_VV_RM(vssubu_vv_w)
2192 GEN_VEXT_VV_RM(vssubu_vv_d)
2193 
2194 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2195 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2196 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2197 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
2198 GEN_VEXT_VX_RM(vssubu_vx_b)
2199 GEN_VEXT_VX_RM(vssubu_vx_h)
2200 GEN_VEXT_VX_RM(vssubu_vx_w)
2201 GEN_VEXT_VX_RM(vssubu_vx_d)
2202 
2203 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2204 {
2205     int8_t res = a - b;
2206     if ((res ^ a) & (a ^ b) & INT8_MIN) {
2207         res = a >= 0 ? INT8_MAX : INT8_MIN;
2208         env->vxsat = 0x1;
2209     }
2210     return res;
2211 }
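/*
 * (res ^ a) & (a ^ b) has the sign bit set iff a and b have opposite
 * signs and res has the opposite sign of a, i.e. the subtraction
 * overflowed.  E.g. ssub8() with a = -100, b = 100: res wraps to 56
 * and the result saturates to INT8_MIN.
 */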
2212 
2213 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2214 {
2215     int16_t res = a - b;
2216     if ((res ^ a) & (a ^ b) & INT16_MIN) {
2217         res = a >= 0 ? INT16_MAX : INT16_MIN;
2218         env->vxsat = 0x1;
2219     }
2220     return res;
2221 }
2222 
2223 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2224 {
2225     int32_t res = a - b;
2226     if ((res ^ a) & (a ^ b) & INT32_MIN) {
2227         res = a >= 0 ? INT32_MAX : INT32_MIN;
2228         env->vxsat = 0x1;
2229     }
2230     return res;
2231 }
2232 
2233 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2234 {
2235     int64_t res = a - b;
2236     if ((res ^ a) & (a ^ b) & INT64_MIN) {
2237         res = a >= 0 ? INT64_MAX : INT64_MIN;
2238         env->vxsat = 0x1;
2239     }
2240     return res;
2241 }
2242 
2243 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2244 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2245 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2246 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
2247 GEN_VEXT_VV_RM(vssub_vv_b)
2248 GEN_VEXT_VV_RM(vssub_vv_h)
2249 GEN_VEXT_VV_RM(vssub_vv_w)
2250 GEN_VEXT_VV_RM(vssub_vv_d)
2251 
2252 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2253 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2254 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2255 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
2256 GEN_VEXT_VX_RM(vssub_vx_b)
2257 GEN_VEXT_VX_RM(vssub_vx_h)
2258 GEN_VEXT_VX_RM(vssub_vx_w)
2259 GEN_VEXT_VX_RM(vssub_vx_d)
2260 
2261 /* Vector Single-Width Averaging Add and Subtract */
2262 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2263 {
2264     uint8_t d = extract64(v, shift, 1);
2265     uint8_t d1;
2266     uint64_t D1, D2;
2267 
2268     if (shift == 0 || shift > 64) {
2269         return 0;
2270     }
2271 
2272     d1 = extract64(v, shift - 1, 1);
2273     D1 = extract64(v, 0, shift);
2274     if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2275         return d1;
2276     } else if (vxrm == 1) { /* round-to-nearest-even */
2277         if (shift > 1) {
2278             D2 = extract64(v, 0, shift - 1);
2279             return d1 & ((D2 != 0) | d);
2280         } else {
2281             return d1 & d;
2282         }
2283     } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2284         return !d & (D1 != 0);
2285     }
2286     return 0; /* round-down (truncate) */
2287 }
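/*
 * get_round() returns the increment to add to v >> shift.  Example:
 * v = 0b101100, shift = 3 (44 / 8 = 5.5, an exact tie):
 *   rnu: d1 = 1                      -> increment, result 6
 *   rne: tie and d (future LSB) = 1  -> round to even, result 6
 *   rdn: truncate                    -> result 5
 *   rod: LSB already odd, no jam     -> result 5
 * For v = 0b100100 (36 / 8 = 4.5), rne keeps the even value 4 while
 * rod jams the non-zero discarded bits into the LSB, giving 5.
 */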
2288 
2289 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2290 {
2291     int64_t res = (int64_t)a + b;
2292     uint8_t round = get_round(vxrm, res, 1);
2293 
2294     return (res >> 1) + round;
2295 }
2296 
2297 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2298 {
2299     int64_t res = a + b;
2300     uint8_t round = get_round(vxrm, res, 1);
2301     int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2302 
2303     /* With signed overflow, bit 64 is inverse of bit 63. */
2304     return ((res >> 1) ^ over) + round;
2305 }
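/*
 * Example: a = b = INT64_MAX.  res wraps to -2 and over is non-zero,
 * so the sign bit of (res >> 1) = -1 is flipped back, giving
 * INT64_MAX, the exact average; the rounding increment is 0 here
 * because bit 0 of res is clear.
 */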
2306 
2307 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2308 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2309 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2310 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
2311 GEN_VEXT_VV_RM(vaadd_vv_b)
2312 GEN_VEXT_VV_RM(vaadd_vv_h)
2313 GEN_VEXT_VV_RM(vaadd_vv_w)
2314 GEN_VEXT_VV_RM(vaadd_vv_d)
2315 
2316 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2317 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2318 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2319 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
2320 GEN_VEXT_VX_RM(vaadd_vx_b)
2321 GEN_VEXT_VX_RM(vaadd_vx_h)
2322 GEN_VEXT_VX_RM(vaadd_vx_w)
2323 GEN_VEXT_VX_RM(vaadd_vx_d)
2324 
2325 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2326                                uint32_t a, uint32_t b)
2327 {
2328     uint64_t res = (uint64_t)a + b;
2329     uint8_t round = get_round(vxrm, res, 1);
2330 
2331     return (res >> 1) + round;
2332 }
2333 
2334 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
2335                                uint64_t a, uint64_t b)
2336 {
2337     uint64_t res = a + b;
2338     uint8_t round = get_round(vxrm, res, 1);
2339     uint64_t over = (uint64_t)(res < a) << 63;
2340 
2341     return ((res >> 1) | over) + round;
2342 }
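/*
 * Example: a = b = UINT64_MAX.  The addition carries out, res wraps
 * to UINT64_MAX - 1, and over re-inserts the lost carry as bit 63 of
 * the halved sum, giving UINT64_MAX, the exact average.
 */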
2343 
2344 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
2345 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
2346 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
2347 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
2348 GEN_VEXT_VV_RM(vaaddu_vv_b)
2349 GEN_VEXT_VV_RM(vaaddu_vv_h)
2350 GEN_VEXT_VV_RM(vaaddu_vv_w)
2351 GEN_VEXT_VV_RM(vaaddu_vv_d)
2352 
2353 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
2354 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
2355 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
2356 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
2357 GEN_VEXT_VX_RM(vaaddu_vx_b)
2358 GEN_VEXT_VX_RM(vaaddu_vx_h)
2359 GEN_VEXT_VX_RM(vaaddu_vx_w)
2360 GEN_VEXT_VX_RM(vaaddu_vx_d)
2361 
2362 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2363 {
2364     int64_t res = (int64_t)a - b;
2365     uint8_t round = get_round(vxrm, res, 1);
2366 
2367     return (res >> 1) + round;
2368 }
2369 
2370 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2371 {
2372     int64_t res = (int64_t)a - b;
2373     uint8_t round = get_round(vxrm, res, 1);
2374     int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2375 
2376     /* With signed overflow, bit 64 is inverse of bit 63. */
2377     return ((res >> 1) ^ over) + round;
2378 }
2379 
2380 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2381 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2382 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2383 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
2384 GEN_VEXT_VV_RM(vasub_vv_b)
2385 GEN_VEXT_VV_RM(vasub_vv_h)
2386 GEN_VEXT_VV_RM(vasub_vv_w)
2387 GEN_VEXT_VV_RM(vasub_vv_d)
2388 
2389 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2390 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2391 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2392 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
2393 GEN_VEXT_VX_RM(vasub_vx_b)
2394 GEN_VEXT_VX_RM(vasub_vx_h)
2395 GEN_VEXT_VX_RM(vasub_vx_w)
2396 GEN_VEXT_VX_RM(vasub_vx_d)
2397 
2398 static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2399                                uint32_t a, uint32_t b)
2400 {
2401     int64_t res = (int64_t)a - b;
2402     uint8_t round = get_round(vxrm, res, 1);
2403 
2404     return (res >> 1) + round;
2405 }
2406 
2407 static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
2408                                uint64_t a, uint64_t b)
2409 {
2410     uint64_t res = (uint64_t)a - b;
2411     uint8_t round = get_round(vxrm, res, 1);
2412     uint64_t over = (uint64_t)(res > a) << 63;
2413 
2414     return ((res >> 1) | over) + round;
2415 }
2416 
2417 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
2418 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
2419 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
2420 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
2421 GEN_VEXT_VV_RM(vasubu_vv_b)
2422 GEN_VEXT_VV_RM(vasubu_vv_h)
2423 GEN_VEXT_VV_RM(vasubu_vv_w)
2424 GEN_VEXT_VV_RM(vasubu_vv_d)
2425 
2426 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
2427 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
2428 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
2429 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
2430 GEN_VEXT_VX_RM(vasubu_vx_b)
2431 GEN_VEXT_VX_RM(vasubu_vx_h)
2432 GEN_VEXT_VX_RM(vasubu_vx_w)
2433 GEN_VEXT_VX_RM(vasubu_vx_d)
2434 
2435 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2436 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2437 {
2438     uint8_t round;
2439     int16_t res;
2440 
2441     res = (int16_t)a * (int16_t)b;
2442     round = get_round(vxrm, res, 7);
2443     res   = (res >> 7) + round;
2444 
2445     if (res > INT8_MAX) {
2446         env->vxsat = 0x1;
2447         return INT8_MAX;
2448     } else if (res < INT8_MIN) {
2449         env->vxsat = 0x1;
2450         return INT8_MIN;
2451     } else {
2452         return res;
2453     }
2454 }
2455 
2456 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2457 {
2458     uint8_t round;
2459     int32_t res;
2460 
2461     res = (int32_t)a * (int32_t)b;
2462     round = get_round(vxrm, res, 15);
2463     res   = (res >> 15) + round;
2464 
2465     if (res > INT16_MAX) {
2466         env->vxsat = 0x1;
2467         return INT16_MAX;
2468     } else if (res < INT16_MIN) {
2469         env->vxsat = 0x1;
2470         return INT16_MIN;
2471     } else {
2472         return res;
2473     }
2474 }
2475 
2476 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2477 {
2478     uint8_t round;
2479     int64_t res;
2480 
2481     res = (int64_t)a * (int64_t)b;
2482     round = get_round(vxrm, res, 31);
2483     res   = (res >> 31) + round;
2484 
2485     if (res > INT32_MAX) {
2486         env->vxsat = 0x1;
2487         return INT32_MAX;
2488     } else if (res < INT32_MIN) {
2489         env->vxsat = 0x1;
2490         return INT32_MIN;
2491     } else {
2492         return res;
2493     }
2494 }
2495 
2496 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2497 {
2498     uint8_t round;
2499     uint64_t hi_64, lo_64;
2500     int64_t res;
2501 
2502     if (a == INT64_MIN && b == INT64_MIN) {
2503         env->vxsat = 1;
2504         return INT64_MAX;
2505     }
2506 
2507     muls64(&lo_64, &hi_64, a, b);
2508     round = get_round(vxrm, lo_64, 63);
2509     /*
2510      * Cannot overflow, as there are always
2511      * 2 sign bits after multiply.
2512      */
2513     res = (hi_64 << 1) | (lo_64 >> 63);
2514     if (round) {
2515         if (res == INT64_MAX) {
2516             env->vxsat = 1;
2517         } else {
2518             res += 1;
2519         }
2520     }
2521     return res;
2522 }
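/*
 * Only INT64_MIN * INT64_MIN needs the guard above: it is the one
 * Q63 product (-1.0 * -1.0 = +1.0) that exceeds the format, and the
 * only one whose 128-bit result does not carry two copies of the
 * sign bit, so (hi_64 << 1) | (lo_64 >> 63) would come out negative.
 * The narrower widths need no such guard because their products are
 * formed in a wider type and caught by the ordinary saturation
 * checks, e.g. vsmul8() with a = b = INT8_MIN computes
 * 16384 >> 7 = 128 and saturates to INT8_MAX.
 */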
2523 
2524 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2525 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2526 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2527 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
2528 GEN_VEXT_VV_RM(vsmul_vv_b)
2529 GEN_VEXT_VV_RM(vsmul_vv_h)
2530 GEN_VEXT_VV_RM(vsmul_vv_w)
2531 GEN_VEXT_VV_RM(vsmul_vv_d)
2532 
2533 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2534 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2535 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2536 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
2537 GEN_VEXT_VX_RM(vsmul_vx_b)
2538 GEN_VEXT_VX_RM(vsmul_vx_h)
2539 GEN_VEXT_VX_RM(vsmul_vx_w)
2540 GEN_VEXT_VX_RM(vsmul_vx_d)
2541 
2542 /* Vector Single-Width Scaling Shift Instructions */
2543 static inline uint8_t
2544 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2545 {
2546     uint8_t round, shift = b & 0x7;
2547     uint8_t res;
2548 
2549     round = get_round(vxrm, a, shift);
2550     res   = (a >> shift)  + round;
2551     return res;
2552 }
2553 static inline uint16_t
2554 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2555 {
2556     uint8_t round, shift = b & 0xf;
2557     uint16_t res;
2558 
2559     round = get_round(vxrm, a, shift);
2560     res   = (a >> shift)  + round;
2561     return res;
2562 }
2563 static inline uint32_t
2564 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2565 {
2566     uint8_t round, shift = b & 0x1f;
2567     uint32_t res;
2568 
2569     round = get_round(vxrm, a, shift);
2570     res   = (a >> shift)  + round;
2571     return res;
2572 }
2573 static inline uint64_t
2574 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2575 {
2576     uint8_t round, shift = b & 0x3f;
2577     uint64_t res;
2578 
2579     round = get_round(vxrm, a, shift);
2580     res   = (a >> shift)  + round;
2581     return res;
2582 }
2583 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2584 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2585 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2586 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
2587 GEN_VEXT_VV_RM(vssrl_vv_b)
2588 GEN_VEXT_VV_RM(vssrl_vv_h)
2589 GEN_VEXT_VV_RM(vssrl_vv_w)
2590 GEN_VEXT_VV_RM(vssrl_vv_d)
2591 
2592 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2593 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2594 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2595 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
2596 GEN_VEXT_VX_RM(vssrl_vx_b)
2597 GEN_VEXT_VX_RM(vssrl_vx_h)
2598 GEN_VEXT_VX_RM(vssrl_vx_w)
2599 GEN_VEXT_VX_RM(vssrl_vx_d)
2600 
2601 static inline int8_t
2602 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2603 {
2604     uint8_t round, shift = b & 0x7;
2605     int8_t res;
2606 
2607     round = get_round(vxrm, a, shift);
2608     res   = (a >> shift)  + round;
2609     return res;
2610 }
2611 static inline int16_t
2612 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2613 {
2614     uint8_t round, shift = b & 0xf;
2615     int16_t res;
2616 
2617     round = get_round(vxrm, a, shift);
2618     res   = (a >> shift)  + round;
2619     return res;
2620 }
2621 static inline int32_t
2622 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2623 {
2624     uint8_t round, shift = b & 0x1f;
2625     int32_t res;
2626 
2627     round = get_round(vxrm, a, shift);
2628     res   = (a >> shift)  + round;
2629     return res;
2630 }
2631 static inline int64_t
2632 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2633 {
2634     uint8_t round, shift = b & 0x3f;
2635     int64_t res;
2636 
2637     round = get_round(vxrm, a, shift);
2638     res   = (a >> shift)  + round;
2639     return res;
2640 }
2641 
2642 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2643 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2644 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2645 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
2646 GEN_VEXT_VV_RM(vssra_vv_b)
2647 GEN_VEXT_VV_RM(vssra_vv_h)
2648 GEN_VEXT_VV_RM(vssra_vv_w)
2649 GEN_VEXT_VV_RM(vssra_vv_d)
2650 
2651 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2652 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2653 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2654 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
2655 GEN_VEXT_VX_RM(vssra_vx_b)
2656 GEN_VEXT_VX_RM(vssra_vx_h)
2657 GEN_VEXT_VX_RM(vssra_vx_w)
2658 GEN_VEXT_VX_RM(vssra_vx_d)
2659 
2660 /* Vector Narrowing Fixed-Point Clip Instructions */
2661 static inline int8_t
2662 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2663 {
2664     uint8_t round, shift = b & 0xf;
2665     int16_t res;
2666 
2667     round = get_round(vxrm, a, shift);
2668     res   = (a >> shift)  + round;
2669     if (res > INT8_MAX) {
2670         env->vxsat = 0x1;
2671         return INT8_MAX;
2672     } else if (res < INT8_MIN) {
2673         env->vxsat = 0x1;
2674         return INT8_MIN;
2675     } else {
2676         return res;
2677     }
2678 }
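/*
 * Example: vnclip8() with a = 0x1234 and shift = 4: the shifted and
 * rounded result 0x123 (291) exceeds INT8_MAX, so the narrowed
 * element saturates to 127 and vxsat is set.
 */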
2679 
2680 static inline int16_t
2681 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2682 {
2683     uint8_t round, shift = b & 0x1f;
2684     int32_t res;
2685 
2686     round = get_round(vxrm, a, shift);
2687     res   = (a >> shift)  + round;
2688     if (res > INT16_MAX) {
2689         env->vxsat = 0x1;
2690         return INT16_MAX;
2691     } else if (res < INT16_MIN) {
2692         env->vxsat = 0x1;
2693         return INT16_MIN;
2694     } else {
2695         return res;
2696     }
2697 }
2698 
2699 static inline int32_t
2700 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2701 {
2702     uint8_t round, shift = b & 0x3f;
2703     int64_t res;
2704 
2705     round = get_round(vxrm, a, shift);
2706     res   = (a >> shift)  + round;
2707     if (res > INT32_MAX) {
2708         env->vxsat = 0x1;
2709         return INT32_MAX;
2710     } else if (res < INT32_MIN) {
2711         env->vxsat = 0x1;
2712         return INT32_MIN;
2713     } else {
2714         return res;
2715     }
2716 }
2717 
2718 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
2719 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
2720 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
2721 GEN_VEXT_VV_RM(vnclip_wv_b)
2722 GEN_VEXT_VV_RM(vnclip_wv_h)
2723 GEN_VEXT_VV_RM(vnclip_wv_w)
2724 
2725 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
2726 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
2727 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
2728 GEN_VEXT_VX_RM(vnclip_wx_b)
2729 GEN_VEXT_VX_RM(vnclip_wx_h)
2730 GEN_VEXT_VX_RM(vnclip_wx_w)
2731 
2732 static inline uint8_t
2733 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2734 {
2735     uint8_t round, shift = b & 0xf;
2736     uint16_t res;
2737 
2738     round = get_round(vxrm, a, shift);
2739     res   = (a >> shift)  + round;
2740     if (res > UINT8_MAX) {
2741         env->vxsat = 0x1;
2742         return UINT8_MAX;
2743     } else {
2744         return res;
2745     }
2746 }
2747 
2748 static inline uint16_t
2749 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
2750 {
2751     uint8_t round, shift = b & 0x1f;
2752     uint32_t res;
2753 
2754     round = get_round(vxrm, a, shift);
2755     res   = (a >> shift)  + round;
2756     if (res > UINT16_MAX) {
2757         env->vxsat = 0x1;
2758         return UINT16_MAX;
2759     } else {
2760         return res;
2761     }
2762 }
2763 
2764 static inline uint32_t
2765 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
2766 {
2767     uint8_t round, shift = b & 0x3f;
2768     uint64_t res;
2769 
2770     round = get_round(vxrm, a, shift);
2771     res   = (a >> shift)  + round;
2772     if (res > UINT32_MAX) {
2773         env->vxsat = 0x1;
2774         return UINT32_MAX;
2775     } else {
2776         return res;
2777     }
2778 }
2779 
2780 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
2781 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
2782 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
2783 GEN_VEXT_VV_RM(vnclipu_wv_b)
2784 GEN_VEXT_VV_RM(vnclipu_wv_h)
2785 GEN_VEXT_VV_RM(vnclipu_wv_w)
2786 
2787 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
2788 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
2789 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
2790 GEN_VEXT_VX_RM(vnclipu_wx_b)
2791 GEN_VEXT_VX_RM(vnclipu_wx_h)
2792 GEN_VEXT_VX_RM(vnclipu_wx_w)
2793 
2794 /*
2795  *** Vector Floating-Point Arithmetic Instructions
2796  */
2797 /* Vector Single-Width Floating-Point Add/Subtract Instructions */
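/*
 * OPFVV2 expands to a per-element function that applies OP to the vs2 and
 * vs1 elements using the guest fp_status; GEN_VEXT_VV_ENV emits the
 * helper that loops from vstart to vl, skips elements whose mask bit in
 * v0 is clear when the instruction is masked (vm == 0), and clears vstart
 * on completion.
 */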
2798 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
2799 static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
2800                       CPURISCVState *env)                      \
2801 {                                                              \
2802     TX1 s1 = *((T1 *)vs1 + HS1(i));                            \
2803     TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
2804     *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status);         \
2805 }
2806 
2807 #define GEN_VEXT_VV_ENV(NAME)                             \
2808 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
2809                   void *vs2, CPURISCVState *env,          \
2810                   uint32_t desc)                          \
2811 {                                                         \
2812     uint32_t vm = vext_vm(desc);                          \
2813     uint32_t vl = env->vl;                                \
2814     uint32_t i;                                           \
2815                                                           \
2816     for (i = env->vstart; i < vl; i++) {                  \
2817         if (!vm && !vext_elem_mask(v0, i)) {              \
2818             continue;                                     \
2819         }                                                 \
2820         do_##NAME(vd, vs1, vs2, i, env);                  \
2821     }                                                     \
2822     env->vstart = 0;                                      \
2823 }
2824 
2825 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
2826 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
2827 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
2828 GEN_VEXT_VV_ENV(vfadd_vv_h)
2829 GEN_VEXT_VV_ENV(vfadd_vv_w)
2830 GEN_VEXT_VV_ENV(vfadd_vv_d)
2831 
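/*
 * OPFVF2/GEN_VEXT_VF are the vector-scalar forms: s1 is the scalar
 * f-register operand, truncated to T1 and applied to every active element
 * of vs2.
 */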
2832 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)        \
2833 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
2834                       CPURISCVState *env)                      \
2835 {                                                              \
2836     TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
2837     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
2838 }
2839 
2840 #define GEN_VEXT_VF(NAME)                                 \
2841 void HELPER(NAME)(void *vd, void *v0, uint64_t s1,        \
2842                   void *vs2, CPURISCVState *env,          \
2843                   uint32_t desc)                          \
2844 {                                                         \
2845     uint32_t vm = vext_vm(desc);                          \
2846     uint32_t vl = env->vl;                                \
2847     uint32_t i;                                           \
2848                                                           \
2849     for (i = env->vstart; i < vl; i++) {                  \
2850         if (!vm && !vext_elem_mask(v0, i)) {              \
2851             continue;                                     \
2852         }                                                 \
2853         do_##NAME(vd, s1, vs2, i, env);                   \
2854     }                                                     \
2855     env->vstart = 0;                                      \
2856 }
2857 
2858 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
2859 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
2860 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
2861 GEN_VEXT_VF(vfadd_vf_h)
2862 GEN_VEXT_VF(vfadd_vf_w)
2863 GEN_VEXT_VF(vfadd_vf_d)
2864 
2865 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
2866 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
2867 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
2868 GEN_VEXT_VV_ENV(vfsub_vv_h)
2869 GEN_VEXT_VV_ENV(vfsub_vv_w)
2870 GEN_VEXT_VV_ENV(vfsub_vv_d)
2871 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
2872 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
2873 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
2874 GEN_VEXT_VF(vfsub_vf_h)
2875 GEN_VEXT_VF(vfsub_vf_w)
2876 GEN_VEXT_VF(vfsub_vf_d)
2877 
2878 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
2879 {
2880     return float16_sub(b, a, s);
2881 }
2882 
2883 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
2884 {
2885     return float32_sub(b, a, s);
2886 }
2887 
2888 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
2889 {
2890     return float64_sub(b, a, s);
2891 }
2892 
2893 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
2894 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
2895 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
2896 GEN_VEXT_VF(vfrsub_vf_h)
2897 GEN_VEXT_VF(vfrsub_vf_w)
2898 GEN_VEXT_VF(vfrsub_vf_d)
2899 
2900 /* Vector Widening Floating-Point Add/Subtract Instructions */
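/*
 * The _vv/_vf widening helpers promote both operands to the double-width
 * format (float16_to_float32(x, true, s) or float32_to_float64) before
 * operating; the _wv/_wf forms below take an already-widened first
 * operand and convert only the second.
 */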
2901 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
2902 {
2903     return float32_add(float16_to_float32(a, true, s),
2904             float16_to_float32(b, true, s), s);
2905 }
2906 
2907 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
2908 {
2909     return float64_add(float32_to_float64(a, s),
2910             float32_to_float64(b, s), s);
2912 }
2913 
2914 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
2915 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
2916 GEN_VEXT_VV_ENV(vfwadd_vv_h)
2917 GEN_VEXT_VV_ENV(vfwadd_vv_w)
2918 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
2919 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
2920 GEN_VEXT_VF(vfwadd_vf_h)
2921 GEN_VEXT_VF(vfwadd_vf_w)
2922 
2923 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
2924 {
2925     return float32_sub(float16_to_float32(a, true, s),
2926             float16_to_float32(b, true, s), s);
2927 }
2928 
2929 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
2930 {
2931     return float64_sub(float32_to_float64(a, s),
2932             float32_to_float64(b, s), s);
2934 }
2935 
2936 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
2937 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
2938 GEN_VEXT_VV_ENV(vfwsub_vv_h)
2939 GEN_VEXT_VV_ENV(vfwsub_vv_w)
2940 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
2941 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
2942 GEN_VEXT_VF(vfwsub_vf_h)
2943 GEN_VEXT_VF(vfwsub_vf_w)
2944 
2945 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
2946 {
2947     return float32_add(a, float16_to_float32(b, true, s), s);
2948 }
2949 
2950 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
2951 {
2952     return float64_add(a, float32_to_float64(b, s), s);
2953 }
2954 
2955 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
2956 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
2957 GEN_VEXT_VV_ENV(vfwadd_wv_h)
2958 GEN_VEXT_VV_ENV(vfwadd_wv_w)
2959 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
2960 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
2961 GEN_VEXT_VF(vfwadd_wf_h)
2962 GEN_VEXT_VF(vfwadd_wf_w)
2963 
2964 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
2965 {
2966     return float32_sub(a, float16_to_float32(b, true, s), s);
2967 }
2968 
2969 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
2970 {
2971     return float64_sub(a, float32_to_float64(b, s), s);
2972 }
2973 
2974 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
2975 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
2976 GEN_VEXT_VV_ENV(vfwsub_wv_h)
2977 GEN_VEXT_VV_ENV(vfwsub_wv_w)
2978 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
2979 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
2980 GEN_VEXT_VF(vfwsub_wf_h)
2981 GEN_VEXT_VF(vfwsub_wf_w)
2982 
2983 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */
2984 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
2985 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
2986 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
2987 GEN_VEXT_VV_ENV(vfmul_vv_h)
2988 GEN_VEXT_VV_ENV(vfmul_vv_w)
2989 GEN_VEXT_VV_ENV(vfmul_vv_d)
2990 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
2991 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
2992 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
2993 GEN_VEXT_VF(vfmul_vf_h)
2994 GEN_VEXT_VF(vfmul_vf_w)
2995 GEN_VEXT_VF(vfmul_vf_d)
2996 
2997 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
2998 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
2999 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
3000 GEN_VEXT_VV_ENV(vfdiv_vv_h)
3001 GEN_VEXT_VV_ENV(vfdiv_vv_w)
3002 GEN_VEXT_VV_ENV(vfdiv_vv_d)
3003 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
3004 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
3005 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
3006 GEN_VEXT_VF(vfdiv_vf_h)
3007 GEN_VEXT_VF(vfdiv_vf_w)
3008 GEN_VEXT_VF(vfdiv_vf_d)
3009 
3010 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3011 {
3012     return float16_div(b, a, s);
3013 }
3014 
3015 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
3016 {
3017     return float32_div(b, a, s);
3018 }
3019 
3020 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
3021 {
3022     return float64_div(b, a, s);
3023 }
3024 
3025 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
3026 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
3027 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
3028 GEN_VEXT_VF(vfrdiv_vf_h)
3029 GEN_VEXT_VF(vfrdiv_vf_w)
3030 GEN_VEXT_VF(vfrdiv_vf_d)
3031 
3032 /* Vector Widening Floating-Point Multiply */
3033 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3034 {
3035     return float32_mul(float16_to_float32(a, true, s),
3036             float16_to_float32(b, true, s), s);
3037 }
3038 
3039 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3040 {
3041     return float64_mul(float32_to_float64(a, s),
3042             float32_to_float64(b, s), s);
3044 }

3045 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3046 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
3047 GEN_VEXT_VV_ENV(vfwmul_vv_h)
3048 GEN_VEXT_VV_ENV(vfwmul_vv_w)
3049 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3050 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
3051 GEN_VEXT_VF(vfwmul_vf_h)
3052 GEN_VEXT_VF(vfwmul_vf_w)
3053 
3054 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
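/*
 * OPFVV3/OPFVF3 additionally read the current vd element as d and pass
 * (s2, s1, d) to OP, so each helper below chooses whether vd acts as the
 * addend (vfmacc/vfnmacc/vfmsac/vfnmsac) or as a multiplicand
 * (vfmadd/vfnmadd/vfmsub/vfnmsub).
 */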
3055 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)       \
3056 static void do_##NAME(void *vd, void *vs1, void *vs2, int i,       \
3057         CPURISCVState *env)                                        \
3058 {                                                                  \
3059     TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
3060     TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
3061     TD d = *((TD *)vd + HD(i));                                    \
3062     *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status);          \
3063 }
3064 
3065 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3066 {
3067     return float16_muladd(a, b, d, 0, s);
3068 }
3069 
3070 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3071 {
3072     return float32_muladd(a, b, d, 0, s);
3073 }
3074 
3075 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3076 {
3077     return float64_muladd(a, b, d, 0, s);
3078 }
3079 
3080 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3081 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3082 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
3083 GEN_VEXT_VV_ENV(vfmacc_vv_h)
3084 GEN_VEXT_VV_ENV(vfmacc_vv_w)
3085 GEN_VEXT_VV_ENV(vfmacc_vv_d)
3086 
3087 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)           \
3088 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i,    \
3089         CPURISCVState *env)                                       \
3090 {                                                                 \
3091     TX2 s2 = *((T2 *)vs2 + HS2(i));                               \
3092     TD d = *((TD *)vd + HD(i));                                   \
3093     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
3094 }
3095 
3096 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3097 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3098 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
3099 GEN_VEXT_VF(vfmacc_vf_h)
3100 GEN_VEXT_VF(vfmacc_vf_w)
3101 GEN_VEXT_VF(vfmacc_vf_d)
3102 
3103 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3104 {
3105     return float16_muladd(a, b, d,
3106             float_muladd_negate_c | float_muladd_negate_product, s);
3107 }
3108 
3109 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3110 {
3111     return float32_muladd(a, b, d,
3112             float_muladd_negate_c | float_muladd_negate_product, s);
3113 }
3114 
3115 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3116 {
3117     return float64_muladd(a, b, d,
3118             float_muladd_negate_c | float_muladd_negate_product, s);
3119 }
3120 
3121 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3122 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3123 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
3124 GEN_VEXT_VV_ENV(vfnmacc_vv_h)
3125 GEN_VEXT_VV_ENV(vfnmacc_vv_w)
3126 GEN_VEXT_VV_ENV(vfnmacc_vv_d)
3127 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3128 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3129 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
3130 GEN_VEXT_VF(vfnmacc_vf_h)
3131 GEN_VEXT_VF(vfnmacc_vf_w)
3132 GEN_VEXT_VF(vfnmacc_vf_d)
3133 
3134 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3135 {
3136     return float16_muladd(a, b, d, float_muladd_negate_c, s);
3137 }
3138 
3139 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3140 {
3141     return float32_muladd(a, b, d, float_muladd_negate_c, s);
3142 }
3143 
3144 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3145 {
3146     return float64_muladd(a, b, d, float_muladd_negate_c, s);
3147 }
3148 
3149 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3150 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3151 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
3152 GEN_VEXT_VV_ENV(vfmsac_vv_h)
3153 GEN_VEXT_VV_ENV(vfmsac_vv_w)
3154 GEN_VEXT_VV_ENV(vfmsac_vv_d)
3155 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3156 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3157 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
3158 GEN_VEXT_VF(vfmsac_vf_h)
3159 GEN_VEXT_VF(vfmsac_vf_w)
3160 GEN_VEXT_VF(vfmsac_vf_d)
3161 
3162 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3163 {
3164     return float16_muladd(a, b, d, float_muladd_negate_product, s);
3165 }
3166 
3167 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3168 {
3169     return float32_muladd(a, b, d, float_muladd_negate_product, s);
3170 }
3171 
3172 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3173 {
3174     return float64_muladd(a, b, d, float_muladd_negate_product, s);
3175 }
3176 
3177 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3178 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3179 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
3180 GEN_VEXT_VV_ENV(vfnmsac_vv_h)
3181 GEN_VEXT_VV_ENV(vfnmsac_vv_w)
3182 GEN_VEXT_VV_ENV(vfnmsac_vv_d)
3183 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3184 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3185 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
3186 GEN_VEXT_VF(vfnmsac_vf_h)
3187 GEN_VEXT_VF(vfnmsac_vf_w)
3188 GEN_VEXT_VF(vfnmsac_vf_d)
3189 
3190 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3191 {
3192     return float16_muladd(d, b, a, 0, s);
3193 }
3194 
3195 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3196 {
3197     return float32_muladd(d, b, a, 0, s);
3198 }
3199 
3200 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3201 {
3202     return float64_muladd(d, b, a, 0, s);
3203 }
3204 
3205 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3206 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3207 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
3208 GEN_VEXT_VV_ENV(vfmadd_vv_h)
3209 GEN_VEXT_VV_ENV(vfmadd_vv_w)
3210 GEN_VEXT_VV_ENV(vfmadd_vv_d)
3211 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3212 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3213 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
3214 GEN_VEXT_VF(vfmadd_vf_h)
3215 GEN_VEXT_VF(vfmadd_vf_w)
3216 GEN_VEXT_VF(vfmadd_vf_d)
3217 
3218 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3219 {
3220     return float16_muladd(d, b, a,
3221             float_muladd_negate_c | float_muladd_negate_product, s);
3222 }
3223 
3224 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3225 {
3226     return float32_muladd(d, b, a,
3227             float_muladd_negate_c | float_muladd_negate_product, s);
3228 }
3229 
3230 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3231 {
3232     return float64_muladd(d, b, a,
3233             float_muladd_negate_c | float_muladd_negate_product, s);
3234 }
3235 
3236 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3237 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3238 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
3239 GEN_VEXT_VV_ENV(vfnmadd_vv_h)
3240 GEN_VEXT_VV_ENV(vfnmadd_vv_w)
3241 GEN_VEXT_VV_ENV(vfnmadd_vv_d)
3242 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3243 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3244 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
3245 GEN_VEXT_VF(vfnmadd_vf_h)
3246 GEN_VEXT_VF(vfnmadd_vf_w)
3247 GEN_VEXT_VF(vfnmadd_vf_d)
3248 
3249 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3250 {
3251     return float16_muladd(d, b, a, float_muladd_negate_c, s);
3252 }
3253 
3254 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3255 {
3256     return float32_muladd(d, b, a, float_muladd_negate_c, s);
3257 }
3258 
3259 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3260 {
3261     return float64_muladd(d, b, a, float_muladd_negate_c, s);
3262 }
3263 
3264 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3265 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3266 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
3267 GEN_VEXT_VV_ENV(vfmsub_vv_h)
3268 GEN_VEXT_VV_ENV(vfmsub_vv_w)
3269 GEN_VEXT_VV_ENV(vfmsub_vv_d)
3270 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3271 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3272 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
3273 GEN_VEXT_VF(vfmsub_vf_h)
3274 GEN_VEXT_VF(vfmsub_vf_w)
3275 GEN_VEXT_VF(vfmsub_vf_d)
3276 
3277 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3278 {
3279     return float16_muladd(d, b, a, float_muladd_negate_product, s);
3280 }
3281 
3282 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3283 {
3284     return float32_muladd(d, b, a, float_muladd_negate_product, s);
3285 }
3286 
3287 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3288 {
3289     return float64_muladd(d, b, a, float_muladd_negate_product, s);
3290 }
3291 
3292 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3293 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3294 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
3295 GEN_VEXT_VV_ENV(vfnmsub_vv_h)
3296 GEN_VEXT_VV_ENV(vfnmsub_vv_w)
3297 GEN_VEXT_VV_ENV(vfnmsub_vv_d)
3298 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3299 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3300 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
3301 GEN_VEXT_VF(vfnmsub_vf_h)
3302 GEN_VEXT_VF(vfnmsub_vf_w)
3303 GEN_VEXT_VF(vfnmsub_vf_d)
3304 
3305 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3306 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3307 {
3308     return float32_muladd(float16_to_float32(a, true, s),
3309                         float16_to_float32(b, true, s), d, 0, s);
3310 }
3311 
3312 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3313 {
3314     return float64_muladd(float32_to_float64(a, s),
3315                         float32_to_float64(b, s), d, 0, s);
3316 }
3317 
3318 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3319 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
3320 GEN_VEXT_VV_ENV(vfwmacc_vv_h)
3321 GEN_VEXT_VV_ENV(vfwmacc_vv_w)
3322 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3323 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
3324 GEN_VEXT_VF(vfwmacc_vf_h)
3325 GEN_VEXT_VF(vfwmacc_vf_w)
3326 
3327 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3328 {
3329     return float32_muladd(float16_to_float32(a, true, s),
3330                         float16_to_float32(b, true, s), d,
3331                         float_muladd_negate_c | float_muladd_negate_product, s);
3332 }
3333 
3334 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3335 {
3336     return float64_muladd(float32_to_float64(a, s),
3337                         float32_to_float64(b, s), d,
3338                         float_muladd_negate_c | float_muladd_negate_product, s);
3339 }
3340 
3341 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3342 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
3343 GEN_VEXT_VV_ENV(vfwnmacc_vv_h)
3344 GEN_VEXT_VV_ENV(vfwnmacc_vv_w)
3345 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3346 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
3347 GEN_VEXT_VF(vfwnmacc_vf_h)
3348 GEN_VEXT_VF(vfwnmacc_vf_w)
3349 
3350 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3351 {
3352     return float32_muladd(float16_to_float32(a, true, s),
3353                         float16_to_float32(b, true, s), d,
3354                         float_muladd_negate_c, s);
3355 }
3356 
3357 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3358 {
3359     return float64_muladd(float32_to_float64(a, s),
3360                         float32_to_float64(b, s), d,
3361                         float_muladd_negate_c, s);
3362 }
3363 
3364 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3365 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
3366 GEN_VEXT_VV_ENV(vfwmsac_vv_h)
3367 GEN_VEXT_VV_ENV(vfwmsac_vv_w)
3368 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3369 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
3370 GEN_VEXT_VF(vfwmsac_vf_h)
3371 GEN_VEXT_VF(vfwmsac_vf_w)
3372 
3373 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3374 {
3375     return float32_muladd(float16_to_float32(a, true, s),
3376                         float16_to_float32(b, true, s), d,
3377                         float_muladd_negate_product, s);
3378 }
3379 
3380 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3381 {
3382     return float64_muladd(float32_to_float64(a, s),
3383                         float32_to_float64(b, s), d,
3384                         float_muladd_negate_product, s);
3385 }
3386 
3387 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3388 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
3389 GEN_VEXT_VV_ENV(vfwnmsac_vv_h)
3390 GEN_VEXT_VV_ENV(vfwnmsac_vv_w)
3391 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3392 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
3393 GEN_VEXT_VF(vfwnmsac_vf_h)
3394 GEN_VEXT_VF(vfwnmsac_vf_w)
3395 
3396 /* Vector Floating-Point Square-Root Instruction */
3397 /* (TD, T2, TX2) */
3398 #define OP_UU_H uint16_t, uint16_t, uint16_t
3399 #define OP_UU_W uint32_t, uint32_t, uint32_t
3400 #define OP_UU_D uint64_t, uint64_t, uint64_t
3401 
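/*
 * OPFVV1/GEN_VEXT_V_ENV are the unary forms (vs2 is the only vector
 * source); note that the generated helper returns immediately when
 * vl == 0.
 */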
3402 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP)        \
3403 static void do_##NAME(void *vd, void *vs2, int i,      \
3404         CPURISCVState *env)                            \
3405 {                                                      \
3406     TX2 s2 = *((T2 *)vs2 + HS2(i));                    \
3407     *((TD *)vd + HD(i)) = OP(s2, &env->fp_status);     \
3408 }
3409 
3410 #define GEN_VEXT_V_ENV(NAME)                           \
3411 void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
3412         CPURISCVState *env, uint32_t desc)             \
3413 {                                                      \
3414     uint32_t vm = vext_vm(desc);                       \
3415     uint32_t vl = env->vl;                             \
3416     uint32_t i;                                        \
3417                                                        \
3418     if (vl == 0) {                                     \
3419         return;                                        \
3420     }                                                  \
3421     for (i = env->vstart; i < vl; i++) {               \
3422         if (!vm && !vext_elem_mask(v0, i)) {           \
3423             continue;                                  \
3424         }                                              \
3425         do_##NAME(vd, vs2, i, env);                    \
3426     }                                                  \
3427     env->vstart = 0;                                   \
3428 }
3429 
3430 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3431 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3432 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
3433 GEN_VEXT_V_ENV(vfsqrt_v_h)
3434 GEN_VEXT_V_ENV(vfsqrt_v_w)
3435 GEN_VEXT_V_ENV(vfsqrt_v_d)
3436 
3437 /*
3438  * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
3439  *
3440  * Adapted from riscv-v-spec recip.c:
3441  * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3442  */
3443 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
3444 {
3445     uint64_t sign = extract64(f, frac_size + exp_size, 1);
3446     uint64_t exp = extract64(f, frac_size, exp_size);
3447     uint64_t frac = extract64(f, 0, frac_size);
3448 
3449     const uint8_t lookup_table[] = {
3450         52, 51, 50, 48, 47, 46, 44, 43,
3451         42, 41, 40, 39, 38, 36, 35, 34,
3452         33, 32, 31, 30, 30, 29, 28, 27,
3453         26, 25, 24, 23, 23, 22, 21, 20,
3454         19, 19, 18, 17, 16, 16, 15, 14,
3455         14, 13, 12, 12, 11, 10, 10, 9,
3456         9, 8, 7, 7, 6, 6, 5, 4,
3457         4, 3, 3, 2, 2, 1, 1, 0,
3458         127, 125, 123, 121, 119, 118, 116, 114,
3459         113, 111, 109, 108, 106, 105, 103, 102,
3460         100, 99, 97, 96, 95, 93, 92, 91,
3461         90, 88, 87, 86, 85, 84, 83, 82,
3462         80, 79, 78, 77, 76, 75, 74, 73,
3463         72, 71, 70, 70, 69, 68, 67, 66,
3464         65, 64, 63, 63, 62, 61, 60, 59,
3465         59, 58, 57, 56, 56, 55, 54, 53
3466     };
3467     const int precision = 7;
3468 
3469     if (exp == 0 && frac != 0) { /* subnormal */
3470         /* Normalize the subnormal. */
3471         while (extract64(frac, frac_size - 1, 1) == 0) {
3472             exp--;
3473             frac <<= 1;
3474         }
3475 
3476         frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3477     }
3478 
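    /*
     * Index the 128-entry table with the low exponent bit and the top
     * 6 fraction bits.  The output exponent works out to
     * (3 * bias - 1 - exp) / 2, matching the riscv-v-spec recip.c
     * reference above: MAKE_64BIT_MASK(0, exp_size - 1) is the exponent
     * bias and ~exp == -exp - 1 in two's complement.
     */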
3479     int idx = ((exp & 1) << (precision - 1)) |
3480                 (frac >> (frac_size - precision + 1));
3481     uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3482                             (frac_size - precision);
3483     uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;
3484 
3485     uint64_t val = 0;
3486     val = deposit64(val, 0, frac_size, out_frac);
3487     val = deposit64(val, frac_size, exp_size, out_exp);
3488     val = deposit64(val, frac_size + exp_size, 1, sign);
3489     return val;
3490 }
3491 
3492 static float16 frsqrt7_h(float16 f, float_status *s)
3493 {
3494     int exp_size = 5, frac_size = 10;
3495     bool sign = float16_is_neg(f);
3496 
3497     /*
3498      * frsqrt7(sNaN) = canonical NaN
3499      * frsqrt7(-inf) = canonical NaN
3500      * frsqrt7(-normal) = canonical NaN
3501      * frsqrt7(-subnormal) = canonical NaN
3502      */
3503     if (float16_is_signaling_nan(f, s) ||
3504             (float16_is_infinity(f) && sign) ||
3505             (float16_is_normal(f) && sign) ||
3506             (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
3507         s->float_exception_flags |= float_flag_invalid;
3508         return float16_default_nan(s);
3509     }
3510 
3511     /* frsqrt7(qNaN) = canonical NaN */
3512     if (float16_is_quiet_nan(f, s)) {
3513         return float16_default_nan(s);
3514     }
3515 
3516     /* frsqrt7(+-0) = +-inf */
3517     if (float16_is_zero(f)) {
3518         s->float_exception_flags |= float_flag_divbyzero;
3519         return float16_set_sign(float16_infinity, sign);
3520     }
3521 
3522     /* frsqrt7(+inf) = +0 */
3523     if (float16_is_infinity(f) && !sign) {
3524         return float16_set_sign(float16_zero, sign);
3525     }
3526 
3527     /* +normal, +subnormal */
3528     uint64_t val = frsqrt7(f, exp_size, frac_size);
3529     return make_float16(val);
3530 }
3531 
3532 static float32 frsqrt7_s(float32 f, float_status *s)
3533 {
3534     int exp_size = 8, frac_size = 23;
3535     bool sign = float32_is_neg(f);
3536 
3537     /*
3538      * frsqrt7(sNaN) = canonical NaN
3539      * frsqrt7(-inf) = canonical NaN
3540      * frsqrt7(-normal) = canonical NaN
3541      * frsqrt7(-subnormal) = canonical NaN
3542      */
3543     if (float32_is_signaling_nan(f, s) ||
3544             (float32_is_infinity(f) && sign) ||
3545             (float32_is_normal(f) && sign) ||
3546             (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
3547         s->float_exception_flags |= float_flag_invalid;
3548         return float32_default_nan(s);
3549     }
3550 
3551     /* frsqrt7(qNaN) = canonical NaN */
3552     if (float32_is_quiet_nan(f, s)) {
3553         return float32_default_nan(s);
3554     }
3555 
3556     /* frsqrt7(+-0) = +-inf */
3557     if (float32_is_zero(f)) {
3558         s->float_exception_flags |= float_flag_divbyzero;
3559         return float32_set_sign(float32_infinity, sign);
3560     }
3561 
3562     /* frsqrt7(+inf) = +0 */
3563     if (float32_is_infinity(f) && !sign) {
3564         return float32_set_sign(float32_zero, sign);
3565     }
3566 
3567     /* +normal, +subnormal */
3568     uint64_t val = frsqrt7(f, exp_size, frac_size);
3569     return make_float32(val);
3570 }
3571 
3572 static float64 frsqrt7_d(float64 f, float_status *s)
3573 {
3574     int exp_size = 11, frac_size = 52;
3575     bool sign = float64_is_neg(f);
3576 
3577     /*
3578      * frsqrt7(sNaN) = canonical NaN
3579      * frsqrt7(-inf) = canonical NaN
3580      * frsqrt7(-normal) = canonical NaN
3581      * frsqrt7(-subnormal) = canonical NaN
3582      */
3583     if (float64_is_signaling_nan(f, s) ||
3584             (float64_is_infinity(f) && sign) ||
3585             (float64_is_normal(f) && sign) ||
3586             (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
3587         s->float_exception_flags |= float_flag_invalid;
3588         return float64_default_nan(s);
3589     }
3590 
3591     /* frsqrt7(qNaN) = canonical NaN */
3592     if (float64_is_quiet_nan(f, s)) {
3593         return float64_default_nan(s);
3594     }
3595 
3596     /* frsqrt7(+-0) = +-inf */
3597     if (float64_is_zero(f)) {
3598         s->float_exception_flags |= float_flag_divbyzero;
3599         return float64_set_sign(float64_infinity, sign);
3600     }
3601 
3602     /* frsqrt7(+inf) = +0 */
3603     if (float64_is_infinity(f) && !sign) {
3604         return float64_set_sign(float64_zero, sign);
3605     }
3606 
3607     /* +normal, +subnormal */
3608     uint64_t val = frsqrt7(f, exp_size, frac_size);
3609     return make_float64(val);
3610 }
3611 
3612 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
3613 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
3614 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
3615 GEN_VEXT_V_ENV(vfrsqrt7_v_h)
3616 GEN_VEXT_V_ENV(vfrsqrt7_v_w)
3617 GEN_VEXT_V_ENV(vfrsqrt7_v_d)
3618 
3619 /*
3620  * Vector Floating-Point Reciprocal Estimate Instruction
3621  *
3622  * Adapted from riscv-v-spec recip.c:
3623  * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3624  */
3625 static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
3626                       float_status *s)
3627 {
3628     uint64_t sign = extract64(f, frac_size + exp_size, 1);
3629     uint64_t exp = extract64(f, frac_size, exp_size);
3630     uint64_t frac = extract64(f, 0, frac_size);
3631 
3632     const uint8_t lookup_table[] = {
3633         127, 125, 123, 121, 119, 117, 116, 114,
3634         112, 110, 109, 107, 105, 104, 102, 100,
3635         99, 97, 96, 94, 93, 91, 90, 88,
3636         87, 85, 84, 83, 81, 80, 79, 77,
3637         76, 75, 74, 72, 71, 70, 69, 68,
3638         66, 65, 64, 63, 62, 61, 60, 59,
3639         58, 57, 56, 55, 54, 53, 52, 51,
3640         50, 49, 48, 47, 46, 45, 44, 43,
3641         42, 41, 40, 40, 39, 38, 37, 36,
3642         35, 35, 34, 33, 32, 31, 31, 30,
3643         29, 28, 28, 27, 26, 25, 25, 24,
3644         23, 23, 22, 21, 21, 20, 19, 19,
3645         18, 17, 17, 16, 15, 15, 14, 14,
3646         13, 12, 12, 11, 11, 10, 9, 9,
3647         8, 8, 7, 7, 6, 5, 5, 4,
3648         4, 3, 3, 2, 2, 1, 1, 0
3649     };
3650     const int precision = 7;
3651 
3652     if (exp == 0 && frac != 0) { /* subnormal */
3653         /* Normalize the subnormal. */
3654         while (extract64(frac, frac_size - 1, 1) == 0) {
3655             exp--;
3656             frac <<= 1;
3657         }
3658 
3659         frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3660 
3661         if (exp != 0 && exp != UINT64_MAX) {
3662             /*
3663              * Overflow to inf or max value of same sign,
3664              * depending on sign and rounding mode.
3665              */
3666             s->float_exception_flags |= (float_flag_inexact |
3667                                          float_flag_overflow);
3668 
3669             if ((s->float_rounding_mode == float_round_to_zero) ||
3670                 ((s->float_rounding_mode == float_round_down) && !sign) ||
3671                 ((s->float_rounding_mode == float_round_up) && sign)) {
3672                 /* Return greatest/negative finite value. */
3673                 return (sign << (exp_size + frac_size)) |
3674                     (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
3675             } else {
3676                 /* Return +-inf. */
3677                 return (sign << (exp_size + frac_size)) |
3678                     MAKE_64BIT_MASK(frac_size, exp_size);
3679             }
3680         }
3681     }
3682 
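    /*
     * Index the 128-entry table with the top 7 fraction bits.  The output
     * exponent works out to 2 * bias - 1 - exp, matching the riscv-v-spec
     * recip.c reference above (MAKE_64BIT_MASK(0, exp_size - 1) is the
     * exponent bias and ~exp == -exp - 1).
     */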
3683     int idx = frac >> (frac_size - precision);
3684     uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3685                             (frac_size - precision);
3686     uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;
3687 
3688     if (out_exp == 0 || out_exp == UINT64_MAX) {
3689         /*
3690          * The result is subnormal, but don't raise the underflow exception,
3691          * because there's no additional loss of precision.
3692          */
3693         out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
3694         if (out_exp == UINT64_MAX) {
3695             out_frac >>= 1;
3696             out_exp = 0;
3697         }
3698     }
3699 
3700     uint64_t val = 0;
3701     val = deposit64(val, 0, frac_size, out_frac);
3702     val = deposit64(val, frac_size, exp_size, out_exp);
3703     val = deposit64(val, frac_size + exp_size, 1, sign);
3704     return val;
3705 }
3706 
3707 static float16 frec7_h(float16 f, float_status *s)
3708 {
3709     int exp_size = 5, frac_size = 10;
3710     bool sign = float16_is_neg(f);
3711 
3712     /* frec7(+-inf) = +-0 */
3713     if (float16_is_infinity(f)) {
3714         return float16_set_sign(float16_zero, sign);
3715     }
3716 
3717     /* frec7(+-0) = +-inf */
3718     if (float16_is_zero(f)) {
3719         s->float_exception_flags |= float_flag_divbyzero;
3720         return float16_set_sign(float16_infinity, sign);
3721     }
3722 
3723     /* frec7(sNaN) = canonical NaN */
3724     if (float16_is_signaling_nan(f, s)) {
3725         s->float_exception_flags |= float_flag_invalid;
3726         return float16_default_nan(s);
3727     }
3728 
3729     /* frec7(qNaN) = canonical NaN */
3730     if (float16_is_quiet_nan(f, s)) {
3731         return float16_default_nan(s);
3732     }
3733 
3734     /* +-normal, +-subnormal */
3735     uint64_t val = frec7(f, exp_size, frac_size, s);
3736     return make_float16(val);
3737 }
3738 
3739 static float32 frec7_s(float32 f, float_status *s)
3740 {
3741     int exp_size = 8, frac_size = 23;
3742     bool sign = float32_is_neg(f);
3743 
3744     /* frec7(+-inf) = +-0 */
3745     if (float32_is_infinity(f)) {
3746         return float32_set_sign(float32_zero, sign);
3747     }
3748 
3749     /* frec7(+-0) = +-inf */
3750     if (float32_is_zero(f)) {
3751         s->float_exception_flags |= float_flag_divbyzero;
3752         return float32_set_sign(float32_infinity, sign);
3753     }
3754 
3755     /* frec7(sNaN) = canonical NaN */
3756     if (float32_is_signaling_nan(f, s)) {
3757         s->float_exception_flags |= float_flag_invalid;
3758         return float32_default_nan(s);
3759     }
3760 
3761     /* frec7(qNaN) = canonical NaN */
3762     if (float32_is_quiet_nan(f, s)) {
3763         return float32_default_nan(s);
3764     }
3765 
3766     /* +-normal, +-subnormal */
3767     uint64_t val = frec7(f, exp_size, frac_size, s);
3768     return make_float32(val);
3769 }
3770 
3771 static float64 frec7_d(float64 f, float_status *s)
3772 {
3773     int exp_size = 11, frac_size = 52;
3774     bool sign = float64_is_neg(f);
3775 
3776     /* frec7(+-inf) = +-0 */
3777     if (float64_is_infinity(f)) {
3778         return float64_set_sign(float64_zero, sign);
3779     }
3780 
3781     /* frec7(+-0) = +-inf */
3782     if (float64_is_zero(f)) {
3783         s->float_exception_flags |= float_flag_divbyzero;
3784         return float64_set_sign(float64_infinity, sign);
3785     }
3786 
3787     /* frec7(sNaN) = canonical NaN */
3788     if (float64_is_signaling_nan(f, s)) {
3789         s->float_exception_flags |= float_flag_invalid;
3790         return float64_default_nan(s);
3791     }
3792 
3793     /* frec7(qNaN) = canonical NaN */
3794     if (float64_is_quiet_nan(f, s)) {
3795         return float64_default_nan(s);
3796     }
3797 
3798     /* +-normal, +-subnormal */
3799     uint64_t val = frec7(f, exp_size, frac_size, s);
3800     return make_float64(val);
3801 }
3802 
3803 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
3804 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
3805 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
3806 GEN_VEXT_V_ENV(vfrec7_v_h)
3807 GEN_VEXT_V_ENV(vfrec7_v_w)
3808 GEN_VEXT_V_ENV(vfrec7_v_d)
3809 
3810 /* Vector Floating-Point MIN/MAX Instructions */
3811 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
3812 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
3813 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
3814 GEN_VEXT_VV_ENV(vfmin_vv_h)
3815 GEN_VEXT_VV_ENV(vfmin_vv_w)
3816 GEN_VEXT_VV_ENV(vfmin_vv_d)
3817 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
3818 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
3819 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
3820 GEN_VEXT_VF(vfmin_vf_h)
3821 GEN_VEXT_VF(vfmin_vf_w)
3822 GEN_VEXT_VF(vfmin_vf_d)
3823 
3824 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
3825 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
3826 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
3827 GEN_VEXT_VV_ENV(vfmax_vv_h)
3828 GEN_VEXT_VV_ENV(vfmax_vv_w)
3829 GEN_VEXT_VV_ENV(vfmax_vv_d)
3830 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
3831 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
3832 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
3833 GEN_VEXT_VF(vfmax_vf_h)
3834 GEN_VEXT_VF(vfmax_vf_w)
3835 GEN_VEXT_VF(vfmax_vf_d)
3836 
3837 /* Vector Floating-Point Sign-Injection Instructions */
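/*
 * In these helpers a is the vs2 element supplying the magnitude and b is
 * the vs1 element (or scalar) supplying the sign: deposit64 keeps the low
 * SEW - 1 bits of a and takes the sign bit from b, ~b or a ^ b
 * respectively.
 */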
3838 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
3839 {
3840     return deposit64(b, 0, 15, a);
3841 }
3842 
3843 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
3844 {
3845     return deposit64(b, 0, 31, a);
3846 }
3847 
3848 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
3849 {
3850     return deposit64(b, 0, 63, a);
3851 }
3852 
3853 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
3854 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
3855 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
3856 GEN_VEXT_VV_ENV(vfsgnj_vv_h)
3857 GEN_VEXT_VV_ENV(vfsgnj_vv_w)
3858 GEN_VEXT_VV_ENV(vfsgnj_vv_d)
3859 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
3860 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
3861 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
3862 GEN_VEXT_VF(vfsgnj_vf_h)
3863 GEN_VEXT_VF(vfsgnj_vf_w)
3864 GEN_VEXT_VF(vfsgnj_vf_d)
3865 
3866 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
3867 {
3868     return deposit64(~b, 0, 15, a);
3869 }
3870 
3871 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
3872 {
3873     return deposit64(~b, 0, 31, a);
3874 }
3875 
3876 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
3877 {
3878     return deposit64(~b, 0, 63, a);
3879 }
3880 
3881 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
3882 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
3883 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
3884 GEN_VEXT_VV_ENV(vfsgnjn_vv_h)
3885 GEN_VEXT_VV_ENV(vfsgnjn_vv_w)
3886 GEN_VEXT_VV_ENV(vfsgnjn_vv_d)
3887 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
3888 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
3889 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
3890 GEN_VEXT_VF(vfsgnjn_vf_h)
3891 GEN_VEXT_VF(vfsgnjn_vf_w)
3892 GEN_VEXT_VF(vfsgnjn_vf_d)
3893 
3894 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
3895 {
3896     return deposit64(b ^ a, 0, 15, a);
3897 }
3898 
3899 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
3900 {
3901     return deposit64(b ^ a, 0, 31, a);
3902 }
3903 
3904 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
3905 {
3906     return deposit64(b ^ a, 0, 63, a);
3907 }
3908 
3909 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
3910 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
3911 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
3912 GEN_VEXT_VV_ENV(vfsgnjx_vv_h)
3913 GEN_VEXT_VV_ENV(vfsgnjx_vv_w)
3914 GEN_VEXT_VV_ENV(vfsgnjx_vv_d)
3915 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
3916 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
3917 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
3918 GEN_VEXT_VF(vfsgnjx_vf_h)
3919 GEN_VEXT_VF(vfsgnjx_vf_w)
3920 GEN_VEXT_VF(vfsgnjx_vf_d)
3921 
3922 /* Vector Floating-Point Compare Instructions */
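/*
 * The compares write a single mask bit per element into vd via
 * vext_set_elem_mask; elements masked off by v0 are skipped, leaving
 * their destination bits unchanged.
 */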
3923 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP)            \
3924 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
3925                   CPURISCVState *env, uint32_t desc)          \
3926 {                                                             \
3927     uint32_t vm = vext_vm(desc);                              \
3928     uint32_t vl = env->vl;                                    \
3929     uint32_t i;                                               \
3930                                                               \
3931     for (i = env->vstart; i < vl; i++) {                      \
3932         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
3933         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
3934         if (!vm && !vext_elem_mask(v0, i)) {                  \
3935             continue;                                         \
3936         }                                                     \
3937         vext_set_elem_mask(vd, i,                             \
3938                            DO_OP(s2, s1, &env->fp_status));   \
3939     }                                                         \
3940     env->vstart = 0;                                          \
3941 }
3942 
3943 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
3944 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
3945 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
3946 
3947 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP)                      \
3948 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,       \
3949                   CPURISCVState *env, uint32_t desc)                \
3950 {                                                                   \
3951     uint32_t vm = vext_vm(desc);                                    \
3952     uint32_t vl = env->vl;                                          \
3953     uint32_t i;                                                     \
3954                                                                     \
3955     for (i = env->vstart; i < vl; i++) {                            \
3956         ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
3957         if (!vm && !vext_elem_mask(v0, i)) {                        \
3958             continue;                                               \
3959         }                                                           \
3960         vext_set_elem_mask(vd, i,                                   \
3961                            DO_OP(s2, (ETYPE)s1, &env->fp_status));  \
3962     }                                                               \
3963     env->vstart = 0;                                                \
3964 }
3965 
3966 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
3967 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
3968 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
3969 
3970 static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
3971 {
3972     FloatRelation compare = float16_compare_quiet(a, b, s);
3973     return compare != float_relation_equal;
3974 }
3975 
3976 static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
3977 {
3978     FloatRelation compare = float32_compare_quiet(a, b, s);
3979     return compare != float_relation_equal;
3980 }
3981 
3982 static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
3983 {
3984     FloatRelation compare = float64_compare_quiet(a, b, s);
3985     return compare != float_relation_equal;
3986 }
3987 
3988 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
3989 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
3990 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
3991 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
3992 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
3993 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
3994 
3995 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
3996 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
3997 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
3998 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
3999 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
4000 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
4001 
4002 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
4003 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
4004 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
4005 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
4006 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
4007 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
4008 
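/*
 * vmflt/vmfle above and vmfgt/vmfge below use the signaling compare
 * (float16_compare etc.), so NaN operands raise the invalid flag; only
 * vmfeq/vmfne use the quiet variants.
 */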
4009 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
4010 {
4011     FloatRelation compare = float16_compare(a, b, s);
4012     return compare == float_relation_greater;
4013 }
4014 
4015 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
4016 {
4017     FloatRelation compare = float32_compare(a, b, s);
4018     return compare == float_relation_greater;
4019 }
4020 
4021 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
4022 {
4023     FloatRelation compare = float64_compare(a, b, s);
4024     return compare == float_relation_greater;
4025 }
4026 
4027 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
4028 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
4029 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4030 
4031 static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4032 {
4033     FloatRelation compare = float16_compare(a, b, s);
4034     return compare == float_relation_greater ||
4035            compare == float_relation_equal;
4036 }
4037 
4038 static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
4039 {
4040     FloatRelation compare = float32_compare(a, b, s);
4041     return compare == float_relation_greater ||
4042            compare == float_relation_equal;
4043 }
4044 
4045 static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
4046 {
4047     FloatRelation compare = float64_compare(a, b, s);
4048     return compare == float_relation_greater ||
4049            compare == float_relation_equal;
4050 }
4051 
4052 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
4053 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
4054 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
4055 
4056 /* Vector Floating-Point Classify Instruction */
4057 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP)         \
4058 static void do_##NAME(void *vd, void *vs2, int i)      \
4059 {                                                      \
4060     TX2 s2 = *((T2 *)vs2 + HS2(i));                    \
4061     *((TD *)vd + HD(i)) = OP(s2);                      \
4062 }
4063 
4064 #define GEN_VEXT_V(NAME)                               \
4065 void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
4066                   CPURISCVState *env, uint32_t desc)   \
4067 {                                                      \
4068     uint32_t vm = vext_vm(desc);                       \
4069     uint32_t vl = env->vl;                             \
4070     uint32_t i;                                        \
4071                                                        \
4072     for (i = env->vstart; i < vl; i++) {               \
4073         if (!vm && !vext_elem_mask(v0, i)) {           \
4074             continue;                                  \
4075         }                                              \
4076         do_##NAME(vd, vs2, i);                         \
4077     }                                                  \
4078     env->vstart = 0;                                   \
4079 }
4080 
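/*
 * fclass_h/s/d return the standard RISC-V fclass bit mask:
 * bit 0 = -inf, 1 = negative normal, 2 = negative subnormal, 3 = -0,
 * 4 = +0, 5 = positive subnormal, 6 = positive normal, 7 = +inf,
 * 8 = signaling NaN, 9 = quiet NaN.
 */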
4081 target_ulong fclass_h(uint64_t frs1)
4082 {
4083     float16 f = frs1;
4084     bool sign = float16_is_neg(f);
4085 
4086     if (float16_is_infinity(f)) {
4087         return sign ? 1 << 0 : 1 << 7;
4088     } else if (float16_is_zero(f)) {
4089         return sign ? 1 << 3 : 1 << 4;
4090     } else if (float16_is_zero_or_denormal(f)) {
4091         return sign ? 1 << 2 : 1 << 5;
4092     } else if (float16_is_any_nan(f)) {
4093         float_status s = { }; /* for snan_bit_is_one */
4094         return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4095     } else {
4096         return sign ? 1 << 1 : 1 << 6;
4097     }
4098 }
4099 
4100 target_ulong fclass_s(uint64_t frs1)
4101 {
4102     float32 f = frs1;
4103     bool sign = float32_is_neg(f);
4104 
4105     if (float32_is_infinity(f)) {
4106         return sign ? 1 << 0 : 1 << 7;
4107     } else if (float32_is_zero(f)) {
4108         return sign ? 1 << 3 : 1 << 4;
4109     } else if (float32_is_zero_or_denormal(f)) {
4110         return sign ? 1 << 2 : 1 << 5;
4111     } else if (float32_is_any_nan(f)) {
4112         float_status s = { }; /* for snan_bit_is_one */
4113         return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4114     } else {
4115         return sign ? 1 << 1 : 1 << 6;
4116     }
4117 }
4118 
4119 target_ulong fclass_d(uint64_t frs1)
4120 {
4121     float64 f = frs1;
4122     bool sign = float64_is_neg(f);
4123 
4124     if (float64_is_infinity(f)) {
4125         return sign ? 1 << 0 : 1 << 7;
4126     } else if (float64_is_zero(f)) {
4127         return sign ? 1 << 3 : 1 << 4;
4128     } else if (float64_is_zero_or_denormal(f)) {
4129         return sign ? 1 << 2 : 1 << 5;
4130     } else if (float64_is_any_nan(f)) {
4131         float_status s = { }; /* for snan_bit_is_one */
4132         return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4133     } else {
4134         return sign ? 1 << 1 : 1 << 6;
4135     }
4136 }
4137 
4138 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
4139 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
4140 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
4141 GEN_VEXT_V(vfclass_v_h)
4142 GEN_VEXT_V(vfclass_v_w)
4143 GEN_VEXT_V(vfclass_v_d)
4144 
4145 /* Vector Floating-Point Merge Instruction */
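/*
 * vfmerge.vfm writes the scalar s1 to elements whose v0 mask bit is set
 * and copies the vs2 element otherwise; when the instruction is unmasked
 * (vm == 1) every element receives the scalar.
 */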
4146 #define GEN_VFMERGE_VF(NAME, ETYPE, H)                        \
4147 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4148                   CPURISCVState *env, uint32_t desc)          \
4149 {                                                             \
4150     uint32_t vm = vext_vm(desc);                              \
4151     uint32_t vl = env->vl;                                    \
4152     uint32_t i;                                               \
4153                                                               \
4154     for (i = env->vstart; i < vl; i++) {                      \
4155         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
4156         *((ETYPE *)vd + H(i))                                 \
4157           = (!vm && !vext_elem_mask(v0, i) ? s2 : s1);        \
4158     }                                                         \
4159     env->vstart = 0;                                          \
4160 }
4161 
4162 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
4163 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
4164 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
4165 
4166 /* Single-Width Floating-Point/Integer Type-Convert Instructions */
4167 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
4168 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
4169 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
4170 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
4171 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h)
4172 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w)
4173 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d)
4174 
4175 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
4176 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
4177 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
4178 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
4179 GEN_VEXT_V_ENV(vfcvt_x_f_v_h)
4180 GEN_VEXT_V_ENV(vfcvt_x_f_v_w)
4181 GEN_VEXT_V_ENV(vfcvt_x_f_v_d)
4182 
4183 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
4184 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
4185 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
4186 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
4187 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h)
4188 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w)
4189 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d)
4190 
4191 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
4192 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
4193 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
4194 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
4195 GEN_VEXT_V_ENV(vfcvt_f_x_v_h)
4196 GEN_VEXT_V_ENV(vfcvt_f_x_v_w)
4197 GEN_VEXT_V_ENV(vfcvt_f_x_v_d)
4198 
4199 /* Widening Floating-Point/Integer Type-Convert Instructions */
4200 /* (TD, T2, TX2) */
4201 #define WOP_UU_B uint16_t, uint8_t,  uint8_t
4202 #define WOP_UU_H uint32_t, uint16_t, uint16_t
4203 #define WOP_UU_W uint64_t, uint32_t, uint32_t
4204 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. */
4205 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
4206 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
4207 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h)
4208 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w)
4209 
4210 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
4211 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
4212 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
4213 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h)
4214 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w)
4215 
4216 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */
4217 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
4218 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
4219 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
4220 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b)
4221 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h)
4222 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w)
4223 
4224 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
4225 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
4226 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
4227 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
4228 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b)
4229 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h)
4230 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w)
4231 
4232 /*
4233  * vfwcvt.f.f.v vd, vs2, vm
4234  * Convert single-width float to double-width float.
4235  */
4236 static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4237 {
4238     return float16_to_float32(a, true, s);
4239 }
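/*
 * The 'true' argument above (and in vfncvtffv16 below) selects IEEE
 * half-precision rather than the ARM alternative half-precision format
 * accepted by the softfloat float16 conversion routines.
 */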
4240 
4241 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
4242 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
4243 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h)
4244 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w)
4245 
4246 /* Narrowing Floating-Point/Integer Type-Convert Instructions */
4247 /* (TD, T2, TX2) */
4248 #define NOP_UU_B uint8_t,  uint16_t, uint32_t
4249 #define NOP_UU_H uint16_t, uint32_t, uint32_t
4250 #define NOP_UU_W uint32_t, uint64_t, uint64_t
4251 /* vfncvt.xu.f.w vd, vs2, vm # Convert double-width float to unsigned integer. */
4252 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
4253 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
4254 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
4255 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b)
4256 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h)
4257 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w)
4258 
4259 /* vfncvt.x.f.w vd, vs2, vm # Convert double-width float to signed integer. */
4260 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
4261 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
4262 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
4263 GEN_VEXT_V_ENV(vfncvt_x_f_w_b)
4264 GEN_VEXT_V_ENV(vfncvt_x_f_w_h)
4265 GEN_VEXT_V_ENV(vfncvt_x_f_w_w)
4266 
4267 /* vfncvt.f.xu.w vd, vs2, vm # Convert double-width unsigned integer to float. */
4268 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
4269 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
4270 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h)
4271 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w)
4272 
4273 /* vfncvt.f.x.w vd, vs2, vm # Convert double-width integer to float. */
4274 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
4275 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
4276 GEN_VEXT_V_ENV(vfncvt_f_x_w_h)
4277 GEN_VEXT_V_ENV(vfncvt_f_x_w_w)
4278 
4279 /* vfncvt.f.f.w vd, vs2, vm # Convert double-width float to single-width float. */
4280 static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4281 {
4282     return float32_to_float16(a, true, s);
4283 }
4284 
4285 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
4286 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
4287 GEN_VEXT_V_ENV(vfncvt_f_f_w_h)
4288 GEN_VEXT_V_ENV(vfncvt_f_f_w_w)
4289 
4290 /*
4291  *** Vector Reduction Operations
4292  */
4293 /* Vector Single-Width Integer Reduction Instructions */
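/*
 * The reduction helpers fold the active elements of vs2 into a scalar
 * accumulator seeded with vs1[0] and write the result to vd[0] only;
 * inactive (masked-off) elements are skipped rather than treated as an
 * identity value.  Illustrative example: with OP = DO_ADD, vs1[0] = 10
 * and active vs2 elements {1, 2, 3, 4}, vd[0] becomes 20.
 */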
4294 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP)          \
4295 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
4296         void *vs2, CPURISCVState *env, uint32_t desc)     \
4297 {                                                         \
4298     uint32_t vm = vext_vm(desc);                          \
4299     uint32_t vl = env->vl;                                \
4300     uint32_t i;                                           \
4301     TD s1 =  *((TD *)vs1 + HD(0));                        \
4302                                                           \
4303     for (i = env->vstart; i < vl; i++) {                  \
4304         TS2 s2 = *((TS2 *)vs2 + HS2(i));                  \
4305         if (!vm && !vext_elem_mask(v0, i)) {              \
4306             continue;                                     \
4307         }                                                 \
4308         s1 = OP(s1, (TD)s2);                              \
4309     }                                                     \
4310     *((TD *)vd + HD(0)) = s1;                             \
4311     env->vstart = 0;                                      \
4312 }
4313 
4314 /* vd[0] = sum(vs1[0], vs2[*]) */
4315 GEN_VEXT_RED(vredsum_vs_b, int8_t,  int8_t,  H1, H1, DO_ADD)
4316 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
4317 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
4318 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)
4319 
4320 /* vd[0] = maxu(vs1[0], vs2[*]) */
4321 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t,  uint8_t,  H1, H1, DO_MAX)
4322 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
4323 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
4324 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)
4325 
4326 /* vd[0] = max(vs1[0], vs2[*]) */
4327 GEN_VEXT_RED(vredmax_vs_b, int8_t,  int8_t,  H1, H1, DO_MAX)
4328 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
4329 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
4330 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)
4331 
4332 /* vd[0] = minu(vs1[0], vs2[*]) */
4333 GEN_VEXT_RED(vredminu_vs_b, uint8_t,  uint8_t,  H1, H1, DO_MIN)
4334 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
4335 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
4336 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)
4337 
4338 /* vd[0] = min(vs1[0], vs2[*]) */
4339 GEN_VEXT_RED(vredmin_vs_b, int8_t,  int8_t,  H1, H1, DO_MIN)
4340 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
4341 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
4342 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)
4343 
4344 /* vd[0] = and(vs1[0], vs2[*]) */
4345 GEN_VEXT_RED(vredand_vs_b, int8_t,  int8_t,  H1, H1, DO_AND)
4346 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
4347 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
4348 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)
4349 
4350 /* vd[0] = or(vs1[0], vs2[*]) */
4351 GEN_VEXT_RED(vredor_vs_b, int8_t,  int8_t,  H1, H1, DO_OR)
4352 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
4353 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
4354 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)
4355 
4356 /* vd[0] = xor(vs1[0], vs2[*]) */
4357 GEN_VEXT_RED(vredxor_vs_b, int8_t,  int8_t,  H1, H1, DO_XOR)
4358 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
4359 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
4360 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)
4361 
4362 /* Vector Widening Integer Reduction Instructions */
4363 /* Signed sum reduction into double-width accumulator */
4364 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t,  H2, H1, DO_ADD)
4365 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
4366 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)
4367 
4368 /* Unsigned sum reduction into double-width accumulator */
4369 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t,  H2, H1, DO_ADD)
4370 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
4371 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
4372 
4373 /* Vector Single-Width Floating-Point Reduction Instructions */
4374 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP)          \
4375 void HELPER(NAME)(void *vd, void *v0, void *vs1,           \
4376                   void *vs2, CPURISCVState *env,           \
4377                   uint32_t desc)                           \
4378 {                                                          \
4379     uint32_t vm = vext_vm(desc);                           \
4380     uint32_t vl = env->vl;                                 \
4381     uint32_t i;                                            \
4382     TD s1 =  *((TD *)vs1 + HD(0));                         \
4383                                                            \
4384     for (i = env->vstart; i < vl; i++) {                   \
4385         TS2 s2 = *((TS2 *)vs2 + HS2(i));                   \
4386         if (!vm && !vext_elem_mask(v0, i)) {               \
4387             continue;                                      \
4388         }                                                  \
4389         s1 = OP(s1, (TD)s2, &env->fp_status);              \
4390     }                                                      \
4391     *((TD *)vd + HD(0)) = s1;                              \
4392     env->vstart = 0;                                       \
4393 }
4394 
4395 /* Unordered sum */
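/*
 * The spec leaves the association order of vfredsum unspecified
 * ("unordered"); accumulating strictly in element order, as the macro
 * above does, is one legal choice.
 */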
4396 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4397 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4398 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
4399 
4400 /* Maximum value */
4401 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number)
4402 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number)
4403 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number)
4404 
4405 /* Minimum value */
4406 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number)
4407 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number)
4408 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number)
4409 
4410 /* Vector Widening Floating-Point Reduction Instructions */
4411 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
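/*
 * Each SEW-wide element of vs2 is first promoted to 2*SEW
 * (float16->float32 or float32->float64) and then added into the
 * 2*SEW accumulator taken from vs1[0]; both steps use env->fp_status,
 * so rounding and exception flags accumulate as usual.
 */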
4412 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
4413                             void *vs2, CPURISCVState *env, uint32_t desc)
4414 {
4415     uint32_t vm = vext_vm(desc);
4416     uint32_t vl = env->vl;
4417     uint32_t i;
4418     uint32_t s1 =  *((uint32_t *)vs1 + H4(0));
4419 
4420     for (i = env->vstart; i < vl; i++) {
4421         uint16_t s2 = *((uint16_t *)vs2 + H2(i));
4422         if (!vm && !vext_elem_mask(v0, i)) {
4423             continue;
4424         }
4425         s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status),
4426                          &env->fp_status);
4427     }
4428     *((uint32_t *)vd + H4(0)) = s1;
4429     env->vstart = 0;
4430 }
4431 
4432 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
4433                             void *vs2, CPURISCVState *env, uint32_t desc)
4434 {
4435     uint32_t vm = vext_vm(desc);
4436     uint32_t vl = env->vl;
4437     uint32_t i;
4438     uint64_t s1 =  *((uint64_t *)vs1);
4439 
4440     for (i = env->vstart; i < vl; i++) {
4441         uint32_t s2 = *((uint32_t *)vs2 + H4(i));
4442         if (!vm && !vext_elem_mask(v0, i)) {
4443             continue;
4444         }
4445         s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status),
4446                          &env->fp_status);
4447     }
4448     *((uint64_t *)vd) = s1;
4449     env->vstart = 0;
4450 }
4451 
4452 /*
4453  *** Vector Mask Operations
4454  */
4455 /* Vector Mask-Register Logical Instructions */
4456 #define GEN_VEXT_MASK_VV(NAME, OP)                        \
4457 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
4458                   void *vs2, CPURISCVState *env,          \
4459                   uint32_t desc)                          \
4460 {                                                         \
4461     uint32_t vl = env->vl;                                \
4462     uint32_t i;                                           \
4463     int a, b;                                             \
4464                                                           \
4465     for (i = env->vstart; i < vl; i++) {                  \
4466         a = vext_elem_mask(vs1, i);                       \
4467         b = vext_elem_mask(vs2, i);                       \
4468         vext_set_elem_mask(vd, i, OP(b, a));              \
4469     }                                                     \
4470     env->vstart = 0;                                      \
4471 }
4472 
4473 #define DO_NAND(N, M)  (!(N & M))
4474 #define DO_ANDNOT(N, M)  (N & !M)
4475 #define DO_NOR(N, M)  (!(N | M))
4476 #define DO_ORNOT(N, M)  (N | !M)
4477 #define DO_XNOR(N, M)  (!(N ^ M))
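/*
 * The operands of these macros are single mask bits (0 or 1) returned
 * by vext_elem_mask(), so logical '!' behaves like a one-bit
 * complement.  They are applied as OP(b, a) with a = vs1[i] and
 * b = vs2[i], e.g. vmandn computes vs2[i] & ~vs1[i] as required.
 */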
4478 
4479 GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
4480 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
4481 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
4482 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
4483 GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
4484 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
4485 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
4486 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
4487 
4488 /* Vector count population in mask vcpop */
4489 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
4490                              uint32_t desc)
4491 {
4492     target_ulong cnt = 0;
4493     uint32_t vm = vext_vm(desc);
4494     uint32_t vl = env->vl;
4495     int i;
4496 
4497     for (i = env->vstart; i < vl; i++) {
4498         if (vm || vext_elem_mask(v0, i)) {
4499             if (vext_elem_mask(vs2, i)) {
4500                 cnt++;
4501             }
4502         }
4503     }
4504     env->vstart = 0;
4505     return cnt;
4506 }
4507 
4508 /* vfirst find-first-set mask bit */
4509 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4510                               uint32_t desc)
4511 {
4512     uint32_t vm = vext_vm(desc);
4513     uint32_t vl = env->vl;
4514     int i;
4515 
4516     for (i = env->vstart; i < vl; i++) {
4517         if (vm || vext_elem_mask(v0, i)) {
4518             if (vext_elem_mask(vs2, i)) {
4519                 return i;
4520             }
4521         }
4522     }
4523     env->vstart = 0;
4524     return -1LL;
4525 }
4526 
4527 enum set_mask_type {
4528     ONLY_FIRST = 1,
4529     INCLUDE_FIRST,
4530     BEFORE_FIRST,
4531 };
4532 
4533 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
4534                    uint32_t desc, enum set_mask_type type)
4535 {
4536     uint32_t vm = vext_vm(desc);
4537     uint32_t vl = env->vl;
4538     int i;
4539     bool first_mask_bit = false;
4540 
4541     for (i = env->vstart; i < vl; i++) {
4542         if (!vm && !vext_elem_mask(v0, i)) {
4543             continue;
4544         }
4545         /* write a zero to all following active elements */
4546         if (first_mask_bit) {
4547             vext_set_elem_mask(vd, i, 0);
4548             continue;
4549         }
4550         if (vext_elem_mask(vs2, i)) {
4551             first_mask_bit = true;
4552             if (type == BEFORE_FIRST) {
4553                 vext_set_elem_mask(vd, i, 0);
4554             } else {
4555                 vext_set_elem_mask(vd, i, 1);
4556             }
4557         } else {
4558             if (type == ONLY_FIRST) {
4559                 vext_set_elem_mask(vd, i, 0);
4560             } else {
4561                 vext_set_elem_mask(vd, i, 1);
4562             }
4563         }
4564     }
4565     env->vstart = 0;
4566 }
4567 
4568 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4569                      uint32_t desc)
4570 {
4571     vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
4572 }
4573 
4574 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4575                      uint32_t desc)
4576 {
4577     vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
4578 }
4579 
4580 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4581                      uint32_t desc)
4582 {
4583     vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
4584 }
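/*
 * Illustrative example: if the active bits of vs2 are 0 0 1 0 1, then
 * vmsbf.m writes 1 1 0 0 0, vmsif.m writes 1 1 1 0 0 and vmsof.m
 * writes 0 0 1 0 0.  Inactive destination bits are left untouched by
 * vmsetm() above.
 */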
4585 
4586 /* Vector Iota Instruction */
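/*
 * viota.m writes to each active destination element the count of vs2
 * mask bits set at preceding active positions (an exclusive prefix
 * sum).  Illustrative example: a vs2 mask of 1 0 1 1 yields
 * vd = {0, 1, 1, 2} when all elements are active.
 */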
4587 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H)                                  \
4588 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env,      \
4589                   uint32_t desc)                                          \
4590 {                                                                         \
4591     uint32_t vm = vext_vm(desc);                                          \
4592     uint32_t vl = env->vl;                                                \
4593     uint32_t sum = 0;                                                     \
4594     int i;                                                                \
4595                                                                           \
4596     for (i = env->vstart; i < vl; i++) {                                  \
4597         if (!vm && !vext_elem_mask(v0, i)) {                              \
4598             continue;                                                     \
4599         }                                                                 \
4600         *((ETYPE *)vd + H(i)) = sum;                                      \
4601         if (vext_elem_mask(vs2, i)) {                                     \
4602             sum++;                                                        \
4603         }                                                                 \
4604     }                                                                     \
4605     env->vstart = 0;                                                      \
4606 }
4607 
4608 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t,  H1)
4609 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
4610 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
4611 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
4612 
4613 /* Vector Element Index Instruction */
4614 #define GEN_VEXT_VID_V(NAME, ETYPE, H)                                    \
4615 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc)  \
4616 {                                                                         \
4617     uint32_t vm = vext_vm(desc);                                          \
4618     uint32_t vl = env->vl;                                                \
4619     int i;                                                                \
4620                                                                           \
4621     for (i = env->vstart; i < vl; i++) {                                  \
4622         if (!vm && !vext_elem_mask(v0, i)) {                              \
4623             continue;                                                     \
4624         }                                                                 \
4625         *((ETYPE *)vd + H(i)) = i;                                        \
4626     }                                                                     \
4627     env->vstart = 0;                                                      \
4628 }
4629 
4630 GEN_VEXT_VID_V(vid_v_b, uint8_t,  H1)
4631 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
4632 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
4633 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
4634 
4635 /*
4636  *** Vector Permutation Instructions
4637  */
4638 
4639 /* Vector Slide Instructions */
4640 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                              \
4641 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4642                   CPURISCVState *env, uint32_t desc)                      \
4643 {                                                                         \
4644     uint32_t vm = vext_vm(desc);                                          \
4645     uint32_t vl = env->vl;                                                \
4646     target_ulong offset = s1, i_min, i;                                   \
4647                                                                           \
4648     i_min = MAX(env->vstart, offset);                                     \
4649     for (i = i_min; i < vl; i++) {                                        \
4650         if (!vm && !vext_elem_mask(v0, i)) {                              \
4651             continue;                                                     \
4652         }                                                                 \
4653         *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
4654     }                                                                     \
    env->vstart = 0;                                                      \
4655 }
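/*
 * Destination elements below the slide offset are architecturally
 * unchanged, so the loop above starts at MAX(vstart, offset) and never
 * touches vd[0..offset-1].
 */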
4656 
4657 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
4658 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t,  H1)
4659 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
4660 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
4661 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
4662 
4663 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                            \
4664 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4665                   CPURISCVState *env, uint32_t desc)                      \
4666 {                                                                         \
4667     uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
4668     uint32_t vm = vext_vm(desc);                                          \
4669     uint32_t vl = env->vl;                                                \
4670     target_ulong i_max, i;                                                \
4671                                                                           \
4672     i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart);       \
4673     for (i = env->vstart; i < i_max; ++i) {                               \
4674         if (vm || vext_elem_mask(v0, i)) {                                \
4675             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));          \
4676         }                                                                 \
4677     }                                                                     \
4678                                                                           \
4679     for (i = i_max; i < vl; ++i) {                                        \
4680         if (vm || vext_elem_mask(v0, i)) {                                \
4681             *((ETYPE *)vd + H(i)) = 0;                                    \
4682         }                                                                 \
4683     }                                                                     \
4684                                                                           \
4685     env->vstart = 0;                                                      \
4686 }
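/*
 * i_max bounds the indices for which i + rs1 still names a source
 * element (i + rs1 < VLMAX); body elements beyond that are written
 * with zero, matching vd[i] = (i + rs1 < VLMAX) ? vs2[i + rs1] : 0.
 */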
4687 
4688 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
4689 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t,  H1)
4690 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
4691 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
4692 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
4693 
4694 #define GEN_VEXT_VSLIE1UP(ESZ, H)                                           \
4695 static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
4696                      CPURISCVState *env, uint32_t desc)                     \
4697 {                                                                           \
4698     typedef uint##ESZ##_t ETYPE;                                            \
4699     uint32_t vm = vext_vm(desc);                                            \
4700     uint32_t vl = env->vl;                                                  \
4701     uint32_t i;                                                             \
4702                                                                             \
4703     for (i = env->vstart; i < vl; i++) {                                    \
4704         if (!vm && !vext_elem_mask(v0, i)) {                                \
4705             continue;                                                       \
4706         }                                                                   \
4707         if (i == 0) {                                                       \
4708             *((ETYPE *)vd + H(i)) = s1;                                     \
4709         } else {                                                            \
4710             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));             \
4711         }                                                                   \
4712     }                                                                       \
4713     env->vstart = 0;                                                        \
4714 }
4715 
4716 GEN_VEXT_VSLIE1UP(8,  H1)
4717 GEN_VEXT_VSLIE1UP(16, H2)
4718 GEN_VEXT_VSLIE1UP(32, H4)
4719 GEN_VEXT_VSLIE1UP(64, H8)
4720 
4721 #define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ)                          \
4722 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4723                   CPURISCVState *env, uint32_t desc)              \
4724 {                                                                 \
4725     vslide1up_##ESZ(vd, v0, s1, vs2, env, desc);                  \
4726 }
4727 
4728 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
4729 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
4730 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
4731 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
4732 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
4733 
4734 #define GEN_VEXT_VSLIDE1DOWN(ESZ, H)                                          \
4735 static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
4736                        CPURISCVState *env, uint32_t desc)                     \
4737 {                                                                             \
4738     typedef uint##ESZ##_t ETYPE;                                              \
4739     uint32_t vm = vext_vm(desc);                                              \
4740     uint32_t vl = env->vl;                                                    \
4741     uint32_t i;                                                               \
4742                                                                               \
4743     for (i = env->vstart; i < vl; i++) {                                      \
4744         if (!vm && !vext_elem_mask(v0, i)) {                                  \
4745             continue;                                                         \
4746         }                                                                     \
4747         if (i == vl - 1) {                                                    \
4748             *((ETYPE *)vd + H(i)) = s1;                                       \
4749         } else {                                                              \
4750             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));               \
4751         }                                                                     \
4752     }                                                                         \
4753     env->vstart = 0;                                                          \
4754 }
4755 
4756 GEN_VEXT_VSLIDE1DOWN(8,  H1)
4757 GEN_VEXT_VSLIDE1DOWN(16, H2)
4758 GEN_VEXT_VSLIDE1DOWN(32, H4)
4759 GEN_VEXT_VSLIDE1DOWN(64, H8)
4760 
4761 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ)                        \
4762 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4763                   CPURISCVState *env, uint32_t desc)              \
4764 {                                                                 \
4765     vslide1down_##ESZ(vd, v0, s1, vs2, env, desc);                \
4766 }
4767 
4768 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
4769 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
4770 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
4771 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
4772 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
4773 
4774 /* Vector Floating-Point Slide Instructions */
4775 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ)                     \
4776 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4777                   CPURISCVState *env, uint32_t desc)          \
4778 {                                                             \
4779     vslide1up_##ESZ(vd, v0, s1, vs2, env, desc);              \
4780 }
4781 
4782 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
4783 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
4784 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
4785 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)
4786 
4787 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ)                   \
4788 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4789                   CPURISCVState *env, uint32_t desc)          \
4790 {                                                             \
4791     vslide1down_##ESZ(vd, v0, s1, vs2, env, desc);            \
4792 }
4793 
4794 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
4795 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
4796 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
4797 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
4798 
4799 /* Vector Register Gather Instruction */
4800 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2)                    \
4801 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
4802                   CPURISCVState *env, uint32_t desc)                      \
4803 {                                                                         \
4804     uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2)));             \
4805     uint32_t vm = vext_vm(desc);                                          \
4806     uint32_t vl = env->vl;                                                \
4807     uint64_t index;                                                       \
4808     uint32_t i;                                                           \
4809                                                                           \
4810     for (i = env->vstart; i < vl; i++) {                                  \
4811         if (!vm && !vext_elem_mask(v0, i)) {                              \
4812             continue;                                                     \
4813         }                                                                 \
4814         index = *((TS1 *)vs1 + HS1(i));                                   \
4815         if (index >= vlmax) {                                             \
4816             *((TS2 *)vd + HS2(i)) = 0;                                    \
4817         } else {                                                          \
4818             *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index));           \
4819         }                                                                 \
4820     }                                                                     \
4821     env->vstart = 0;                                                      \
4822 }
4823 
4824 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
4825 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t,  uint8_t,  H1, H1)
4826 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
4827 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
4828 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)
4829 
4830 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t,  H2, H1)
4831 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
4832 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
4833 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
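/*
 * vrgatherei16 always uses 16-bit indices (TS1 = uint16_t) regardless
 * of the destination SEW, so e.g. SEW=8 gathers can still address
 * source elements beyond index 255.
 */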
4834 
4835 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                              \
4836 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4837                   CPURISCVState *env, uint32_t desc)                      \
4838 {                                                                         \
4839     uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
4840     uint32_t vm = vext_vm(desc);                                          \
4841     uint32_t vl = env->vl;                                                \
4842     uint64_t index = s1;                                                  \
4843     uint32_t i;                                                           \
4844                                                                           \
4845     for (i = env->vstart; i < vl; i++) {                                  \
4846         if (!vm && !vext_elem_mask(v0, i)) {                              \
4847             continue;                                                     \
4848         }                                                                 \
4849         if (index >= vlmax) {                                             \
4850             *((ETYPE *)vd + H(i)) = 0;                                    \
4851         } else {                                                          \
4852             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
4853         }                                                                 \
4854     }                                                                     \
4855     env->vstart = 0;                                                      \
4856 }
4857 
4858 /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
4859 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t,  H1)
4860 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
4861 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
4862 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
4863 
4864 /* Vector Compress Instruction */
4865 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
4866 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
4867                   CPURISCVState *env, uint32_t desc)                      \
4868 {                                                                         \
4869     uint32_t vl = env->vl;                                                \
4870     uint32_t num = 0, i;                                                  \
4871                                                                           \
4872     for (i = env->vstart; i < vl; i++) {                                  \
4873         if (!vext_elem_mask(vs1, i)) {                                    \
4874             continue;                                                     \
4875         }                                                                 \
4876         *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
4877         num++;                                                            \
4878     }                                                                     \
4879     env->vstart = 0;                                                      \
4880 }
4881 
4882 /* Compress into vd elements of vs2 where vs1 is enabled */
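/*
 * Illustrative example: with vs1 mask bits {1, 0, 1, 1} and
 * vs2 = {a, b, c, d}, the packed result is vd = {a, c, d}; destination
 * elements past the packed count are left as they were.
 */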
4883 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t,  H1)
4884 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
4885 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
4886 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
4887 
4888 /* Vector Whole Register Move */
4889 void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
4890 {
4891     /* EEW = SEW */
4892     uint32_t maxsz = simd_maxsz(desc);
4893     uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
4894     uint32_t startb = env->vstart * sewb;
4895     uint32_t i = startb;
4896 
    if (HOST_BIG_ENDIAN && i % 8 != 0) {
        /*
         * H1() only adjusts byte offsets within a single host 64-bit
         * chunk, so the element bytes [i, j) of a partially copied
         * chunk live at host offsets [H1(j - 1), H1(j - 1) + j - i).
         * Copy that head separately, then copy whole chunks directly.
         */
        uint32_t j = ROUND_UP(i, 8);
        memcpy((uint8_t *)vd + H1(j - 1),
               (uint8_t *)vs2 + H1(j - 1),
               j - i);
        i = j;
    }

4897     memcpy((uint8_t *)vd + i,
4898            (uint8_t *)vs2 + i,
4899            maxsz - i);
4900 
4901     env->vstart = 0;
4902 }
4903 
4904 /* Vector Integer Extension */
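/*
 * The assignment below relies on C's integer conversions: an unsigned
 * DTYPE zero-extends and a signed DTYPE sign-extends when widened to
 * ETYPE, which is exactly what vzext.vf* and vsext.vf* require.
 */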
4905 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)            \
4906 void HELPER(NAME)(void *vd, void *v0, void *vs2,                 \
4907                   CPURISCVState *env, uint32_t desc)             \
4908 {                                                                \
4909     uint32_t vl = env->vl;                                       \
4910     uint32_t vm = vext_vm(desc);                                 \
4911     uint32_t i;                                                  \
4912                                                                  \
4913     for (i = env->vstart; i < vl; i++) {                         \
4914         if (!vm && !vext_elem_mask(v0, i)) {                     \
4915             continue;                                            \
4916         }                                                        \
4917         *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));       \
4918     }                                                            \
4919     env->vstart = 0;                                             \
4920 }
4921 
4922 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t,  H2, H1)
4923 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
4924 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
4925 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t,  H4, H1)
4926 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
4927 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t,  H8, H1)
4928 
4929 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t,  H2, H1)
4930 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
4931 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
4932 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t,  H4, H1)
4933 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
4934 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t,  H8, H1)
4935