xref: /openbmc/qemu/target/riscv/vector_helper.c (revision 25eae048)
1 /*
2  * RISC-V Vector Extension Helpers for QEMU.
3  *
4  * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2 or later, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License along with
16  * this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include "qemu/osdep.h"
20 #include "qemu/host-utils.h"
21 #include "qemu/bitops.h"
22 #include "cpu.h"
23 #include "exec/memop.h"
24 #include "exec/exec-all.h"
25 #include "exec/helper-proto.h"
26 #include "fpu/softfloat.h"
27 #include "tcg/tcg-gvec-desc.h"
28 #include "internals.h"
29 #include <math.h>
30 
31 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
32                             target_ulong s2)
33 {
34     int vlmax, vl;
35     RISCVCPU *cpu = env_archcpu(env);
36     uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
37     uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
38     uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
39     int xlen = riscv_cpu_xlen(env);
40     bool vill = (s2 >> (xlen - 1)) & 0x1;
41     target_ulong reserved = s2 &
42                             MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
43                                             xlen - 1 - R_VTYPE_RESERVED_SHIFT);
44 
45     if (lmul & 4) {
46         /* Fractional LMUL. */
47         if (lmul == 4 ||
48             cpu->cfg.elen >> (8 - lmul) < sew) {
49             vill = true;
50         }
51     }
52 
53     if ((sew > cpu->cfg.elen)
54         || vill
55         || (ediv != 0)
56         || (reserved != 0)) {
57         /* only set vill bit. */
58         env->vill = 1;
59         env->vtype = 0;
60         env->vl = 0;
61         env->vstart = 0;
62         return 0;
63     }
64 
65     vlmax = vext_get_vlmax(cpu, s2);
66     if (s1 <= vlmax) {
67         vl = s1;
68     } else {
69         vl = vlmax;
70     }
71     env->vl = vl;
72     env->vtype = s2;
73     env->vstart = 0;
74     env->vill = 0;
75     return vl;
76 }
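
/*
 * Worked example (added comment, not part of the original source),
 * assuming vext_get_vlmax() returns VLMAX = VLEN / SEW * LMUL:
 *
 *   VLEN = 128, requested SEW = 32 (vsew = 2), LMUL = 2 (vlmul = 001)
 *     => vlmax = 128 / 32 * 2 = 8
 *   AVL s1 = 10  => vl = MIN(10, 8) = 8, vtype = s2, vstart = 0
 *
 * A request with SEW > ELEN, a reserved vtype bit set, or a fractional
 * LMUL too small for SEW instead takes the vill path above: vill is set
 * and vtype, vl and vstart are cleared.
 */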
77 
78 /*
79  * Note that vector data is stored in host-endian 64-bit chunks,
80  * so addressing units smaller than that need a host-endian fixup.
81  */
82 #if HOST_BIG_ENDIAN
83 #define H1(x)   ((x) ^ 7)
84 #define H1_2(x) ((x) ^ 6)
85 #define H1_4(x) ((x) ^ 4)
86 #define H2(x)   ((x) ^ 3)
87 #define H4(x)   ((x) ^ 1)
88 #define H8(x)   ((x))
89 #else
90 #define H1(x)   (x)
91 #define H1_2(x) (x)
92 #define H1_4(x) (x)
93 #define H2(x)   (x)
94 #define H4(x)   (x)
95 #define H8(x)   (x)
96 #endif
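
/*
 * Illustrative example (added comment): on a big-endian host, byte
 * element 0 sits in the most-significant byte of the first 64-bit
 * chunk, so H1(0) = 7, H1(1) = 6, ..., H1(7) = 0, and 16-bit element 0
 * is reached via H2(0) = 3.  On little-endian hosts the H macros are
 * identity mappings and element i is simply at index i.
 */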
97 
98 static inline uint32_t vext_nf(uint32_t desc)
99 {
100     return FIELD_EX32(simd_data(desc), VDATA, NF);
101 }
102 
103 static inline uint32_t vext_vm(uint32_t desc)
104 {
105     return FIELD_EX32(simd_data(desc), VDATA, VM);
106 }
107 
108 /*
109  * Encode LMUL to lmul as follows:
110  *     LMUL    vlmul    lmul
111  *      1       000       0
112  *      2       001       1
113  *      4       010       2
114  *      8       011       3
115  *      -       100       -
116  *     1/8      101      -3
117  *     1/4      110      -2
118  *     1/2      111      -1
119  */
120 static inline int32_t vext_lmul(uint32_t desc)
121 {
122     return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
123 }
124 
125 /*
126  * Get the maximum number of elements that can be operated on.
127  *
128  * esz: log2 of element size in bytes.
129  */
130 static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
131 {
132     /*
133      * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
134      * so vlen in bytes (vlenb) is encoded as maxsz.
135      */
136     uint32_t vlenb = simd_maxsz(desc);
137 
138     /* Return VLMAX */
139     int scale = vext_lmul(desc) - esz;
140     return scale < 0 ? vlenb >> -scale : vlenb << scale;
141 }
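
/*
 * Worked example (added comment): with VLEN = 128 bits, vlenb = 16 is
 * carried in simd_maxsz(desc).  For SEW = 32 (esz = 2) and LMUL = 2
 * (lmul = 1), scale = 1 - 2 = -1, so the function returns 16 >> 1 = 8,
 * matching VLMAX = VLEN * LMUL / SEW = 128 * 2 / 32.
 */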
142 
143 static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
144 {
145     return (addr & env->cur_pmmask) | env->cur_pmbase;
146 }
147 
148 /*
149  * This function checks the watchpoint before the real load operation.
150  *
151  * In softmmu mode, the TLB API probe_access suffices for the watchpoint check.
152  * In user mode, there is no watchpoint support now.
153  *
154  * It will trigger an exception if there is no mapping in the TLB
155  * and the page table walk can't fill the TLB entry. Then the guest
156  * software can return here after processing the exception, or never return.
157  */
158 static void probe_pages(CPURISCVState *env, target_ulong addr,
159                         target_ulong len, uintptr_t ra,
160                         MMUAccessType access_type)
161 {
162     target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
163     target_ulong curlen = MIN(pagelen, len);
164 
165     probe_access(env, adjust_addr(env, addr), curlen, access_type,
166                  cpu_mmu_index(env, false), ra);
167     if (len > curlen) {
168         addr += curlen;
169         curlen = len - curlen;
170         probe_access(env, adjust_addr(env, addr), curlen, access_type,
171                      cpu_mmu_index(env, false), ra);
172     }
173 }
174 
175 static inline void vext_set_elem_mask(void *v0, int index,
176                                       uint8_t value)
177 {
178     int idx = index / 64;
179     int pos = index % 64;
180     uint64_t old = ((uint64_t *)v0)[idx];
181     ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
182 }
183 
184 /*
185  * Earlier designs (pre-0.9) had a varying number of bits
186  * per mask value (MLEN). In the 0.9 design, MLEN=1.
187  * (Section 4.5)
188  */
189 static inline int vext_elem_mask(void *v0, int index)
190 {
191     int idx = index / 64;
192     int pos = index % 64;
193     return (((uint64_t *)v0)[idx] >> pos) & 1;
194 }
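
/*
 * Example (added comment): for element 70, idx = 70 / 64 = 1 and
 * pos = 70 % 64 = 6, so the mask bit is bit 6 of the second host
 * 64-bit chunk of v0.
 */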
195 
196 /* element operations for load and store */
197 typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
198                                uint32_t idx, void *vd, uintptr_t retaddr);
199 
200 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
201 static void NAME(CPURISCVState *env, abi_ptr addr,         \
202                  uint32_t idx, void *vd, uintptr_t retaddr)\
203 {                                                          \
204     ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
205     *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
206 }
207 
208 GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
209 GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
210 GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
211 GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)
212 
213 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
214 static void NAME(CPURISCVState *env, abi_ptr addr,         \
215                  uint32_t idx, void *vd, uintptr_t retaddr)\
216 {                                                          \
217     ETYPE data = *((ETYPE *)vd + H(idx));                  \
218     cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
219 }
220 
221 GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
222 GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
223 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
224 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
225 
226 /*
227  *** stride: access vector elements from strided memory
228  */
229 static void
230 vext_ldst_stride(void *vd, void *v0, target_ulong base,
231                  target_ulong stride, CPURISCVState *env,
232                  uint32_t desc, uint32_t vm,
233                  vext_ldst_elem_fn *ldst_elem,
234                  uint32_t esz, uintptr_t ra)
235 {
236     uint32_t i, k;
237     uint32_t nf = vext_nf(desc);
238     uint32_t max_elems = vext_max_elems(desc, esz);
239 
240     for (i = env->vstart; i < env->vl; i++, env->vstart++) {
241         if (!vm && !vext_elem_mask(v0, i)) {
242             continue;
243         }
244 
245         k = 0;
246         while (k < nf) {
247             target_ulong addr = base + stride * i + (k << esz);
248             ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
249             k++;
250         }
251     }
252     env->vstart = 0;
253 }
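
/*
 * Illustrative example (added comment): a strided segment load such as
 * vlsseg3e16.v has nf = 3 and esz = 1, so element i of field k is
 * accessed at base + stride * i + k * 2 and placed at destination
 * element i + k * max_elems, i.e. field k lands in the k-th register
 * group of vd.
 */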
254 
255 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
256 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
257                   target_ulong stride, CPURISCVState *env,              \
258                   uint32_t desc)                                        \
259 {                                                                       \
260     uint32_t vm = vext_vm(desc);                                        \
261     vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
262                      ctzl(sizeof(ETYPE)), GETPC());                     \
263 }
264 
265 GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b)
266 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
267 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
268 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)
269 
270 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                       \
271 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
272                   target_ulong stride, CPURISCVState *env,              \
273                   uint32_t desc)                                        \
274 {                                                                       \
275     uint32_t vm = vext_vm(desc);                                        \
276     vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
277                      ctzl(sizeof(ETYPE)), GETPC());                     \
278 }
279 
280 GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b)
281 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
282 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
283 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
284 
285 /*
286  *** unit-stride: access elements stored contiguously in memory
287  */
288 
289 /* unmasked unit-stride load and store operations */
290 static void
291 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
292              vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl,
293              uintptr_t ra)
294 {
295     uint32_t i, k;
296     uint32_t nf = vext_nf(desc);
297     uint32_t max_elems = vext_max_elems(desc, esz);
298 
299     /* load/store bytes from/to guest memory */
300     for (i = env->vstart; i < evl; i++, env->vstart++) {
301         k = 0;
302         while (k < nf) {
303             target_ulong addr = base + ((i * nf + k) << esz);
304             ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
305             k++;
306         }
307     }
308     env->vstart = 0;
309 }
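
/*
 * Example (added comment): for a unit-stride segment access with
 * nf = 2 and esz = 2 (32-bit elements), element i of field k is at
 * base + (i * 2 + k) * 4, so the two fields of a segment are adjacent
 * in memory while field k is gathered into vd at element
 * i + k * max_elems.
 */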
310 
311 /*
312  * A masked unit-stride load or store operation is a special case of a strided
313  * access with stride = NF * sizeof(ETYPE).
314  */
315 
316 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN)                            \
317 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
318                          CPURISCVState *env, uint32_t desc)             \
319 {                                                                       \
320     uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
321     vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
322                      ctzl(sizeof(ETYPE)), GETPC());                     \
323 }                                                                       \
324                                                                         \
325 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
326                   CPURISCVState *env, uint32_t desc)                    \
327 {                                                                       \
328     vext_ldst_us(vd, base, env, desc, LOAD_FN,                          \
329                  ctzl(sizeof(ETYPE)), env->vl, GETPC());                \
330 }
331 
332 GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b)
333 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
334 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
335 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)
336 
337 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN)                            \
338 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,          \
339                          CPURISCVState *env, uint32_t desc)              \
340 {                                                                        \
341     uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));              \
342     vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,   \
343                      ctzl(sizeof(ETYPE)), GETPC());                      \
344 }                                                                        \
345                                                                          \
346 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                 \
347                   CPURISCVState *env, uint32_t desc)                     \
348 {                                                                        \
349     vext_ldst_us(vd, base, env, desc, STORE_FN,                          \
350                  ctzl(sizeof(ETYPE)), env->vl, GETPC());                 \
351 }
352 
353 GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b)
354 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
355 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
356 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)
357 
358 /*
359  *** unit-stride mask load and store, EEW = 1
360  */
361 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
362                     CPURISCVState *env, uint32_t desc)
363 {
364     /* evl = ceil(vl/8) */
365     uint8_t evl = (env->vl + 7) >> 3;
366     vext_ldst_us(vd, base, env, desc, lde_b,
367                  0, evl, GETPC());
368 }
369 
370 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
371                     CPURISCVState *env, uint32_t desc)
372 {
373     /* evl = ceil(vl/8) */
374     uint8_t evl = (env->vl + 7) >> 3;
375     vext_ldst_us(vd, base, env, desc, ste_b,
376                  0, evl, GETPC());
377 }
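
/*
 * Example (added comment): with vl = 17, evl = (17 + 7) >> 3 = 3, so
 * vlm.v/vsm.v transfer 3 bytes, enough to cover all 17 mask bits.
 */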
378 
379 /*
380  *** index: access vector elements from indexed memory
381  */
382 typedef target_ulong vext_get_index_addr(target_ulong base,
383         uint32_t idx, void *vs2);
384 
385 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
386 static target_ulong NAME(target_ulong base,            \
387                          uint32_t idx, void *vs2)      \
388 {                                                      \
389     return (base + *((ETYPE *)vs2 + H(idx)));          \
390 }
391 
392 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t,  H1)
393 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
394 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
395 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
396 
397 static inline void
398 vext_ldst_index(void *vd, void *v0, target_ulong base,
399                 void *vs2, CPURISCVState *env, uint32_t desc,
400                 vext_get_index_addr get_index_addr,
401                 vext_ldst_elem_fn *ldst_elem,
402                 uint32_t esz, uintptr_t ra)
403 {
404     uint32_t i, k;
405     uint32_t nf = vext_nf(desc);
406     uint32_t vm = vext_vm(desc);
407     uint32_t max_elems = vext_max_elems(desc, esz);
408 
409     /* load/store bytes from/to guest memory */
410     for (i = env->vstart; i < env->vl; i++, env->vstart++) {
411         if (!vm && !vext_elem_mask(v0, i)) {
412             continue;
413         }
414 
415         k = 0;
416         while (k < nf) {
417             abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz);
418             ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
419             k++;
420         }
421     }
422     env->vstart = 0;
423 }
424 
425 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)                  \
426 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                   \
427                   void *vs2, CPURISCVState *env, uint32_t desc)            \
428 {                                                                          \
429     vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,                \
430                     LOAD_FN, ctzl(sizeof(ETYPE)), GETPC());                \
431 }
432 
433 GEN_VEXT_LD_INDEX(vlxei8_8_v,   int8_t,  idx_b, lde_b)
434 GEN_VEXT_LD_INDEX(vlxei8_16_v,  int16_t, idx_b, lde_h)
435 GEN_VEXT_LD_INDEX(vlxei8_32_v,  int32_t, idx_b, lde_w)
436 GEN_VEXT_LD_INDEX(vlxei8_64_v,  int64_t, idx_b, lde_d)
437 GEN_VEXT_LD_INDEX(vlxei16_8_v,  int8_t,  idx_h, lde_b)
438 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
439 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
440 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
441 GEN_VEXT_LD_INDEX(vlxei32_8_v,  int8_t,  idx_w, lde_b)
442 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
443 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
444 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
445 GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b)
446 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
447 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
448 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)
449 
450 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)       \
451 void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
452                   void *vs2, CPURISCVState *env, uint32_t desc)  \
453 {                                                                \
454     vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
455                     STORE_FN, ctzl(sizeof(ETYPE)),               \
456                     GETPC());                                    \
457 }
458 
459 GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
460 GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
461 GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
462 GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
463 GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
464 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
465 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
466 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
467 GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
468 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
469 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
470 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
471 GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
472 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
473 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
474 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)
475 
476 /*
477  *** unit-stride fault-only-first load instructions
478  */
479 static inline void
480 vext_ldff(void *vd, void *v0, target_ulong base,
481           CPURISCVState *env, uint32_t desc,
482           vext_ldst_elem_fn *ldst_elem,
483           uint32_t esz, uintptr_t ra)
484 {
485     void *host;
486     uint32_t i, k, vl = 0;
487     uint32_t nf = vext_nf(desc);
488     uint32_t vm = vext_vm(desc);
489     uint32_t max_elems = vext_max_elems(desc, esz);
490     target_ulong addr, offset, remain;
491 
492     /* probe every access */
493     for (i = env->vstart; i < env->vl; i++) {
494         if (!vm && !vext_elem_mask(v0, i)) {
495             continue;
496         }
497         addr = adjust_addr(env, base + i * (nf << esz));
498         if (i == 0) {
499             probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
500         } else {
501             /* if it triggers an exception, no need to check watchpoint */
502             remain = nf << esz;
503             while (remain > 0) {
504                 offset = -(addr | TARGET_PAGE_MASK);
505                 host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
506                                          cpu_mmu_index(env, false));
507                 if (host) {
508 #ifdef CONFIG_USER_ONLY
509                     if (page_check_range(addr, offset, PAGE_READ) < 0) {
510                         vl = i;
511                         goto ProbeSuccess;
512                     }
513 #else
514                     probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
515 #endif
516                 } else {
517                     vl = i;
518                     goto ProbeSuccess;
519                 }
520                 if (remain <= offset) {
521                     break;
522                 }
523                 remain -= offset;
524                 addr = adjust_addr(env, addr + offset);
525             }
526         }
527     }
528 ProbeSuccess:
529     /* load bytes from guest memory */
530     if (vl != 0) {
531         env->vl = vl;
532     }
533     for (i = env->vstart; i < env->vl; i++) {
534         k = 0;
535         if (!vm && !vext_elem_mask(v0, i)) {
536             continue;
537         }
538         while (k < nf) {
539             target_ulong addr = base + ((i * nf + k) << esz);
540             ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
541             k++;
542         }
543     }
544     env->vstart = 0;
545 }
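
/*
 * Illustrative note (added comment): only element 0 is allowed to trap.
 * If the probe loop above cannot verify that some element i > 0 is
 * accessible (tlb_vaddr_to_host() returns NULL, or PAGE_READ is missing
 * in user mode), vl is truncated to i and the load completes with the
 * preceding elements only, which is the fault-only-first behaviour:
 * e.g. a fault at element 5 yields vl = 5 with elements 0..4 loaded
 * (masked-off elements skipped as usual).
 */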
546 
547 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
548 void HELPER(NAME)(void *vd, void *v0, target_ulong base,  \
549                   CPURISCVState *env, uint32_t desc)      \
550 {                                                         \
551     vext_ldff(vd, v0, base, env, desc, LOAD_FN,           \
552               ctzl(sizeof(ETYPE)), GETPC());              \
553 }
554 
555 GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b)
556 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
557 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
558 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
559 
560 #define DO_SWAP(N, M) (M)
561 #define DO_AND(N, M)  (N & M)
562 #define DO_XOR(N, M)  (N ^ M)
563 #define DO_OR(N, M)   (N | M)
564 #define DO_ADD(N, M)  (N + M)
565 
566 /* Signed min/max */
567 #define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
568 #define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))
569 
570 /* Unsigned min/max */
571 #define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
572 #define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)
573 
574 /*
575  *** load and store whole register instructions
576  */
577 static void
578 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
579                 vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra)
580 {
581     uint32_t i, k, off, pos;
582     uint32_t nf = vext_nf(desc);
583     uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
584     uint32_t max_elems = vlenb >> esz;
585 
586     k = env->vstart / max_elems;
587     off = env->vstart % max_elems;
588 
589     if (off) {
590         /* load/store the remaining elements of the segment holding vstart */
591         for (pos = off; pos < max_elems; pos++, env->vstart++) {
592             target_ulong addr = base + ((pos + k * max_elems) << esz);
593             ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra);
594         }
595         k++;
596     }
597 
598     /* load/store elements for rest of segments */
599     for (; k < nf; k++) {
600         for (i = 0; i < max_elems; i++, env->vstart++) {
601             target_ulong addr = base + ((i + k * max_elems) << esz);
602             ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
603         }
604     }
605 
606     env->vstart = 0;
607 }
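
/*
 * Worked example (added comment): for vl2re8_v with vlenb = 16,
 * max_elems = 16 and nf = 2.  Restarting with vstart = 20 gives
 * k = 20 / 16 = 1 and off = 20 % 16 = 4, so the loop skips the first
 * register and resumes at element 4 of the second one.
 */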
608 
609 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN)      \
610 void HELPER(NAME)(void *vd, target_ulong base,       \
611                   CPURISCVState *env, uint32_t desc) \
612 {                                                    \
613     vext_ldst_whole(vd, base, env, desc, LOAD_FN,    \
614                     ctzl(sizeof(ETYPE)), GETPC());   \
615 }
616 
617 GEN_VEXT_LD_WHOLE(vl1re8_v,  int8_t,  lde_b)
618 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
619 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
620 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
621 GEN_VEXT_LD_WHOLE(vl2re8_v,  int8_t,  lde_b)
622 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
623 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
624 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
625 GEN_VEXT_LD_WHOLE(vl4re8_v,  int8_t,  lde_b)
626 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
627 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
628 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
629 GEN_VEXT_LD_WHOLE(vl8re8_v,  int8_t,  lde_b)
630 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
631 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
632 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)
633 
634 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN)     \
635 void HELPER(NAME)(void *vd, target_ulong base,       \
636                   CPURISCVState *env, uint32_t desc) \
637 {                                                    \
638     vext_ldst_whole(vd, base, env, desc, STORE_FN,   \
639                     ctzl(sizeof(ETYPE)), GETPC());   \
640 }
641 
642 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
643 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
644 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
645 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
646 
647 /*
648  *** Vector Integer Arithmetic Instructions
649  */
650 
651 /* expand macro args before macro */
652 #define RVVCALL(macro, ...)  macro(__VA_ARGS__)
653 
654 /* (TD, T1, T2, TX1, TX2) */
655 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
656 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
657 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
658 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
659 #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
660 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
661 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
662 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
663 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
664 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
665 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
666 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
667 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
668 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
669 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
670 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
671 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
672 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
673 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
674 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
675 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
676 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
677 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
678 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
679 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
680 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
681 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
682 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
683 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
684 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
685 
686 /* operation on two vector elements */
687 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
688 
689 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
690 static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
691 {                                                               \
692     TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
693     TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
694     *((TD *)vd + HD(i)) = OP(s2, s1);                           \
695 }
696 #define DO_SUB(N, M) (N - M)
697 #define DO_RSUB(N, M) (M - N)
698 
699 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
700 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
701 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
702 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
703 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
704 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
705 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
706 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
707 
708 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
709                        CPURISCVState *env, uint32_t desc,
710                        opivv2_fn *fn)
711 {
712     uint32_t vm = vext_vm(desc);
713     uint32_t vl = env->vl;
714     uint32_t i;
715 
716     for (i = env->vstart; i < vl; i++) {
717         if (!vm && !vext_elem_mask(v0, i)) {
718             continue;
719         }
720         fn(vd, vs1, vs2, i);
721     }
722     env->vstart = 0;
723 }
724 
725 /* generate the helpers for OPIVV */
726 #define GEN_VEXT_VV(NAME)                                 \
727 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
728                   void *vs2, CPURISCVState *env,          \
729                   uint32_t desc)                          \
730 {                                                         \
731     do_vext_vv(vd, v0, vs1, vs2, env, desc,               \
732                do_##NAME);                                \
733 }
734 
735 GEN_VEXT_VV(vadd_vv_b)
736 GEN_VEXT_VV(vadd_vv_h)
737 GEN_VEXT_VV(vadd_vv_w)
738 GEN_VEXT_VV(vadd_vv_d)
739 GEN_VEXT_VV(vsub_vv_b)
740 GEN_VEXT_VV(vsub_vv_h)
741 GEN_VEXT_VV(vsub_vv_w)
742 GEN_VEXT_VV(vsub_vv_d)
743 
744 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
745 
746 /*
747  * (T1)s1 gives the real operand type.
748  * (TX1)(T1)s1 expands the operand type for widening or narrowing operations.
749  */
750 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
751 static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
752 {                                                                   \
753     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
754     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
755 }
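
/*
 * Example (added comment): for vwadd_vx_b further below, RVVCALL
 * expands WOP_SSS_B so that T1 = int8_t and TX1 = int16_t; (TX1)(T1)s1
 * therefore truncates the x-register value to 8 bits and sign-extends
 * it to the 16-bit widened operand before DO_ADD is applied.
 */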
756 
757 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
758 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
759 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
760 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
761 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
762 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
763 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
764 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
765 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
766 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
767 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
768 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
769 
770 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
771                        CPURISCVState *env, uint32_t desc,
772                        opivx2_fn fn)
773 {
774     uint32_t vm = vext_vm(desc);
775     uint32_t vl = env->vl;
776     uint32_t i;
777 
778     for (i = env->vstart; i < vl; i++) {
779         if (!vm && !vext_elem_mask(v0, i)) {
780             continue;
781         }
782         fn(vd, s1, vs2, i);
783     }
784     env->vstart = 0;
785 }
786 
787 /* generate the helpers for OPIVX */
788 #define GEN_VEXT_VX(NAME)                                 \
789 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
790                   void *vs2, CPURISCVState *env,          \
791                   uint32_t desc)                          \
792 {                                                         \
793     do_vext_vx(vd, v0, s1, vs2, env, desc,                \
794                do_##NAME);                                \
795 }
796 
797 GEN_VEXT_VX(vadd_vx_b)
798 GEN_VEXT_VX(vadd_vx_h)
799 GEN_VEXT_VX(vadd_vx_w)
800 GEN_VEXT_VX(vadd_vx_d)
801 GEN_VEXT_VX(vsub_vx_b)
802 GEN_VEXT_VX(vsub_vx_h)
803 GEN_VEXT_VX(vsub_vx_w)
804 GEN_VEXT_VX(vsub_vx_d)
805 GEN_VEXT_VX(vrsub_vx_b)
806 GEN_VEXT_VX(vrsub_vx_h)
807 GEN_VEXT_VX(vrsub_vx_w)
808 GEN_VEXT_VX(vrsub_vx_d)
809 
810 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
811 {
812     intptr_t oprsz = simd_oprsz(desc);
813     intptr_t i;
814 
815     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
816         *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
817     }
818 }
819 
820 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
821 {
822     intptr_t oprsz = simd_oprsz(desc);
823     intptr_t i;
824 
825     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
826         *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
827     }
828 }
829 
830 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
831 {
832     intptr_t oprsz = simd_oprsz(desc);
833     intptr_t i;
834 
835     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
836         *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
837     }
838 }
839 
840 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
841 {
842     intptr_t oprsz = simd_oprsz(desc);
843     intptr_t i;
844 
845     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
846         *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
847     }
848 }
849 
850 /* Vector Widening Integer Add/Subtract */
851 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
852 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
853 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
854 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
855 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
856 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
857 #define WOP_WUUU_B  uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
858 #define WOP_WUUU_H  uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
859 #define WOP_WUUU_W  uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
860 #define WOP_WSSS_B  int16_t, int8_t, int16_t, int16_t, int16_t
861 #define WOP_WSSS_H  int32_t, int16_t, int32_t, int32_t, int32_t
862 #define WOP_WSSS_W  int64_t, int32_t, int64_t, int64_t, int64_t
863 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
864 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
865 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
866 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
867 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
868 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
869 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
870 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
871 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
872 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
873 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
874 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
875 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
876 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
877 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
878 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
879 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
880 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
881 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
882 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
883 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
884 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
885 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
886 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
887 GEN_VEXT_VV(vwaddu_vv_b)
888 GEN_VEXT_VV(vwaddu_vv_h)
889 GEN_VEXT_VV(vwaddu_vv_w)
890 GEN_VEXT_VV(vwsubu_vv_b)
891 GEN_VEXT_VV(vwsubu_vv_h)
892 GEN_VEXT_VV(vwsubu_vv_w)
893 GEN_VEXT_VV(vwadd_vv_b)
894 GEN_VEXT_VV(vwadd_vv_h)
895 GEN_VEXT_VV(vwadd_vv_w)
896 GEN_VEXT_VV(vwsub_vv_b)
897 GEN_VEXT_VV(vwsub_vv_h)
898 GEN_VEXT_VV(vwsub_vv_w)
899 GEN_VEXT_VV(vwaddu_wv_b)
900 GEN_VEXT_VV(vwaddu_wv_h)
901 GEN_VEXT_VV(vwaddu_wv_w)
902 GEN_VEXT_VV(vwsubu_wv_b)
903 GEN_VEXT_VV(vwsubu_wv_h)
904 GEN_VEXT_VV(vwsubu_wv_w)
905 GEN_VEXT_VV(vwadd_wv_b)
906 GEN_VEXT_VV(vwadd_wv_h)
907 GEN_VEXT_VV(vwadd_wv_w)
908 GEN_VEXT_VV(vwsub_wv_b)
909 GEN_VEXT_VV(vwsub_wv_h)
910 GEN_VEXT_VV(vwsub_wv_w)
911 
912 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
913 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
914 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
915 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
916 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
917 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
918 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
919 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
920 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
921 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
922 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
923 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
924 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
925 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
926 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
927 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
928 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
929 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
930 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
931 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
932 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
933 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
934 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
935 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
936 GEN_VEXT_VX(vwaddu_vx_b)
937 GEN_VEXT_VX(vwaddu_vx_h)
938 GEN_VEXT_VX(vwaddu_vx_w)
939 GEN_VEXT_VX(vwsubu_vx_b)
940 GEN_VEXT_VX(vwsubu_vx_h)
941 GEN_VEXT_VX(vwsubu_vx_w)
942 GEN_VEXT_VX(vwadd_vx_b)
943 GEN_VEXT_VX(vwadd_vx_h)
944 GEN_VEXT_VX(vwadd_vx_w)
945 GEN_VEXT_VX(vwsub_vx_b)
946 GEN_VEXT_VX(vwsub_vx_h)
947 GEN_VEXT_VX(vwsub_vx_w)
948 GEN_VEXT_VX(vwaddu_wx_b)
949 GEN_VEXT_VX(vwaddu_wx_h)
950 GEN_VEXT_VX(vwaddu_wx_w)
951 GEN_VEXT_VX(vwsubu_wx_b)
952 GEN_VEXT_VX(vwsubu_wx_h)
953 GEN_VEXT_VX(vwsubu_wx_w)
954 GEN_VEXT_VX(vwadd_wx_b)
955 GEN_VEXT_VX(vwadd_wx_h)
956 GEN_VEXT_VX(vwadd_wx_w)
957 GEN_VEXT_VX(vwsub_wx_b)
958 GEN_VEXT_VX(vwsub_wx_h)
959 GEN_VEXT_VX(vwsub_wx_w)
960 
961 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
962 #define DO_VADC(N, M, C) (N + M + C)
963 #define DO_VSBC(N, M, C) (N - M - C)
964 
965 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP)              \
966 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
967                   CPURISCVState *env, uint32_t desc)          \
968 {                                                             \
969     uint32_t vl = env->vl;                                    \
970     uint32_t i;                                               \
971                                                               \
972     for (i = env->vstart; i < vl; i++) {                      \
973         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
974         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
975         ETYPE carry = vext_elem_mask(v0, i);                  \
976                                                               \
977         *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry);         \
978     }                                                         \
979     env->vstart = 0;                                          \
980 }
981 
982 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t,  H1, DO_VADC)
983 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
984 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
985 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)
986 
987 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t,  H1, DO_VSBC)
988 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
989 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
990 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
991 
992 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP)                         \
993 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
994                   CPURISCVState *env, uint32_t desc)                     \
995 {                                                                        \
996     uint32_t vl = env->vl;                                               \
997     uint32_t i;                                                          \
998                                                                          \
999     for (i = env->vstart; i < vl; i++) {                                 \
1000         ETYPE s2 = *((ETYPE *)vs2 + H(i));                               \
1001         ETYPE carry = vext_elem_mask(v0, i);                             \
1002                                                                          \
1003         *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
1004     }                                                                    \
1005     env->vstart = 0;                                                     \
1006 }
1007 
1008 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t,  H1, DO_VADC)
1009 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
1010 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
1011 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)
1012 
1013 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t,  H1, DO_VSBC)
1014 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
1015 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
1016 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
1017 
1018 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N :           \
1019                           (__typeof(N))(N + M) < N)
1020 #define DO_MSBC(N, M, C) (C ? N <= M : N < M)
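
/*
 * Example (added comment, 8-bit lanes): N = 0xff, M = 0x01, C = 0 gives
 * (uint8_t)(N + M) = 0x00 < N, so DO_MADC reports a carry-out of 1;
 * with C = 1 and N = 0xfe, M = 0x01, (uint8_t)(N + M + 1) = 0x00 <= N,
 * so the carry-in likewise propagates to a carry-out.
 */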
1021 
1022 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
1023 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
1024                   CPURISCVState *env, uint32_t desc)          \
1025 {                                                             \
1026     uint32_t vl = env->vl;                                    \
1027     uint32_t vm = vext_vm(desc);                              \
1028     uint32_t i;                                               \
1029                                                               \
1030     for (i = env->vstart; i < vl; i++) {                      \
1031         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
1032         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
1033         ETYPE carry = !vm && vext_elem_mask(v0, i);           \
1034         vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry));      \
1035     }                                                         \
1036     env->vstart = 0;                                          \
1037 }
1038 
1039 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t,  H1, DO_MADC)
1040 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
1041 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
1042 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)
1043 
1044 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t,  H1, DO_MSBC)
1045 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
1046 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
1047 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
1048 
1049 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)               \
1050 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
1051                   void *vs2, CPURISCVState *env, uint32_t desc) \
1052 {                                                               \
1053     uint32_t vl = env->vl;                                      \
1054     uint32_t vm = vext_vm(desc);                                \
1055     uint32_t i;                                                 \
1056                                                                 \
1057     for (i = env->vstart; i < vl; i++) {                        \
1058         ETYPE s2 = *((ETYPE *)vs2 + H(i));                      \
1059         ETYPE carry = !vm && vext_elem_mask(v0, i);             \
1060         vext_set_elem_mask(vd, i,                               \
1061                 DO_OP(s2, (ETYPE)(target_long)s1, carry));      \
1062     }                                                           \
1063     env->vstart = 0;                                            \
1064 }
1065 
1066 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t,  H1, DO_MADC)
1067 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
1068 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
1069 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)
1070 
1071 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t,  H1, DO_MSBC)
1072 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
1073 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
1074 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
1075 
1076 /* Vector Bitwise Logical Instructions */
1077 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
1078 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
1079 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
1080 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
1081 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
1082 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
1083 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
1084 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
1085 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
1086 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
1087 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
1088 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
1089 GEN_VEXT_VV(vand_vv_b)
1090 GEN_VEXT_VV(vand_vv_h)
1091 GEN_VEXT_VV(vand_vv_w)
1092 GEN_VEXT_VV(vand_vv_d)
1093 GEN_VEXT_VV(vor_vv_b)
1094 GEN_VEXT_VV(vor_vv_h)
1095 GEN_VEXT_VV(vor_vv_w)
1096 GEN_VEXT_VV(vor_vv_d)
1097 GEN_VEXT_VV(vxor_vv_b)
1098 GEN_VEXT_VV(vxor_vv_h)
1099 GEN_VEXT_VV(vxor_vv_w)
1100 GEN_VEXT_VV(vxor_vv_d)
1101 
1102 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
1103 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
1104 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
1105 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
1106 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
1107 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
1108 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
1109 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
1110 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
1111 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
1112 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
1113 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
1114 GEN_VEXT_VX(vand_vx_b)
1115 GEN_VEXT_VX(vand_vx_h)
1116 GEN_VEXT_VX(vand_vx_w)
1117 GEN_VEXT_VX(vand_vx_d)
1118 GEN_VEXT_VX(vor_vx_b)
1119 GEN_VEXT_VX(vor_vx_h)
1120 GEN_VEXT_VX(vor_vx_w)
1121 GEN_VEXT_VX(vor_vx_d)
1122 GEN_VEXT_VX(vxor_vx_b)
1123 GEN_VEXT_VX(vxor_vx_h)
1124 GEN_VEXT_VX(vxor_vx_w)
1125 GEN_VEXT_VX(vxor_vx_d)
1126 
1127 /* Vector Single-Width Bit Shift Instructions */
1128 #define DO_SLL(N, M)  (N << (M))
1129 #define DO_SRL(N, M)  (N >> (M))
1130 
1131 /* generate the helpers for shift instructions with two vector operands */
1132 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK)             \
1133 void HELPER(NAME)(void *vd, void *v0, void *vs1,                          \
1134                   void *vs2, CPURISCVState *env, uint32_t desc)           \
1135 {                                                                         \
1136     uint32_t vm = vext_vm(desc);                                          \
1137     uint32_t vl = env->vl;                                                \
1138     uint32_t i;                                                           \
1139                                                                           \
1140     for (i = env->vstart; i < vl; i++) {                                  \
1141         if (!vm && !vext_elem_mask(v0, i)) {                              \
1142             continue;                                                     \
1143         }                                                                 \
1144         TS1 s1 = *((TS1 *)vs1 + HS1(i));                                  \
1145         TS2 s2 = *((TS2 *)vs2 + HS2(i));                                  \
1146         *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);                        \
1147     }                                                                     \
1148     env->vstart = 0;                                                      \
1149 }
1150 
1151 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t,  uint8_t, H1, H1, DO_SLL, 0x7)
1152 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
1153 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
1154 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)
1155 
1156 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1157 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1158 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1159 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1160 
1161 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t,  int8_t, H1, H1, DO_SRL, 0x7)
1162 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
1163 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1164 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1165 
1166 /* generate the helpers for shift instructions with one vector and one scalar */
1167 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
1168 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,      \
1169         void *vs2, CPURISCVState *env, uint32_t desc)       \
1170 {                                                           \
1171     uint32_t vm = vext_vm(desc);                            \
1172     uint32_t vl = env->vl;                                  \
1173     uint32_t i;                                             \
1174                                                             \
1175     for (i = env->vstart; i < vl; i++) {                    \
1176         if (!vm && !vext_elem_mask(v0, i)) {                \
1177             continue;                                       \
1178         }                                                   \
1179         TS2 s2 = *((TS2 *)vs2 + HS2(i));                    \
1180         *((TD *)vd + HD(i)) = OP(s2, s1 & MASK);            \
1181     }                                                       \
1182     env->vstart = 0;                                        \
1183 }
1184 
1185 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
1186 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
1187 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
1188 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)
1189 
1190 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1191 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1192 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1193 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1194 
1195 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
1196 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
1197 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1198 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1199 
1200 /* Vector Narrowing Integer Right Shift Instructions */
1201 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t,  uint16_t, H1, H2, DO_SRL, 0xf)
1202 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1203 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1204 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t,  int16_t, H1, H2, DO_SRL, 0xf)
1205 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1206 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1207 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1208 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1209 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1210 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
1211 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1212 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
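
/*
 * Note (added comment): the narrowing variants read a 2*SEW-wide source
 * (TS2/HS2) and write a SEW-wide result, so the shift amount is masked
 * with 2*SEW - 1 (e.g. 0xf for vnsrl_wv_b); vnsra reuses DO_SRL on a
 * signed TS2, relying on signed >> behaving as an arithmetic shift.
 */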
1213 
1214 /* Vector Integer Comparison Instructions */
1215 #define DO_MSEQ(N, M) (N == M)
1216 #define DO_MSNE(N, M) (N != M)
1217 #define DO_MSLT(N, M) (N < M)
1218 #define DO_MSLE(N, M) (N <= M)
1219 #define DO_MSGT(N, M) (N > M)
1220 
1221 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                \
1222 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
1223                   CPURISCVState *env, uint32_t desc)          \
1224 {                                                             \
1225     uint32_t vm = vext_vm(desc);                              \
1226     uint32_t vl = env->vl;                                    \
1227     uint32_t i;                                               \
1228                                                               \
1229     for (i = env->vstart; i < vl; i++) {                      \
1230         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
1231         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
1232         if (!vm && !vext_elem_mask(v0, i)) {                  \
1233             continue;                                         \
1234         }                                                     \
1235         vext_set_elem_mask(vd, i, DO_OP(s2, s1));             \
1236     }                                                         \
1237     env->vstart = 0;                                          \
1238 }
1239 
1240 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
1241 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
1242 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
1243 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
1244 
1245 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t,  H1, DO_MSNE)
1246 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
1247 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
1248 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
1249 
1250 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t,  H1, DO_MSLT)
1251 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
1252 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
1253 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
1254 
1255 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t,  H1, DO_MSLT)
1256 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
1257 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
1258 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
1259 
1260 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t,  H1, DO_MSLE)
1261 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
1262 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
1263 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
1264 
1265 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t,  H1, DO_MSLE)
1266 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
1267 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
1268 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1269 
1270 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP)                      \
1271 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
1272                   CPURISCVState *env, uint32_t desc)                \
1273 {                                                                   \
1274     uint32_t vm = vext_vm(desc);                                    \
1275     uint32_t vl = env->vl;                                          \
1276     uint32_t i;                                                     \
1277                                                                     \
1278     for (i = env->vstart; i < vl; i++) {                            \
1279         ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
1280         if (!vm && !vext_elem_mask(v0, i)) {                        \
1281             continue;                                               \
1282         }                                                           \
1283         vext_set_elem_mask(vd, i,                                   \
1284                 DO_OP(s2, (ETYPE)(target_long)s1));                 \
1285     }                                                               \
1286     env->vstart = 0;                                                \
1287 }
1288 
1289 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t,  H1, DO_MSEQ)
1290 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
1291 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
1292 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
1293 
1294 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t,  H1, DO_MSNE)
1295 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
1296 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
1297 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
1298 
1299 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t,  H1, DO_MSLT)
1300 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
1301 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
1302 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
1303 
1304 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t,  H1, DO_MSLT)
1305 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
1306 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
1307 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
1308 
1309 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t,  H1, DO_MSLE)
1310 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
1311 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
1312 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
1313 
1314 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t,  H1, DO_MSLE)
1315 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
1316 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
1317 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
1318 
1319 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t,  H1, DO_MSGT)
1320 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
1321 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
1322 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
1323 
1324 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t,  H1, DO_MSGT)
1325 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
1326 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
1327 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
1328 
1329 /* Vector Integer Min/Max Instructions */
1330 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1331 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1332 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1333 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1334 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1335 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1336 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1337 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1338 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1339 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1340 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1341 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1342 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1343 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1344 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1345 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
1346 GEN_VEXT_VV(vminu_vv_b)
1347 GEN_VEXT_VV(vminu_vv_h)
1348 GEN_VEXT_VV(vminu_vv_w)
1349 GEN_VEXT_VV(vminu_vv_d)
1350 GEN_VEXT_VV(vmin_vv_b)
1351 GEN_VEXT_VV(vmin_vv_h)
1352 GEN_VEXT_VV(vmin_vv_w)
1353 GEN_VEXT_VV(vmin_vv_d)
1354 GEN_VEXT_VV(vmaxu_vv_b)
1355 GEN_VEXT_VV(vmaxu_vv_h)
1356 GEN_VEXT_VV(vmaxu_vv_w)
1357 GEN_VEXT_VV(vmaxu_vv_d)
1358 GEN_VEXT_VV(vmax_vv_b)
1359 GEN_VEXT_VV(vmax_vv_h)
1360 GEN_VEXT_VV(vmax_vv_w)
1361 GEN_VEXT_VV(vmax_vv_d)
1362 
1363 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1364 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1365 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1366 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1367 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1368 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1369 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1370 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1371 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1372 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1373 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1374 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1375 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1376 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1377 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1378 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
1379 GEN_VEXT_VX(vminu_vx_b)
1380 GEN_VEXT_VX(vminu_vx_h)
1381 GEN_VEXT_VX(vminu_vx_w)
1382 GEN_VEXT_VX(vminu_vx_d)
1383 GEN_VEXT_VX(vmin_vx_b)
1384 GEN_VEXT_VX(vmin_vx_h)
1385 GEN_VEXT_VX(vmin_vx_w)
1386 GEN_VEXT_VX(vmin_vx_d)
1387 GEN_VEXT_VX(vmaxu_vx_b)
1388 GEN_VEXT_VX(vmaxu_vx_h)
1389 GEN_VEXT_VX(vmaxu_vx_w)
1390 GEN_VEXT_VX(vmaxu_vx_d)
1391 GEN_VEXT_VX(vmax_vx_b)
1392 GEN_VEXT_VX(vmax_vx_h)
1393 GEN_VEXT_VX(vmax_vx_w)
1394 GEN_VEXT_VX(vmax_vx_d)
1395 
1396 /* Vector Single-Width Integer Multiply Instructions */
1397 #define DO_MUL(N, M) (N * M)
1398 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1399 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1400 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1401 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
1402 GEN_VEXT_VV(vmul_vv_b)
1403 GEN_VEXT_VV(vmul_vv_h)
1404 GEN_VEXT_VV(vmul_vv_w)
1405 GEN_VEXT_VV(vmul_vv_d)
1406 
1407 static int8_t do_mulh_b(int8_t s2, int8_t s1)
1408 {
1409     return (int16_t)s2 * (int16_t)s1 >> 8;
1410 }
1411 
1412 static int16_t do_mulh_h(int16_t s2, int16_t s1)
1413 {
1414     return (int32_t)s2 * (int32_t)s1 >> 16;
1415 }
1416 
1417 static int32_t do_mulh_w(int32_t s2, int32_t s1)
1418 {
1419     return (int64_t)s2 * (int64_t)s1 >> 32;
1420 }
1421 
1422 static int64_t do_mulh_d(int64_t s2, int64_t s1)
1423 {
1424     uint64_t hi_64, lo_64;
1425 
1426     muls64(&lo_64, &hi_64, s1, s2);
1427     return hi_64;
1428 }
1429 
1430 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1431 {
1432     return (uint16_t)s2 * (uint16_t)s1 >> 8;
1433 }
1434 
1435 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1436 {
1437     return (uint32_t)s2 * (uint32_t)s1 >> 16;
1438 }
1439 
1440 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1441 {
1442     return (uint64_t)s2 * (uint64_t)s1 >> 32;
1443 }
1444 
1445 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1446 {
1447     uint64_t hi_64, lo_64;
1448 
1449     mulu64(&lo_64, &hi_64, s2, s1);
1450     return hi_64;
1451 }
1452 
1453 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1454 {
1455     return (int16_t)s2 * (uint16_t)s1 >> 8;
1456 }
1457 
1458 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1459 {
1460     return (int32_t)s2 * (uint32_t)s1 >> 16;
1461 }
1462 
1463 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1464 {
1465     return (int64_t)s2 * (uint64_t)s1 >> 32;
1466 }
1467 
1468 /*
1469  * Signed * unsigned high-half multiply via mulu64:
1470  *
1471  * Let  A  = signed operand,
1472  *      B  = unsigned operand,
1473  *      Au = A's bit pattern read as an unsigned value,
1474  *      P  = mulu64(Au, B), the 128-bit unsigned product.
1475  *
1476  * IF A >= 0
1477  *      Au = A, so the signed product SP = A * B = P
1478  * ELSE
1479  *      Au = A + 2 ** 64, so
1480  *      P  = (A + 2 ** 64) * B = A * B + 2 ** 64 * B
1481  *      SP = A * B = P - 2 ** 64 * B
1482  *      i.e. only the upper 64 bits differ: HI(SP) = HI(P) - B
1483  * THEN
1484  *      HI_P -= (A < 0 ? B : 0)
1485  */
1486 
1487 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1488 {
1489     uint64_t hi_64, lo_64;
1490 
1491     mulu64(&lo_64, &hi_64, s2, s1);
1492 
1493     hi_64 -= s2 < 0 ? s1 : 0;
1494     return hi_64;
1495 }
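/*
 * Worked example of the correction above (illustrative only): with
 * A = -2 and B = 3, Au = 2^64 - 2 and P = 3 * 2^64 - 6, so HI(P) = 2.
 * Subtracting B gives 2 - 3 = -1, matching the true signed product
 * A * B = -6, whose upper 64 bits are all ones.
 */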
1496 
1497 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1498 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1499 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1500 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1501 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1502 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1503 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1504 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1505 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1506 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1507 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1508 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
1509 GEN_VEXT_VV(vmulh_vv_b)
1510 GEN_VEXT_VV(vmulh_vv_h)
1511 GEN_VEXT_VV(vmulh_vv_w)
1512 GEN_VEXT_VV(vmulh_vv_d)
1513 GEN_VEXT_VV(vmulhu_vv_b)
1514 GEN_VEXT_VV(vmulhu_vv_h)
1515 GEN_VEXT_VV(vmulhu_vv_w)
1516 GEN_VEXT_VV(vmulhu_vv_d)
1517 GEN_VEXT_VV(vmulhsu_vv_b)
1518 GEN_VEXT_VV(vmulhsu_vv_h)
1519 GEN_VEXT_VV(vmulhsu_vv_w)
1520 GEN_VEXT_VV(vmulhsu_vv_d)
1521 
1522 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1523 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1524 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1525 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1526 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1527 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1528 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1529 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1530 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1531 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1532 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1533 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1534 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1535 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1536 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1537 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
1538 GEN_VEXT_VX(vmul_vx_b)
1539 GEN_VEXT_VX(vmul_vx_h)
1540 GEN_VEXT_VX(vmul_vx_w)
1541 GEN_VEXT_VX(vmul_vx_d)
1542 GEN_VEXT_VX(vmulh_vx_b)
1543 GEN_VEXT_VX(vmulh_vx_h)
1544 GEN_VEXT_VX(vmulh_vx_w)
1545 GEN_VEXT_VX(vmulh_vx_d)
1546 GEN_VEXT_VX(vmulhu_vx_b)
1547 GEN_VEXT_VX(vmulhu_vx_h)
1548 GEN_VEXT_VX(vmulhu_vx_w)
1549 GEN_VEXT_VX(vmulhu_vx_d)
1550 GEN_VEXT_VX(vmulhsu_vx_b)
1551 GEN_VEXT_VX(vmulhsu_vx_h)
1552 GEN_VEXT_VX(vmulhsu_vx_w)
1553 GEN_VEXT_VX(vmulhsu_vx_d)
1554 
1555 /* Vector Integer Divide Instructions */
1556 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1557 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
1558 #define DO_DIV(N, M)  (unlikely(M == 0) ? (__typeof(N))(-1) :\
1559         unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
1560 #define DO_REM(N, M)  (unlikely(M == 0) ? N :\
1561         unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
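/*
 * These macros implement the RISC-V division corner cases in C: division
 * by zero yields all ones for DO_DIVU, -1 for DO_DIV, and leaves the
 * dividend unchanged for DO_REMU/DO_REM; signed overflow (the most
 * negative value divided by -1, detected here as N == -N) yields N for
 * DO_DIV and 0 for DO_REM, so no trap is ever raised.
 */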
1562 
1563 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1564 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1565 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1566 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1567 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1568 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1569 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1570 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1571 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1572 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1573 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1574 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1575 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1576 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1577 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1578 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
1579 GEN_VEXT_VV(vdivu_vv_b)
1580 GEN_VEXT_VV(vdivu_vv_h)
1581 GEN_VEXT_VV(vdivu_vv_w)
1582 GEN_VEXT_VV(vdivu_vv_d)
1583 GEN_VEXT_VV(vdiv_vv_b)
1584 GEN_VEXT_VV(vdiv_vv_h)
1585 GEN_VEXT_VV(vdiv_vv_w)
1586 GEN_VEXT_VV(vdiv_vv_d)
1587 GEN_VEXT_VV(vremu_vv_b)
1588 GEN_VEXT_VV(vremu_vv_h)
1589 GEN_VEXT_VV(vremu_vv_w)
1590 GEN_VEXT_VV(vremu_vv_d)
1591 GEN_VEXT_VV(vrem_vv_b)
1592 GEN_VEXT_VV(vrem_vv_h)
1593 GEN_VEXT_VV(vrem_vv_w)
1594 GEN_VEXT_VV(vrem_vv_d)
1595 
1596 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1597 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1598 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1599 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1600 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1601 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1602 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1603 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1604 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1605 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1606 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1607 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1608 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1609 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1610 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1611 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
1612 GEN_VEXT_VX(vdivu_vx_b)
1613 GEN_VEXT_VX(vdivu_vx_h)
1614 GEN_VEXT_VX(vdivu_vx_w)
1615 GEN_VEXT_VX(vdivu_vx_d)
1616 GEN_VEXT_VX(vdiv_vx_b)
1617 GEN_VEXT_VX(vdiv_vx_h)
1618 GEN_VEXT_VX(vdiv_vx_w)
1619 GEN_VEXT_VX(vdiv_vx_d)
1620 GEN_VEXT_VX(vremu_vx_b)
1621 GEN_VEXT_VX(vremu_vx_h)
1622 GEN_VEXT_VX(vremu_vx_w)
1623 GEN_VEXT_VX(vremu_vx_d)
1624 GEN_VEXT_VX(vrem_vx_b)
1625 GEN_VEXT_VX(vrem_vx_h)
1626 GEN_VEXT_VX(vrem_vx_w)
1627 GEN_VEXT_VX(vrem_vx_d)
1628 
1629 /* Vector Widening Integer Multiply Instructions */
1630 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1631 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1632 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1633 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1634 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1635 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1636 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1637 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1638 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
1639 GEN_VEXT_VV(vwmul_vv_b)
1640 GEN_VEXT_VV(vwmul_vv_h)
1641 GEN_VEXT_VV(vwmul_vv_w)
1642 GEN_VEXT_VV(vwmulu_vv_b)
1643 GEN_VEXT_VV(vwmulu_vv_h)
1644 GEN_VEXT_VV(vwmulu_vv_w)
1645 GEN_VEXT_VV(vwmulsu_vv_b)
1646 GEN_VEXT_VV(vwmulsu_vv_h)
1647 GEN_VEXT_VV(vwmulsu_vv_w)
1648 
1649 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1650 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1651 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1652 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1653 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1654 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1655 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1656 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1657 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
1658 GEN_VEXT_VX(vwmul_vx_b)
1659 GEN_VEXT_VX(vwmul_vx_h)
1660 GEN_VEXT_VX(vwmul_vx_w)
1661 GEN_VEXT_VX(vwmulu_vx_b)
1662 GEN_VEXT_VX(vwmulu_vx_h)
1663 GEN_VEXT_VX(vwmulu_vx_w)
1664 GEN_VEXT_VX(vwmulsu_vx_b)
1665 GEN_VEXT_VX(vwmulsu_vx_h)
1666 GEN_VEXT_VX(vwmulsu_vx_w)
1667 
1668 /* Vector Single-Width Integer Multiply-Add Instructions */
1669 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
1670 static void do_##NAME(void *vd, void *vs1, void *vs2, int i)       \
1671 {                                                                  \
1672     TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
1673     TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
1674     TD d = *((TD *)vd + HD(i));                                    \
1675     *((TD *)vd + HD(i)) = OP(s2, s1, d);                           \
1676 }
1677 
1678 #define DO_MACC(N, M, D) (M * N + D)
1679 #define DO_NMSAC(N, M, D) (-(M * N) + D)
1680 #define DO_MADD(N, M, D) (M * D + N)
1681 #define DO_NMSUB(N, M, D) (-(M * D) + N)
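/*
 * In OP(s2, s1, d) the arguments are N = vs2 element, M = vs1 element
 * (or the scalar rs1 for the .vx forms) and D = the old vd element, so:
 *   DO_MACC:  vd = (vs1 * vs2) + vd
 *   DO_NMSAC: vd = -(vs1 * vs2) + vd
 *   DO_MADD:  vd = (vs1 * vd) + vs2
 *   DO_NMSUB: vd = -(vs1 * vd) + vs2
 */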
1682 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1683 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1684 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1685 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1686 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1687 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1688 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1689 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1690 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1691 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1692 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1693 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1694 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1695 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1696 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1697 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
1698 GEN_VEXT_VV(vmacc_vv_b)
1699 GEN_VEXT_VV(vmacc_vv_h)
1700 GEN_VEXT_VV(vmacc_vv_w)
1701 GEN_VEXT_VV(vmacc_vv_d)
1702 GEN_VEXT_VV(vnmsac_vv_b)
1703 GEN_VEXT_VV(vnmsac_vv_h)
1704 GEN_VEXT_VV(vnmsac_vv_w)
1705 GEN_VEXT_VV(vnmsac_vv_d)
1706 GEN_VEXT_VV(vmadd_vv_b)
1707 GEN_VEXT_VV(vmadd_vv_h)
1708 GEN_VEXT_VV(vmadd_vv_w)
1709 GEN_VEXT_VV(vmadd_vv_d)
1710 GEN_VEXT_VV(vnmsub_vv_b)
1711 GEN_VEXT_VV(vnmsub_vv_h)
1712 GEN_VEXT_VV(vnmsub_vv_w)
1713 GEN_VEXT_VV(vnmsub_vv_d)
1714 
1715 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
1716 static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
1717 {                                                                   \
1718     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
1719     TD d = *((TD *)vd + HD(i));                                     \
1720     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d);                   \
1721 }
1722 
1723 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1724 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1725 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1726 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1727 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1728 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1729 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1730 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1731 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1732 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1733 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1734 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1735 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1736 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1737 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1738 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
1739 GEN_VEXT_VX(vmacc_vx_b)
1740 GEN_VEXT_VX(vmacc_vx_h)
1741 GEN_VEXT_VX(vmacc_vx_w)
1742 GEN_VEXT_VX(vmacc_vx_d)
1743 GEN_VEXT_VX(vnmsac_vx_b)
1744 GEN_VEXT_VX(vnmsac_vx_h)
1745 GEN_VEXT_VX(vnmsac_vx_w)
1746 GEN_VEXT_VX(vnmsac_vx_d)
1747 GEN_VEXT_VX(vmadd_vx_b)
1748 GEN_VEXT_VX(vmadd_vx_h)
1749 GEN_VEXT_VX(vmadd_vx_w)
1750 GEN_VEXT_VX(vmadd_vx_d)
1751 GEN_VEXT_VX(vnmsub_vx_b)
1752 GEN_VEXT_VX(vnmsub_vx_h)
1753 GEN_VEXT_VX(vnmsub_vx_w)
1754 GEN_VEXT_VX(vnmsub_vx_d)
1755 
1756 /* Vector Widening Integer Multiply-Add Instructions */
1757 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1758 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1759 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1760 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1761 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1762 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1763 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1764 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1765 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
1766 GEN_VEXT_VV(vwmaccu_vv_b)
1767 GEN_VEXT_VV(vwmaccu_vv_h)
1768 GEN_VEXT_VV(vwmaccu_vv_w)
1769 GEN_VEXT_VV(vwmacc_vv_b)
1770 GEN_VEXT_VV(vwmacc_vv_h)
1771 GEN_VEXT_VV(vwmacc_vv_w)
1772 GEN_VEXT_VV(vwmaccsu_vv_b)
1773 GEN_VEXT_VV(vwmaccsu_vv_h)
1774 GEN_VEXT_VV(vwmaccsu_vv_w)
1775 
1776 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
1777 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
1778 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
1779 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
1780 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
1781 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
1782 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
1783 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
1784 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
1785 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
1786 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
1787 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
1788 GEN_VEXT_VX(vwmaccu_vx_b)
1789 GEN_VEXT_VX(vwmaccu_vx_h)
1790 GEN_VEXT_VX(vwmaccu_vx_w)
1791 GEN_VEXT_VX(vwmacc_vx_b)
1792 GEN_VEXT_VX(vwmacc_vx_h)
1793 GEN_VEXT_VX(vwmacc_vx_w)
1794 GEN_VEXT_VX(vwmaccsu_vx_b)
1795 GEN_VEXT_VX(vwmaccsu_vx_h)
1796 GEN_VEXT_VX(vwmaccsu_vx_w)
1797 GEN_VEXT_VX(vwmaccus_vx_b)
1798 GEN_VEXT_VX(vwmaccus_vx_h)
1799 GEN_VEXT_VX(vwmaccus_vx_w)
1800 
1801 /* Vector Integer Merge and Move Instructions */
1802 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H)                              \
1803 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env,           \
1804                   uint32_t desc)                                     \
1805 {                                                                    \
1806     uint32_t vl = env->vl;                                           \
1807     uint32_t i;                                                      \
1808                                                                      \
1809     for (i = env->vstart; i < vl; i++) {                             \
1810         ETYPE s1 = *((ETYPE *)vs1 + H(i));                           \
1811         *((ETYPE *)vd + H(i)) = s1;                                  \
1812     }                                                                \
1813     env->vstart = 0;                                                 \
1814 }
1815 
1816 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t,  H1)
1817 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
1818 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
1819 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
1820 
1821 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H)                              \
1822 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env,         \
1823                   uint32_t desc)                                     \
1824 {                                                                    \
1825     uint32_t vl = env->vl;                                           \
1826     uint32_t i;                                                      \
1827                                                                      \
1828     for (i = env->vstart; i < vl; i++) {                             \
1829         *((ETYPE *)vd + H(i)) = (ETYPE)s1;                           \
1830     }                                                                \
1831     env->vstart = 0;                                                 \
1832 }
1833 
1834 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t,  H1)
1835 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
1836 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
1837 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
1838 
1839 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H)                           \
1840 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,          \
1841                   CPURISCVState *env, uint32_t desc)                 \
1842 {                                                                    \
1843     uint32_t vl = env->vl;                                           \
1844     uint32_t i;                                                      \
1845                                                                      \
1846     for (i = env->vstart; i < vl; i++) {                             \
1847         ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1);            \
1848         *((ETYPE *)vd + H(i)) = *(vt + H(i));                        \
1849     }                                                                \
1850     env->vstart = 0;                                                 \
1851 }
1852 
1853 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t,  H1)
1854 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
1855 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
1856 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
1857 
1858 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H)                           \
1859 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,               \
1860                   void *vs2, CPURISCVState *env, uint32_t desc)      \
1861 {                                                                    \
1862     uint32_t vl = env->vl;                                           \
1863     uint32_t i;                                                      \
1864                                                                      \
1865     for (i = env->vstart; i < vl; i++) {                             \
1866         ETYPE s2 = *((ETYPE *)vs2 + H(i));                           \
1867         ETYPE d = (!vext_elem_mask(v0, i) ? s2 :                     \
1868                    (ETYPE)(target_long)s1);                          \
1869         *((ETYPE *)vd + H(i)) = d;                                   \
1870     }                                                                \
1871     env->vstart = 0;                                                 \
1872 }
1873 
1874 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t,  H1)
1875 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
1876 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
1877 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
1878 
1879 /*
1880  *** Vector Fixed-Point Arithmetic Instructions
1881  */
1882 
1883 /* Vector Single-Width Saturating Add and Subtract */
1884 
1885 /*
1886  * Fixed-point instructions carry a rounding mode and may saturate,
1887  * so define the common fixed-point macros here.
1888  */
1889 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
1890                           CPURISCVState *env, int vxrm);
1891 
1892 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)     \
1893 static inline void                                                  \
1894 do_##NAME(void *vd, void *vs1, void *vs2, int i,                    \
1895           CPURISCVState *env, int vxrm)                             \
1896 {                                                                   \
1897     TX1 s1 = *((T1 *)vs1 + HS1(i));                                 \
1898     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
1899     *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1);                    \
1900 }
1901 
1902 static inline void
1903 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
1904              CPURISCVState *env,
1905              uint32_t vl, uint32_t vm, int vxrm,
1906              opivv2_rm_fn *fn)
1907 {
1908     for (uint32_t i = env->vstart; i < vl; i++) {
1909         if (!vm && !vext_elem_mask(v0, i)) {
1910             continue;
1911         }
1912         fn(vd, vs1, vs2, i, env, vxrm);
1913     }
1914     env->vstart = 0;
1915 }
1916 
1917 static inline void
1918 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
1919              CPURISCVState *env,
1920              uint32_t desc,
1921              opivv2_rm_fn *fn)
1922 {
1923     uint32_t vm = vext_vm(desc);
1924     uint32_t vl = env->vl;
1925 
1926     switch (env->vxrm) {
1927     case 0: /* rnu */
1928         vext_vv_rm_1(vd, v0, vs1, vs2,
1929                      env, vl, vm, 0, fn);
1930         break;
1931     case 1: /* rne */
1932         vext_vv_rm_1(vd, v0, vs1, vs2,
1933                      env, vl, vm, 1, fn);
1934         break;
1935     case 2: /* rdn */
1936         vext_vv_rm_1(vd, v0, vs1, vs2,
1937                      env, vl, vm, 2, fn);
1938         break;
1939     default: /* rod */
1940         vext_vv_rm_1(vd, v0, vs1, vs2,
1941                      env, vl, vm, 3, fn);
1942         break;
1943     }
1944 }
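/*
 * The switch passes vxrm to vext_vv_rm_1 as a compile-time constant so
 * that, after inlining, each rounding mode can get its own specialised
 * copy of the element loop; the behaviour is the same as passing
 * env->vxrm directly.
 */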
1945 
1946 /* generate helpers for fixed point instructions with OPIVV format */
1947 #define GEN_VEXT_VV_RM(NAME)                                    \
1948 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,     \
1949                   CPURISCVState *env, uint32_t desc)            \
1950 {                                                               \
1951     vext_vv_rm_2(vd, v0, vs1, vs2, env, desc,                   \
1952                  do_##NAME);                                    \
1953 }
1954 
1955 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
1956 {
1957     uint8_t res = a + b;
1958     if (res < a) {
1959         res = UINT8_MAX;
1960         env->vxsat = 0x1;
1961     }
1962     return res;
1963 }
1964 
1965 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
1966                                uint16_t b)
1967 {
1968     uint16_t res = a + b;
1969     if (res < a) {
1970         res = UINT16_MAX;
1971         env->vxsat = 0x1;
1972     }
1973     return res;
1974 }
1975 
1976 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
1977                                uint32_t b)
1978 {
1979     uint32_t res = a + b;
1980     if (res < a) {
1981         res = UINT32_MAX;
1982         env->vxsat = 0x1;
1983     }
1984     return res;
1985 }
1986 
1987 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
1988                                uint64_t b)
1989 {
1990     uint64_t res = a + b;
1991     if (res < a) {
1992         res = UINT64_MAX;
1993         env->vxsat = 0x1;
1994     }
1995     return res;
1996 }
1997 
1998 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
1999 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
2000 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
2001 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
2002 GEN_VEXT_VV_RM(vsaddu_vv_b)
2003 GEN_VEXT_VV_RM(vsaddu_vv_h)
2004 GEN_VEXT_VV_RM(vsaddu_vv_w)
2005 GEN_VEXT_VV_RM(vsaddu_vv_d)
2006 
2007 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
2008                           CPURISCVState *env, int vxrm);
2009 
2010 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)          \
2011 static inline void                                                  \
2012 do_##NAME(void *vd, target_long s1, void *vs2, int i,               \
2013           CPURISCVState *env, int vxrm)                             \
2014 {                                                                   \
2015     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
2016     *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1);           \
2017 }
2018 
2019 static inline void
2020 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
2021              CPURISCVState *env,
2022              uint32_t vl, uint32_t vm, int vxrm,
2023              opivx2_rm_fn *fn)
2024 {
2025     for (uint32_t i = env->vstart; i < vl; i++) {
2026         if (!vm && !vext_elem_mask(v0, i)) {
2027             continue;
2028         }
2029         fn(vd, s1, vs2, i, env, vxrm);
2030     }
2031     env->vstart = 0;
2032 }
2033 
2034 static inline void
2035 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2036              CPURISCVState *env,
2037              uint32_t desc,
2038              opivx2_rm_fn *fn)
2039 {
2040     uint32_t vm = vext_vm(desc);
2041     uint32_t vl = env->vl;
2042 
2043     switch (env->vxrm) {
2044     case 0: /* rnu */
2045         vext_vx_rm_1(vd, v0, s1, vs2,
2046                      env, vl, vm, 0, fn);
2047         break;
2048     case 1: /* rne */
2049         vext_vx_rm_1(vd, v0, s1, vs2,
2050                      env, vl, vm, 1, fn);
2051         break;
2052     case 2: /* rdn */
2053         vext_vx_rm_1(vd, v0, s1, vs2,
2054                      env, vl, vm, 2, fn);
2055         break;
2056     default: /* rod */
2057         vext_vx_rm_1(vd, v0, s1, vs2,
2058                      env, vl, vm, 3, fn);
2059         break;
2060     }
2061 }
2062 
2063 /* generate helpers for fixed point instructions with OPIVX format */
2064 #define GEN_VEXT_VX_RM(NAME)                              \
2065 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
2066         void *vs2, CPURISCVState *env, uint32_t desc)     \
2067 {                                                         \
2068     vext_vx_rm_2(vd, v0, s1, vs2, env, desc,              \
2069                  do_##NAME);                              \
2070 }
2071 
2072 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2073 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2074 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2075 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
2076 GEN_VEXT_VX_RM(vsaddu_vx_b)
2077 GEN_VEXT_VX_RM(vsaddu_vx_h)
2078 GEN_VEXT_VX_RM(vsaddu_vx_w)
2079 GEN_VEXT_VX_RM(vsaddu_vx_d)
2080 
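/*
 * Signed saturating add: (res ^ a) & (res ^ b) has the sign bit set only
 * when a and b share a sign and the truncated sum's sign differs from
 * both, i.e. exactly when the addition overflowed.
 */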
2081 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2082 {
2083     int8_t res = a + b;
2084     if ((res ^ a) & (res ^ b) & INT8_MIN) {
2085         res = a > 0 ? INT8_MAX : INT8_MIN;
2086         env->vxsat = 0x1;
2087     }
2088     return res;
2089 }
2090 
2091 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2092 {
2093     int16_t res = a + b;
2094     if ((res ^ a) & (res ^ b) & INT16_MIN) {
2095         res = a > 0 ? INT16_MAX : INT16_MIN;
2096         env->vxsat = 0x1;
2097     }
2098     return res;
2099 }
2100 
2101 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2102 {
2103     int32_t res = a + b;
2104     if ((res ^ a) & (res ^ b) & INT32_MIN) {
2105         res = a > 0 ? INT32_MAX : INT32_MIN;
2106         env->vxsat = 0x1;
2107     }
2108     return res;
2109 }
2110 
2111 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2112 {
2113     int64_t res = a + b;
2114     if ((res ^ a) & (res ^ b) & INT64_MIN) {
2115         res = a > 0 ? INT64_MAX : INT64_MIN;
2116         env->vxsat = 0x1;
2117     }
2118     return res;
2119 }
2120 
2121 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2122 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2123 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2124 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
2125 GEN_VEXT_VV_RM(vsadd_vv_b)
2126 GEN_VEXT_VV_RM(vsadd_vv_h)
2127 GEN_VEXT_VV_RM(vsadd_vv_w)
2128 GEN_VEXT_VV_RM(vsadd_vv_d)
2129 
2130 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2131 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2132 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2133 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
2134 GEN_VEXT_VX_RM(vsadd_vx_b)
2135 GEN_VEXT_VX_RM(vsadd_vx_h)
2136 GEN_VEXT_VX_RM(vsadd_vx_w)
2137 GEN_VEXT_VX_RM(vsadd_vx_d)
2138 
2139 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2140 {
2141     uint8_t res = a - b;
2142     if (res > a) {
2143         res = 0;
2144         env->vxsat = 0x1;
2145     }
2146     return res;
2147 }
2148 
2149 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2150                                uint16_t b)
2151 {
2152     uint16_t res = a - b;
2153     if (res > a) {
2154         res = 0;
2155         env->vxsat = 0x1;
2156     }
2157     return res;
2158 }
2159 
2160 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2161                                uint32_t b)
2162 {
2163     uint32_t res = a - b;
2164     if (res > a) {
2165         res = 0;
2166         env->vxsat = 0x1;
2167     }
2168     return res;
2169 }
2170 
2171 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2172                                uint64_t b)
2173 {
2174     uint64_t res = a - b;
2175     if (res > a) {
2176         res = 0;
2177         env->vxsat = 0x1;
2178     }
2179     return res;
2180 }
2181 
2182 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2183 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2184 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2185 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
2186 GEN_VEXT_VV_RM(vssubu_vv_b)
2187 GEN_VEXT_VV_RM(vssubu_vv_h)
2188 GEN_VEXT_VV_RM(vssubu_vv_w)
2189 GEN_VEXT_VV_RM(vssubu_vv_d)
2190 
2191 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2192 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2193 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2194 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
2195 GEN_VEXT_VX_RM(vssubu_vx_b)
2196 GEN_VEXT_VX_RM(vssubu_vx_h)
2197 GEN_VEXT_VX_RM(vssubu_vx_w)
2198 GEN_VEXT_VX_RM(vssubu_vx_d)
2199 
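/*
 * Signed saturating subtract: overflow is possible only when a and b have
 * different signs, and it has happened when the truncated difference also
 * differs in sign from a, hence (res ^ a) & (a ^ b) & INTn_MIN.
 */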
2200 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2201 {
2202     int8_t res = a - b;
2203     if ((res ^ a) & (a ^ b) & INT8_MIN) {
2204         res = a >= 0 ? INT8_MAX : INT8_MIN;
2205         env->vxsat = 0x1;
2206     }
2207     return res;
2208 }
2209 
2210 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2211 {
2212     int16_t res = a - b;
2213     if ((res ^ a) & (a ^ b) & INT16_MIN) {
2214         res = a >= 0 ? INT16_MAX : INT16_MIN;
2215         env->vxsat = 0x1;
2216     }
2217     return res;
2218 }
2219 
2220 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2221 {
2222     int32_t res = a - b;
2223     if ((res ^ a) & (a ^ b) & INT32_MIN) {
2224         res = a >= 0 ? INT32_MAX : INT32_MIN;
2225         env->vxsat = 0x1;
2226     }
2227     return res;
2228 }
2229 
2230 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2231 {
2232     int64_t res = a - b;
2233     if ((res ^ a) & (a ^ b) & INT64_MIN) {
2234         res = a >= 0 ? INT64_MAX : INT64_MIN;
2235         env->vxsat = 0x1;
2236     }
2237     return res;
2238 }
2239 
2240 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2241 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2242 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2243 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
2244 GEN_VEXT_VV_RM(vssub_vv_b)
2245 GEN_VEXT_VV_RM(vssub_vv_h)
2246 GEN_VEXT_VV_RM(vssub_vv_w)
2247 GEN_VEXT_VV_RM(vssub_vv_d)
2248 
2249 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2250 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2251 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2252 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
2253 GEN_VEXT_VX_RM(vssub_vx_b)
2254 GEN_VEXT_VX_RM(vssub_vx_h)
2255 GEN_VEXT_VX_RM(vssub_vx_w)
2256 GEN_VEXT_VX_RM(vssub_vx_d)
2257 
2258 /* Vector Single-Width Averaging Add and Subtract */
2259 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2260 {
2261     uint8_t d = extract64(v, shift, 1);
2262     uint8_t d1;
2263     uint64_t D1, D2;
2264 
2265     if (shift == 0 || shift > 64) {
2266         return 0;
2267     }
2268 
2269     d1 = extract64(v, shift - 1, 1);
2270     D1 = extract64(v, 0, shift);
2271     if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2272         return d1;
2273     } else if (vxrm == 1) { /* round-to-nearest-even */
2274         if (shift > 1) {
2275             D2 = extract64(v, 0, shift - 1);
2276             return d1 & ((D2 != 0) | d);
2277         } else {
2278             return d1 & d;
2279         }
2280     } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2281         return !d & (D1 != 0);
2282     }
2283     return 0; /* round-down (truncate) */
2284 }
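/*
 * Worked example (illustrative): rounding v = 0b1011 (11) right by
 * shift = 2, i.e. 11/4 = 2.75, truncates to 0b10 (2).  The returned
 * increment is 1 for rnu (d1 = 1), 1 for rne (the discarded bits below
 * d1 are non-zero), 0 for rdn, and 1 for rod (the kept LSB d = 0 while
 * the discarded bits D1 = 0b11 are non-zero), giving 3, 3, 2 and 3.
 */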
2285 
2286 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2287 {
2288     int64_t res = (int64_t)a + b;
2289     uint8_t round = get_round(vxrm, res, 1);
2290 
2291     return (res >> 1) + round;
2292 }
2293 
2294 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2295 {
2296     int64_t res = a + b;
2297     uint8_t round = get_round(vxrm, res, 1);
2298     int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2299 
2300     /* With signed overflow, bit 64 is inverse of bit 63. */
2301     return ((res >> 1) ^ over) + round;
2302 }
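/*
 * Example of the overflow fix-up (illustrative): for a = b = INT64_MAX
 * the 64-bit sum wraps to -2 and 'over' is set, so (res >> 1) ^ over
 * turns -1 back into INT64_MAX, the correct average; without overflow
 * 'over' is zero and the shift alone suffices.
 */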
2303 
2304 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2305 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2306 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2307 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
2308 GEN_VEXT_VV_RM(vaadd_vv_b)
2309 GEN_VEXT_VV_RM(vaadd_vv_h)
2310 GEN_VEXT_VV_RM(vaadd_vv_w)
2311 GEN_VEXT_VV_RM(vaadd_vv_d)
2312 
2313 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2314 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2315 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2316 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
2317 GEN_VEXT_VX_RM(vaadd_vx_b)
2318 GEN_VEXT_VX_RM(vaadd_vx_h)
2319 GEN_VEXT_VX_RM(vaadd_vx_w)
2320 GEN_VEXT_VX_RM(vaadd_vx_d)
2321 
2322 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2323                                uint32_t a, uint32_t b)
2324 {
2325     uint64_t res = (uint64_t)a + b;
2326     uint8_t round = get_round(vxrm, res, 1);
2327 
2328     return (res >> 1) + round;
2329 }
2330 
2331 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
2332                                uint64_t a, uint64_t b)
2333 {
2334     uint64_t res = a + b;
2335     uint8_t round = get_round(vxrm, res, 1);
2336     uint64_t over = (uint64_t)(res < a) << 63;
2337 
2338     return ((res >> 1) | over) + round;
2339 }
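/*
 * For the unsigned variant the lost 65th bit is simply the carry out of
 * the addition, recovered as (res < a) and OR-ed back in as bit 63 of
 * the shifted result; asubu64 below recovers the borrow the same way
 * with (res > a).
 */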
2340 
2341 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
2342 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
2343 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
2344 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
2345 GEN_VEXT_VV_RM(vaaddu_vv_b)
2346 GEN_VEXT_VV_RM(vaaddu_vv_h)
2347 GEN_VEXT_VV_RM(vaaddu_vv_w)
2348 GEN_VEXT_VV_RM(vaaddu_vv_d)
2349 
2350 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
2351 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
2352 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
2353 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
2354 GEN_VEXT_VX_RM(vaaddu_vx_b)
2355 GEN_VEXT_VX_RM(vaaddu_vx_h)
2356 GEN_VEXT_VX_RM(vaaddu_vx_w)
2357 GEN_VEXT_VX_RM(vaaddu_vx_d)
2358 
2359 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2360 {
2361     int64_t res = (int64_t)a - b;
2362     uint8_t round = get_round(vxrm, res, 1);
2363 
2364     return (res >> 1) + round;
2365 }
2366 
2367 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2368 {
2369     int64_t res = (int64_t)a - b;
2370     uint8_t round = get_round(vxrm, res, 1);
2371     int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2372 
2373     /* With signed overflow, bit 64 is inverse of bit 63. */
2374     return ((res >> 1) ^ over) + round;
2375 }
2376 
2377 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2378 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2379 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2380 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
2381 GEN_VEXT_VV_RM(vasub_vv_b)
2382 GEN_VEXT_VV_RM(vasub_vv_h)
2383 GEN_VEXT_VV_RM(vasub_vv_w)
2384 GEN_VEXT_VV_RM(vasub_vv_d)
2385 
2386 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2387 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2388 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2389 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
2390 GEN_VEXT_VX_RM(vasub_vx_b)
2391 GEN_VEXT_VX_RM(vasub_vx_h)
2392 GEN_VEXT_VX_RM(vasub_vx_w)
2393 GEN_VEXT_VX_RM(vasub_vx_d)
2394 
2395 static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2396                                uint32_t a, uint32_t b)
2397 {
2398     int64_t res = (int64_t)a - b;
2399     uint8_t round = get_round(vxrm, res, 1);
2400 
2401     return (res >> 1) + round;
2402 }
2403 
2404 static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
2405                                uint64_t a, uint64_t b)
2406 {
2407     uint64_t res = (uint64_t)a - b;
2408     uint8_t round = get_round(vxrm, res, 1);
2409     uint64_t over = (uint64_t)(res > a) << 63;
2410 
2411     return ((res >> 1) | over) + round;
2412 }
2413 
2414 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
2415 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
2416 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
2417 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
2418 GEN_VEXT_VV_RM(vasubu_vv_b)
2419 GEN_VEXT_VV_RM(vasubu_vv_h)
2420 GEN_VEXT_VV_RM(vasubu_vv_w)
2421 GEN_VEXT_VV_RM(vasubu_vv_d)
2422 
2423 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
2424 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
2425 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
2426 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
2427 GEN_VEXT_VX_RM(vasubu_vx_b)
2428 GEN_VEXT_VX_RM(vasubu_vx_h)
2429 GEN_VEXT_VX_RM(vasubu_vx_w)
2430 GEN_VEXT_VX_RM(vasubu_vx_d)
2431 
2432 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2433 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2434 {
2435     uint8_t round;
2436     int16_t res;
2437 
2438     res = (int16_t)a * (int16_t)b;
2439     round = get_round(vxrm, res, 7);
2440     res   = (res >> 7) + round;
2441 
2442     if (res > INT8_MAX) {
2443         env->vxsat = 0x1;
2444         return INT8_MAX;
2445     } else if (res < INT8_MIN) {
2446         env->vxsat = 0x1;
2447         return INT8_MIN;
2448     } else {
2449         return res;
2450     }
2451 }
2452 
2453 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2454 {
2455     uint8_t round;
2456     int32_t res;
2457 
2458     res = (int32_t)a * (int32_t)b;
2459     round = get_round(vxrm, res, 15);
2460     res   = (res >> 15) + round;
2461 
2462     if (res > INT16_MAX) {
2463         env->vxsat = 0x1;
2464         return INT16_MAX;
2465     } else if (res < INT16_MIN) {
2466         env->vxsat = 0x1;
2467         return INT16_MIN;
2468     } else {
2469         return res;
2470     }
2471 }
2472 
2473 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2474 {
2475     uint8_t round;
2476     int64_t res;
2477 
2478     res = (int64_t)a * (int64_t)b;
2479     round = get_round(vxrm, res, 31);
2480     res   = (res >> 31) + round;
2481 
2482     if (res > INT32_MAX) {
2483         env->vxsat = 0x1;
2484         return INT32_MAX;
2485     } else if (res < INT32_MIN) {
2486         env->vxsat = 0x1;
2487         return INT32_MIN;
2488     } else {
2489         return res;
2490     }
2491 }
2492 
2493 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2494 {
2495     uint8_t round;
2496     uint64_t hi_64, lo_64;
2497     int64_t res;
2498 
2499     if (a == INT64_MIN && b == INT64_MIN) {
2500         env->vxsat = 1;
2501         return INT64_MAX;
2502     }
2503 
2504     muls64(&lo_64, &hi_64, a, b);
2505     round = get_round(vxrm, lo_64, 63);
2506     /*
2507      * Cannot overflow, as there are always
2508      * 2 sign bits after multiply.
2509      */
2510     res = (hi_64 << 1) | (lo_64 >> 63);
2511     if (round) {
2512         if (res == INT64_MAX) {
2513             env->vxsat = 1;
2514         } else {
2515             res += 1;
2516         }
2517     }
2518     return res;
2519 }
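/*
 * The early INT64_MIN * INT64_MIN check is needed because that is the
 * one case where the Q63 product (2^126) does not fit: without it,
 * (hi_64 << 1) | (lo_64 >> 63) would wrap to INT64_MIN instead of
 * saturating to INT64_MAX as the narrower vsmul helpers do.
 */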
2520 
2521 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2522 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2523 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2524 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
2525 GEN_VEXT_VV_RM(vsmul_vv_b)
2526 GEN_VEXT_VV_RM(vsmul_vv_h)
2527 GEN_VEXT_VV_RM(vsmul_vv_w)
2528 GEN_VEXT_VV_RM(vsmul_vv_d)
2529 
2530 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2531 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2532 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2533 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
2534 GEN_VEXT_VX_RM(vsmul_vx_b)
2535 GEN_VEXT_VX_RM(vsmul_vx_h)
2536 GEN_VEXT_VX_RM(vsmul_vx_w)
2537 GEN_VEXT_VX_RM(vsmul_vx_d)
2538 
2539 /* Vector Single-Width Scaling Shift Instructions */
2540 static inline uint8_t
2541 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2542 {
2543     uint8_t round, shift = b & 0x7;
2544     uint8_t res;
2545 
2546     round = get_round(vxrm, a, shift);
2547     res   = (a >> shift)  + round;
2548     return res;
2549 }
2550 static inline uint16_t
2551 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2552 {
2553     uint8_t round, shift = b & 0xf;
2554     uint16_t res;
2555 
2556     round = get_round(vxrm, a, shift);
2557     res   = (a >> shift)  + round;
2558     return res;
2559 }
2560 static inline uint32_t
2561 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2562 {
2563     uint8_t round, shift = b & 0x1f;
2564     uint32_t res;
2565 
2566     round = get_round(vxrm, a, shift);
2567     res   = (a >> shift)  + round;
2568     return res;
2569 }
2570 static inline uint64_t
2571 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2572 {
2573     uint8_t round, shift = b & 0x3f;
2574     uint64_t res;
2575 
2576     round = get_round(vxrm, a, shift);
2577     res   = (a >> shift)  + round;
2578     return res;
2579 }
2580 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2581 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2582 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2583 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
2584 GEN_VEXT_VV_RM(vssrl_vv_b)
2585 GEN_VEXT_VV_RM(vssrl_vv_h)
2586 GEN_VEXT_VV_RM(vssrl_vv_w)
2587 GEN_VEXT_VV_RM(vssrl_vv_d)
2588 
2589 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2590 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2591 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2592 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
2593 GEN_VEXT_VX_RM(vssrl_vx_b)
2594 GEN_VEXT_VX_RM(vssrl_vx_h)
2595 GEN_VEXT_VX_RM(vssrl_vx_w)
2596 GEN_VEXT_VX_RM(vssrl_vx_d)
2597 
2598 static inline int8_t
2599 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2600 {
2601     uint8_t round, shift = b & 0x7;
2602     int8_t res;
2603 
2604     round = get_round(vxrm, a, shift);
2605     res   = (a >> shift)  + round;
2606     return res;
2607 }
2608 static inline int16_t
2609 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2610 {
2611     uint8_t round, shift = b & 0xf;
2612     int16_t res;
2613 
2614     round = get_round(vxrm, a, shift);
2615     res   = (a >> shift)  + round;
2616     return res;
2617 }
2618 static inline int32_t
2619 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2620 {
2621     uint8_t round, shift = b & 0x1f;
2622     int32_t res;
2623 
2624     round = get_round(vxrm, a, shift);
2625     res   = (a >> shift)  + round;
2626     return res;
2627 }
2628 static inline int64_t
2629 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2630 {
2631     uint8_t round, shift = b & 0x3f;
2632     int64_t res;
2633 
2634     round = get_round(vxrm, a, shift);
2635     res   = (a >> shift)  + round;
2636     return res;
2637 }
2638 
2639 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2640 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2641 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2642 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
2643 GEN_VEXT_VV_RM(vssra_vv_b)
2644 GEN_VEXT_VV_RM(vssra_vv_h)
2645 GEN_VEXT_VV_RM(vssra_vv_w)
2646 GEN_VEXT_VV_RM(vssra_vv_d)
2647 
2648 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2649 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2650 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2651 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
2652 GEN_VEXT_VX_RM(vssra_vx_b)
2653 GEN_VEXT_VX_RM(vssra_vx_h)
2654 GEN_VEXT_VX_RM(vssra_vx_w)
2655 GEN_VEXT_VX_RM(vssra_vx_d)
2656 
2657 /* Vector Narrowing Fixed-Point Clip Instructions */
2658 static inline int8_t
2659 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2660 {
2661     uint8_t round, shift = b & 0xf;
2662     int16_t res;
2663 
2664     round = get_round(vxrm, a, shift);
2665     res   = (a >> shift)  + round;
2666     if (res > INT8_MAX) {
2667         env->vxsat = 0x1;
2668         return INT8_MAX;
2669     } else if (res < INT8_MIN) {
2670         env->vxsat = 0x1;
2671         return INT8_MIN;
2672     } else {
2673         return res;
2674     }
2675 }
2676 
2677 static inline int16_t
2678 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2679 {
2680     uint8_t round, shift = b & 0x1f;
2681     int32_t res;
2682 
2683     round = get_round(vxrm, a, shift);
2684     res   = (a >> shift)  + round;
2685     if (res > INT16_MAX) {
2686         env->vxsat = 0x1;
2687         return INT16_MAX;
2688     } else if (res < INT16_MIN) {
2689         env->vxsat = 0x1;
2690         return INT16_MIN;
2691     } else {
2692         return res;
2693     }
2694 }
2695 
2696 static inline int32_t
2697 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2698 {
2699     uint8_t round, shift = b & 0x3f;
2700     int64_t res;
2701 
2702     round = get_round(vxrm, a, shift);
2703     res   = (a >> shift)  + round;
2704     if (res > INT32_MAX) {
2705         env->vxsat = 0x1;
2706         return INT32_MAX;
2707     } else if (res < INT32_MIN) {
2708         env->vxsat = 0x1;
2709         return INT32_MIN;
2710     } else {
2711         return res;
2712     }
2713 }
2714 
2715 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
2716 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
2717 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
2718 GEN_VEXT_VV_RM(vnclip_wv_b)
2719 GEN_VEXT_VV_RM(vnclip_wv_h)
2720 GEN_VEXT_VV_RM(vnclip_wv_w)
2721 
2722 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
2723 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
2724 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
2725 GEN_VEXT_VX_RM(vnclip_wx_b)
2726 GEN_VEXT_VX_RM(vnclip_wx_h)
2727 GEN_VEXT_VX_RM(vnclip_wx_w)
2728 
2729 static inline uint8_t
2730 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2731 {
2732     uint8_t round, shift = b & 0xf;
2733     uint16_t res;
2734 
2735     round = get_round(vxrm, a, shift);
2736     res   = (a >> shift)  + round;
2737     if (res > UINT8_MAX) {
2738         env->vxsat = 0x1;
2739         return UINT8_MAX;
2740     } else {
2741         return res;
2742     }
2743 }
2744 
2745 static inline uint16_t
2746 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
2747 {
2748     uint8_t round, shift = b & 0x1f;
2749     uint32_t res;
2750 
2751     round = get_round(vxrm, a, shift);
2752     res   = (a >> shift)  + round;
2753     if (res > UINT16_MAX) {
2754         env->vxsat = 0x1;
2755         return UINT16_MAX;
2756     } else {
2757         return res;
2758     }
2759 }
2760 
2761 static inline uint32_t
2762 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
2763 {
2764     uint8_t round, shift = b & 0x3f;
2765     uint64_t res;
2766 
2767     round = get_round(vxrm, a, shift);
2768     res   = (a >> shift)  + round;
2769     if (res > UINT32_MAX) {
2770         env->vxsat = 0x1;
2771         return UINT32_MAX;
2772     } else {
2773         return res;
2774     }
2775 }
2776 
2777 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
2778 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
2779 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
2780 GEN_VEXT_VV_RM(vnclipu_wv_b)
2781 GEN_VEXT_VV_RM(vnclipu_wv_h)
2782 GEN_VEXT_VV_RM(vnclipu_wv_w)
2783 
2784 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
2785 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
2786 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
2787 GEN_VEXT_VX_RM(vnclipu_wx_b)
2788 GEN_VEXT_VX_RM(vnclipu_wx_h)
2789 GEN_VEXT_VX_RM(vnclipu_wx_w)
2790 
2791 /*
2792  *** Vector Floating-Point Arithmetic Instructions
2793  */
2794 /* Vector Single-Width Floating-Point Add/Subtract Instructions */
2795 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
2796 static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
2797                       CPURISCVState *env)                      \
2798 {                                                              \
2799     TX1 s1 = *((T1 *)vs1 + HS1(i));                            \
2800     TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
2801     *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status);         \
2802 }
2803 
2804 #define GEN_VEXT_VV_ENV(NAME)                             \
2805 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
2806                   void *vs2, CPURISCVState *env,          \
2807                   uint32_t desc)                          \
2808 {                                                         \
2809     uint32_t vm = vext_vm(desc);                          \
2810     uint32_t vl = env->vl;                                \
2811     uint32_t i;                                           \
2812                                                           \
2813     for (i = env->vstart; i < vl; i++) {                  \
2814         if (!vm && !vext_elem_mask(v0, i)) {              \
2815             continue;                                     \
2816         }                                                 \
2817         do_##NAME(vd, vs1, vs2, i, env);                  \
2818     }                                                     \
2819     env->vstart = 0;                                      \
2820 }
2821 
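/*
 * A worked expansion, assuming RVVCALL(macro, ...) simply forwards its
 * arguments and OP_UUU_H names five uint16_t types (both are defined earlier
 * in this file): RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2,
 * float16_add) produces roughly
 *
 *     static void do_vfadd_vv_h(void *vd, void *vs1, void *vs2, int i,
 *                               CPURISCVState *env)
 *     {
 *         uint16_t s1 = *((uint16_t *)vs1 + H2(i));
 *         uint16_t s2 = *((uint16_t *)vs2 + H2(i));
 *         *((uint16_t *)vd + H2(i)) = float16_add(s2, s1, &env->fp_status);
 *     }
 *
 * and GEN_VEXT_VV_ENV(vfadd_vv_h) wraps it in a helper that loops from
 * vstart to vl, skipping elements whose mask bit is clear when vm == 0 so
 * that they keep their previous value.
 */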
2822 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
2823 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
2824 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
2825 GEN_VEXT_VV_ENV(vfadd_vv_h)
2826 GEN_VEXT_VV_ENV(vfadd_vv_w)
2827 GEN_VEXT_VV_ENV(vfadd_vv_d)
2828 
2829 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)        \
2830 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
2831                       CPURISCVState *env)                      \
2832 {                                                              \
2833     TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
2834     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
2835 }
2836 
2837 #define GEN_VEXT_VF(NAME)                                 \
2838 void HELPER(NAME)(void *vd, void *v0, uint64_t s1,        \
2839                   void *vs2, CPURISCVState *env,          \
2840                   uint32_t desc)                          \
2841 {                                                         \
2842     uint32_t vm = vext_vm(desc);                          \
2843     uint32_t vl = env->vl;                                \
2844     uint32_t i;                                           \
2845                                                           \
2846     for (i = env->vstart; i < vl; i++) {                  \
2847         if (!vm && !vext_elem_mask(v0, i)) {              \
2848             continue;                                     \
2849         }                                                 \
2850         do_##NAME(vd, s1, vs2, i, env);                   \
2851     }                                                     \
2852     env->vstart = 0;                                      \
2853 }
2854 
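/*
 * In the _vf forms the scalar arrives as a uint64_t and is narrowed to the
 * element type by the (TX1)(T1) cast in OPFVF2, so e.g. vfadd_vf_h passes
 * only the low 16 bits of s1 to float16_add; the same scalar value is
 * combined with every active element of vs2.
 */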
2855 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
2856 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
2857 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
2858 GEN_VEXT_VF(vfadd_vf_h)
2859 GEN_VEXT_VF(vfadd_vf_w)
2860 GEN_VEXT_VF(vfadd_vf_d)
2861 
2862 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
2863 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
2864 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
2865 GEN_VEXT_VV_ENV(vfsub_vv_h)
2866 GEN_VEXT_VV_ENV(vfsub_vv_w)
2867 GEN_VEXT_VV_ENV(vfsub_vv_d)
2868 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
2869 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
2870 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
2871 GEN_VEXT_VF(vfsub_vf_h)
2872 GEN_VEXT_VF(vfsub_vf_w)
2873 GEN_VEXT_VF(vfsub_vf_d)
2874 
2875 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
2876 {
2877     return float16_sub(b, a, s);
2878 }
2879 
2880 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
2881 {
2882     return float32_sub(b, a, s);
2883 }
2884 
2885 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
2886 {
2887     return float64_sub(b, a, s);
2888 }
2889 
2890 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
2891 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
2892 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
2893 GEN_VEXT_VF(vfrsub_vf_h)
2894 GEN_VEXT_VF(vfrsub_vf_w)
2895 GEN_VEXT_VF(vfrsub_vf_d)
2896 
2897 /* Vector Widening Floating-Point Add/Subtract Instructions */
2898 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
2899 {
2900     return float32_add(float16_to_float32(a, true, s),
2901             float16_to_float32(b, true, s), s);
2902 }
2903 
2904 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
2905 {
2906     return float64_add(float32_to_float64(a, s),
2907             float32_to_float64(b, s), s);
2908 
2909 }
2910 
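/*
 * The widening ops promote both operands to 2*SEW first
 * (float16_to_float32(x, true, s) selects IEEE half-precision) and then do a
 * single operation at the wider precision, so the result is rounded only
 * once.  The same promote-then-operate pattern is used by vfwsub*, vfwmul*
 * and the widening multiply-accumulate helpers further down.
 */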
2911 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
2912 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
2913 GEN_VEXT_VV_ENV(vfwadd_vv_h)
2914 GEN_VEXT_VV_ENV(vfwadd_vv_w)
2915 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
2916 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
2917 GEN_VEXT_VF(vfwadd_vf_h)
2918 GEN_VEXT_VF(vfwadd_vf_w)
2919 
2920 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
2921 {
2922     return float32_sub(float16_to_float32(a, true, s),
2923             float16_to_float32(b, true, s), s);
2924 }
2925 
2926 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
2927 {
2928     return float64_sub(float32_to_float64(a, s),
2929             float32_to_float64(b, s), s);
2930 
2931 }
2932 
2933 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
2934 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
2935 GEN_VEXT_VV_ENV(vfwsub_vv_h)
2936 GEN_VEXT_VV_ENV(vfwsub_vv_w)
2937 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
2938 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
2939 GEN_VEXT_VF(vfwsub_vf_h)
2940 GEN_VEXT_VF(vfwsub_vf_w)
2941 
2942 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
2943 {
2944     return float32_add(a, float16_to_float32(b, true, s), s);
2945 }
2946 
2947 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
2948 {
2949     return float64_add(a, float32_to_float64(b, s), s);
2950 }
2951 
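/*
 * The .wv/.wf forms (WOP_WUUU_*) take a vs2 operand that is already 2*SEW
 * wide, so only the narrower operand b is converted before the wide
 * add/subtract.
 */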
2952 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
2953 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
2954 GEN_VEXT_VV_ENV(vfwadd_wv_h)
2955 GEN_VEXT_VV_ENV(vfwadd_wv_w)
2956 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
2957 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
2958 GEN_VEXT_VF(vfwadd_wf_h)
2959 GEN_VEXT_VF(vfwadd_wf_w)
2960 
2961 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
2962 {
2963     return float32_sub(a, float16_to_float32(b, true, s), s);
2964 }
2965 
2966 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
2967 {
2968     return float64_sub(a, float32_to_float64(b, s), s);
2969 }
2970 
2971 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
2972 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
2973 GEN_VEXT_VV_ENV(vfwsub_wv_h)
2974 GEN_VEXT_VV_ENV(vfwsub_wv_w)
2975 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
2976 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
2977 GEN_VEXT_VF(vfwsub_wf_h)
2978 GEN_VEXT_VF(vfwsub_wf_w)
2979 
2980 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */
2981 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
2982 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
2983 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
2984 GEN_VEXT_VV_ENV(vfmul_vv_h)
2985 GEN_VEXT_VV_ENV(vfmul_vv_w)
2986 GEN_VEXT_VV_ENV(vfmul_vv_d)
2987 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
2988 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
2989 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
2990 GEN_VEXT_VF(vfmul_vf_h)
2991 GEN_VEXT_VF(vfmul_vf_w)
2992 GEN_VEXT_VF(vfmul_vf_d)
2993 
2994 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
2995 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
2996 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
2997 GEN_VEXT_VV_ENV(vfdiv_vv_h)
2998 GEN_VEXT_VV_ENV(vfdiv_vv_w)
2999 GEN_VEXT_VV_ENV(vfdiv_vv_d)
3000 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
3001 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
3002 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
3003 GEN_VEXT_VF(vfdiv_vf_h)
3004 GEN_VEXT_VF(vfdiv_vf_w)
3005 GEN_VEXT_VF(vfdiv_vf_d)
3006 
3007 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3008 {
3009     return float16_div(b, a, s);
3010 }
3011 
3012 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
3013 {
3014     return float32_div(b, a, s);
3015 }
3016 
3017 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
3018 {
3019     return float64_div(b, a, s);
3020 }
3021 
3022 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
3023 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
3024 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
3025 GEN_VEXT_VF(vfrdiv_vf_h)
3026 GEN_VEXT_VF(vfrdiv_vf_w)
3027 GEN_VEXT_VF(vfrdiv_vf_d)
3028 
3029 /* Vector Widening Floating-Point Multiply */
3030 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3031 {
3032     return float32_mul(float16_to_float32(a, true, s),
3033             float16_to_float32(b, true, s), s);
3034 }
3035 
3036 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3037 {
3038     return float64_mul(float32_to_float64(a, s),
3039             float32_to_float64(b, s), s);
3040 
3041 }
3042 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3043 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
3044 GEN_VEXT_VV_ENV(vfwmul_vv_h)
3045 GEN_VEXT_VV_ENV(vfwmul_vv_w)
3046 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3047 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
3048 GEN_VEXT_VF(vfwmul_vf_h)
3049 GEN_VEXT_VF(vfwmul_vf_w)
3050 
3051 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
3052 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)       \
3053 static void do_##NAME(void *vd, void *vs1, void *vs2, int i,       \
3054         CPURISCVState *env)                                        \
3055 {                                                                  \
3056     TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
3057     TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
3058     TD d = *((TD *)vd + HD(i));                                    \
3059     *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status);          \
3060 }
3061 
3062 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3063 {
3064     return float16_muladd(a, b, d, 0, s);
3065 }
3066 
3067 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3068 {
3069     return float32_muladd(a, b, d, 0, s);
3070 }
3071 
3072 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3073 {
3074     return float64_muladd(a, b, d, 0, s);
3075 }
3076 
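/*
 * OPFVV3/OPFVF3 pass (vs2, vs1-or-scalar, vd), so fmacc16(a = vs2, b = vs1,
 * d = vd) computes vd = (vs1 * vs2) + vd as a single fused softfloat muladd
 * (one rounding).  The related helpers differ only in the negation flags:
 *   fnmacc: negate product and addend  ->  -(vs1 * vs2) - vd
 *   fmsac:  negate addend              ->   (vs1 * vs2) - vd
 *   fnmsac: negate product             ->  -(vs1 * vs2) + vd
 */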
3077 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3078 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3079 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
3080 GEN_VEXT_VV_ENV(vfmacc_vv_h)
3081 GEN_VEXT_VV_ENV(vfmacc_vv_w)
3082 GEN_VEXT_VV_ENV(vfmacc_vv_d)
3083 
3084 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)           \
3085 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i,    \
3086         CPURISCVState *env)                                       \
3087 {                                                                 \
3088     TX2 s2 = *((T2 *)vs2 + HS2(i));                               \
3089     TD d = *((TD *)vd + HD(i));                                   \
3090     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
3091 }
3092 
3093 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3094 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3095 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
3096 GEN_VEXT_VF(vfmacc_vf_h)
3097 GEN_VEXT_VF(vfmacc_vf_w)
3098 GEN_VEXT_VF(vfmacc_vf_d)
3099 
3100 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3101 {
3102     return float16_muladd(a, b, d,
3103             float_muladd_negate_c | float_muladd_negate_product, s);
3104 }
3105 
3106 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3107 {
3108     return float32_muladd(a, b, d,
3109             float_muladd_negate_c | float_muladd_negate_product, s);
3110 }
3111 
3112 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3113 {
3114     return float64_muladd(a, b, d,
3115             float_muladd_negate_c | float_muladd_negate_product, s);
3116 }
3117 
3118 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3119 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3120 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
3121 GEN_VEXT_VV_ENV(vfnmacc_vv_h)
3122 GEN_VEXT_VV_ENV(vfnmacc_vv_w)
3123 GEN_VEXT_VV_ENV(vfnmacc_vv_d)
3124 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3125 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3126 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
3127 GEN_VEXT_VF(vfnmacc_vf_h)
3128 GEN_VEXT_VF(vfnmacc_vf_w)
3129 GEN_VEXT_VF(vfnmacc_vf_d)
3130 
3131 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3132 {
3133     return float16_muladd(a, b, d, float_muladd_negate_c, s);
3134 }
3135 
3136 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3137 {
3138     return float32_muladd(a, b, d, float_muladd_negate_c, s);
3139 }
3140 
3141 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3142 {
3143     return float64_muladd(a, b, d, float_muladd_negate_c, s);
3144 }
3145 
3146 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3147 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3148 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
3149 GEN_VEXT_VV_ENV(vfmsac_vv_h)
3150 GEN_VEXT_VV_ENV(vfmsac_vv_w)
3151 GEN_VEXT_VV_ENV(vfmsac_vv_d)
3152 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3153 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3154 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
3155 GEN_VEXT_VF(vfmsac_vf_h)
3156 GEN_VEXT_VF(vfmsac_vf_w)
3157 GEN_VEXT_VF(vfmsac_vf_d)
3158 
3159 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3160 {
3161     return float16_muladd(a, b, d, float_muladd_negate_product, s);
3162 }
3163 
3164 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3165 {
3166     return float32_muladd(a, b, d, float_muladd_negate_product, s);
3167 }
3168 
3169 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3170 {
3171     return float64_muladd(a, b, d, float_muladd_negate_product, s);
3172 }
3173 
3174 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3175 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3176 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
3177 GEN_VEXT_VV_ENV(vfnmsac_vv_h)
3178 GEN_VEXT_VV_ENV(vfnmsac_vv_w)
3179 GEN_VEXT_VV_ENV(vfnmsac_vv_d)
3180 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3181 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3182 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
3183 GEN_VEXT_VF(vfnmsac_vf_h)
3184 GEN_VEXT_VF(vfnmsac_vf_w)
3185 GEN_VEXT_VF(vfnmsac_vf_d)
3186 
3187 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3188 {
3189     return float16_muladd(d, b, a, 0, s);
3190 }
3191 
3192 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3193 {
3194     return float32_muladd(d, b, a, 0, s);
3195 }
3196 
3197 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3198 {
3199     return float64_muladd(d, b, a, 0, s);
3200 }
3201 
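/*
 * The vfmadd/vfnmadd/vfmsub/vfnmsub helpers swap the roles of the operands:
 * muladd(d, b, a) multiplies the old destination value d by the vs1/scalar
 * operand b and adds or subtracts the vs2 value a, e.g. vfmadd computes
 * vd = (vs1 * vd) + vs2.
 */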
3202 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3203 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3204 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
3205 GEN_VEXT_VV_ENV(vfmadd_vv_h)
3206 GEN_VEXT_VV_ENV(vfmadd_vv_w)
3207 GEN_VEXT_VV_ENV(vfmadd_vv_d)
3208 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3209 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3210 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
3211 GEN_VEXT_VF(vfmadd_vf_h)
3212 GEN_VEXT_VF(vfmadd_vf_w)
3213 GEN_VEXT_VF(vfmadd_vf_d)
3214 
3215 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3216 {
3217     return float16_muladd(d, b, a,
3218             float_muladd_negate_c | float_muladd_negate_product, s);
3219 }
3220 
3221 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3222 {
3223     return float32_muladd(d, b, a,
3224             float_muladd_negate_c | float_muladd_negate_product, s);
3225 }
3226 
3227 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3228 {
3229     return float64_muladd(d, b, a,
3230             float_muladd_negate_c | float_muladd_negate_product, s);
3231 }
3232 
3233 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3234 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3235 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
3236 GEN_VEXT_VV_ENV(vfnmadd_vv_h)
3237 GEN_VEXT_VV_ENV(vfnmadd_vv_w)
3238 GEN_VEXT_VV_ENV(vfnmadd_vv_d)
3239 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3240 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3241 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
3242 GEN_VEXT_VF(vfnmadd_vf_h)
3243 GEN_VEXT_VF(vfnmadd_vf_w)
3244 GEN_VEXT_VF(vfnmadd_vf_d)
3245 
3246 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3247 {
3248     return float16_muladd(d, b, a, float_muladd_negate_c, s);
3249 }
3250 
3251 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3252 {
3253     return float32_muladd(d, b, a, float_muladd_negate_c, s);
3254 }
3255 
3256 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3257 {
3258     return float64_muladd(d, b, a, float_muladd_negate_c, s);
3259 }
3260 
3261 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3262 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3263 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
3264 GEN_VEXT_VV_ENV(vfmsub_vv_h)
3265 GEN_VEXT_VV_ENV(vfmsub_vv_w)
3266 GEN_VEXT_VV_ENV(vfmsub_vv_d)
3267 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3268 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3269 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
3270 GEN_VEXT_VF(vfmsub_vf_h)
3271 GEN_VEXT_VF(vfmsub_vf_w)
3272 GEN_VEXT_VF(vfmsub_vf_d)
3273 
3274 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3275 {
3276     return float16_muladd(d, b, a, float_muladd_negate_product, s);
3277 }
3278 
3279 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3280 {
3281     return float32_muladd(d, b, a, float_muladd_negate_product, s);
3282 }
3283 
3284 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3285 {
3286     return float64_muladd(d, b, a, float_muladd_negate_product, s);
3287 }
3288 
3289 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3290 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3291 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
3292 GEN_VEXT_VV_ENV(vfnmsub_vv_h)
3293 GEN_VEXT_VV_ENV(vfnmsub_vv_w)
3294 GEN_VEXT_VV_ENV(vfnmsub_vv_d)
3295 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3296 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3297 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
3298 GEN_VEXT_VF(vfnmsub_vf_h)
3299 GEN_VEXT_VF(vfnmsub_vf_w)
3300 GEN_VEXT_VF(vfnmsub_vf_d)
3301 
3302 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3303 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3304 {
3305     return float32_muladd(float16_to_float32(a, true, s),
3306                         float16_to_float32(b, true, s), d, 0, s);
3307 }
3308 
3309 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3310 {
3311     return float64_muladd(float32_to_float64(a, s),
3312                         float32_to_float64(b, s), d, 0, s);
3313 }
3314 
3315 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3316 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
3317 GEN_VEXT_VV_ENV(vfwmacc_vv_h)
3318 GEN_VEXT_VV_ENV(vfwmacc_vv_w)
3319 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3320 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
3321 GEN_VEXT_VF(vfwmacc_vf_h)
3322 GEN_VEXT_VF(vfwmacc_vf_w)
3323 
3324 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3325 {
3326     return float32_muladd(float16_to_float32(a, true, s),
3327                         float16_to_float32(b, true, s), d,
3328                         float_muladd_negate_c | float_muladd_negate_product, s);
3329 }
3330 
3331 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3332 {
3333     return float64_muladd(float32_to_float64(a, s),
3334                         float32_to_float64(b, s), d,
3335                         float_muladd_negate_c | float_muladd_negate_product, s);
3336 }
3337 
3338 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3339 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
3340 GEN_VEXT_VV_ENV(vfwnmacc_vv_h)
3341 GEN_VEXT_VV_ENV(vfwnmacc_vv_w)
3342 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3343 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
3344 GEN_VEXT_VF(vfwnmacc_vf_h)
3345 GEN_VEXT_VF(vfwnmacc_vf_w)
3346 
3347 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3348 {
3349     return float32_muladd(float16_to_float32(a, true, s),
3350                         float16_to_float32(b, true, s), d,
3351                         float_muladd_negate_c, s);
3352 }
3353 
3354 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3355 {
3356     return float64_muladd(float32_to_float64(a, s),
3357                         float32_to_float64(b, s), d,
3358                         float_muladd_negate_c, s);
3359 }
3360 
3361 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3362 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
3363 GEN_VEXT_VV_ENV(vfwmsac_vv_h)
3364 GEN_VEXT_VV_ENV(vfwmsac_vv_w)
3365 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3366 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
3367 GEN_VEXT_VF(vfwmsac_vf_h)
3368 GEN_VEXT_VF(vfwmsac_vf_w)
3369 
3370 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3371 {
3372     return float32_muladd(float16_to_float32(a, true, s),
3373                         float16_to_float32(b, true, s), d,
3374                         float_muladd_negate_product, s);
3375 }
3376 
3377 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3378 {
3379     return float64_muladd(float32_to_float64(a, s),
3380                         float32_to_float64(b, s), d,
3381                         float_muladd_negate_product, s);
3382 }
3383 
3384 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3385 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
3386 GEN_VEXT_VV_ENV(vfwnmsac_vv_h)
3387 GEN_VEXT_VV_ENV(vfwnmsac_vv_w)
3388 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3389 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
3390 GEN_VEXT_VF(vfwnmsac_vf_h)
3391 GEN_VEXT_VF(vfwnmsac_vf_w)
3392 
3393 /* Vector Floating-Point Square-Root Instruction */
3394 /* (TD, T2, TX2) */
3395 #define OP_UU_H uint16_t, uint16_t, uint16_t
3396 #define OP_UU_W uint32_t, uint32_t, uint32_t
3397 #define OP_UU_D uint64_t, uint64_t, uint64_t
3398 
3399 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP)        \
3400 static void do_##NAME(void *vd, void *vs2, int i,      \
3401         CPURISCVState *env)                            \
3402 {                                                      \
3403     TX2 s2 = *((T2 *)vs2 + HS2(i));                    \
3404     *((TD *)vd + HD(i)) = OP(s2, &env->fp_status);     \
3405 }
3406 
3407 #define GEN_VEXT_V_ENV(NAME)                           \
3408 void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
3409         CPURISCVState *env, uint32_t desc)             \
3410 {                                                      \
3411     uint32_t vm = vext_vm(desc);                       \
3412     uint32_t vl = env->vl;                             \
3413     uint32_t i;                                        \
3414                                                        \
3415     if (vl == 0) {                                     \
3416         return;                                        \
3417     }                                                  \
3418     for (i = env->vstart; i < vl; i++) {               \
3419         if (!vm && !vext_elem_mask(v0, i)) {           \
3420             continue;                                  \
3421         }                                              \
3422         do_##NAME(vd, vs2, i, env);                    \
3423     }                                                  \
3424     env->vstart = 0;                                   \
3425 }
3426 
3427 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3428 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3429 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
3430 GEN_VEXT_V_ENV(vfsqrt_v_h)
3431 GEN_VEXT_V_ENV(vfsqrt_v_w)
3432 GEN_VEXT_V_ENV(vfsqrt_v_d)
3433 
3434 /*
3435  * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
3436  *
3437  * Adapted from riscv-v-spec recip.c:
3438  * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3439  */
3440 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
3441 {
3442     uint64_t sign = extract64(f, frac_size + exp_size, 1);
3443     uint64_t exp = extract64(f, frac_size, exp_size);
3444     uint64_t frac = extract64(f, 0, frac_size);
3445 
3446     const uint8_t lookup_table[] = {
3447         52, 51, 50, 48, 47, 46, 44, 43,
3448         42, 41, 40, 39, 38, 36, 35, 34,
3449         33, 32, 31, 30, 30, 29, 28, 27,
3450         26, 25, 24, 23, 23, 22, 21, 20,
3451         19, 19, 18, 17, 16, 16, 15, 14,
3452         14, 13, 12, 12, 11, 10, 10, 9,
3453         9, 8, 7, 7, 6, 6, 5, 4,
3454         4, 3, 3, 2, 2, 1, 1, 0,
3455         127, 125, 123, 121, 119, 118, 116, 114,
3456         113, 111, 109, 108, 106, 105, 103, 102,
3457         100, 99, 97, 96, 95, 93, 92, 91,
3458         90, 88, 87, 86, 85, 84, 83, 82,
3459         80, 79, 78, 77, 76, 75, 74, 73,
3460         72, 71, 70, 70, 69, 68, 67, 66,
3461         65, 64, 63, 63, 62, 61, 60, 59,
3462         59, 58, 57, 56, 56, 55, 54, 53
3463     };
3464     const int precision = 7;
3465 
3466     if (exp == 0 && frac != 0) { /* subnormal */
3467         /* Normalize the subnormal. */
3468         while (extract64(frac, frac_size - 1, 1) == 0) {
3469             exp--;
3470             frac <<= 1;
3471         }
3472 
3473         frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3474     }
3475 
3476     int idx = ((exp & 1) << (precision - 1)) |
3477                 (frac >> (frac_size - precision + 1));
3478     uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3479                             (frac_size - precision);
3480     uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;
3481 
3482     uint64_t val = 0;
3483     val = deposit64(val, 0, frac_size, out_frac);
3484     val = deposit64(val, frac_size, exp_size, out_exp);
3485     val = deposit64(val, frac_size + exp_size, 1, sign);
3486     return val;
3487 }
3488 
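/*
 * Worked example for float32 1.0 (0x3f800000): sign = 0, exp = 127,
 * frac = 0, so idx = ((127 & 1) << 6) | 0 = 64 and lookup_table[64] = 127.
 * out_frac = 127 << 16 and out_exp = (3 * 127 + ~127) / 2 = 126 (the
 * unsigned wraparound of ~exp computes (3 * B - 1 - exp) / 2, where
 * B = 2^(exp_size - 1) - 1 is the bias, and also covers the negative
 * adjusted exponents produced by subnormal normalization).  The packed
 * result is 0x3f7f0000, about 0.99609375, a 7-bit estimate of 1/sqrt(1.0).
 */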
3489 static float16 frsqrt7_h(float16 f, float_status *s)
3490 {
3491     int exp_size = 5, frac_size = 10;
3492     bool sign = float16_is_neg(f);
3493 
3494     /*
3495      * frsqrt7(sNaN) = canonical NaN
3496      * frsqrt7(-inf) = canonical NaN
3497      * frsqrt7(-normal) = canonical NaN
3498      * frsqrt7(-subnormal) = canonical NaN
3499      */
3500     if (float16_is_signaling_nan(f, s) ||
3501             (float16_is_infinity(f) && sign) ||
3502             (float16_is_normal(f) && sign) ||
3503             (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
3504         s->float_exception_flags |= float_flag_invalid;
3505         return float16_default_nan(s);
3506     }
3507 
3508     /* frsqrt7(qNaN) = canonical NaN */
3509     if (float16_is_quiet_nan(f, s)) {
3510         return float16_default_nan(s);
3511     }
3512 
3513     /* frsqrt7(+-0) = +-inf */
3514     if (float16_is_zero(f)) {
3515         s->float_exception_flags |= float_flag_divbyzero;
3516         return float16_set_sign(float16_infinity, sign);
3517     }
3518 
3519     /* frsqrt7(+inf) = +0 */
3520     if (float16_is_infinity(f) && !sign) {
3521         return float16_set_sign(float16_zero, sign);
3522     }
3523 
3524     /* +normal, +subnormal */
3525     uint64_t val = frsqrt7(f, exp_size, frac_size);
3526     return make_float16(val);
3527 }
3528 
3529 static float32 frsqrt7_s(float32 f, float_status *s)
3530 {
3531     int exp_size = 8, frac_size = 23;
3532     bool sign = float32_is_neg(f);
3533 
3534     /*
3535      * frsqrt7(sNaN) = canonical NaN
3536      * frsqrt7(-inf) = canonical NaN
3537      * frsqrt7(-normal) = canonical NaN
3538      * frsqrt7(-subnormal) = canonical NaN
3539      */
3540     if (float32_is_signaling_nan(f, s) ||
3541             (float32_is_infinity(f) && sign) ||
3542             (float32_is_normal(f) && sign) ||
3543             (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
3544         s->float_exception_flags |= float_flag_invalid;
3545         return float32_default_nan(s);
3546     }
3547 
3548     /* frsqrt7(qNaN) = canonical NaN */
3549     if (float32_is_quiet_nan(f, s)) {
3550         return float32_default_nan(s);
3551     }
3552 
3553     /* frsqrt7(+-0) = +-inf */
3554     if (float32_is_zero(f)) {
3555         s->float_exception_flags |= float_flag_divbyzero;
3556         return float32_set_sign(float32_infinity, sign);
3557     }
3558 
3559     /* frsqrt7(+inf) = +0 */
3560     if (float32_is_infinity(f) && !sign) {
3561         return float32_set_sign(float32_zero, sign);
3562     }
3563 
3564     /* +normal, +subnormal */
3565     uint64_t val = frsqrt7(f, exp_size, frac_size);
3566     return make_float32(val);
3567 }
3568 
3569 static float64 frsqrt7_d(float64 f, float_status *s)
3570 {
3571     int exp_size = 11, frac_size = 52;
3572     bool sign = float64_is_neg(f);
3573 
3574     /*
3575      * frsqrt7(sNaN) = canonical NaN
3576      * frsqrt7(-inf) = canonical NaN
3577      * frsqrt7(-normal) = canonical NaN
3578      * frsqrt7(-subnormal) = canonical NaN
3579      */
3580     if (float64_is_signaling_nan(f, s) ||
3581             (float64_is_infinity(f) && sign) ||
3582             (float64_is_normal(f) && sign) ||
3583             (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
3584         s->float_exception_flags |= float_flag_invalid;
3585         return float64_default_nan(s);
3586     }
3587 
3588     /* frsqrt7(qNaN) = canonical NaN */
3589     if (float64_is_quiet_nan(f, s)) {
3590         return float64_default_nan(s);
3591     }
3592 
3593     /* frsqrt7(+-0) = +-inf */
3594     if (float64_is_zero(f)) {
3595         s->float_exception_flags |= float_flag_divbyzero;
3596         return float64_set_sign(float64_infinity, sign);
3597     }
3598 
3599     /* frsqrt7(+inf) = +0 */
3600     if (float64_is_infinity(f) && !sign) {
3601         return float64_set_sign(float64_zero, sign);
3602     }
3603 
3604     /* +normal, +subnormal */
3605     uint64_t val = frsqrt7(f, exp_size, frac_size);
3606     return make_float64(val);
3607 }
3608 
3609 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
3610 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
3611 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
3612 GEN_VEXT_V_ENV(vfrsqrt7_v_h)
3613 GEN_VEXT_V_ENV(vfrsqrt7_v_w)
3614 GEN_VEXT_V_ENV(vfrsqrt7_v_d)
3615 
3616 /*
3617  * Vector Floating-Point Reciprocal Estimate Instruction
3618  *
3619  * Adapted from riscv-v-spec recip.c:
3620  * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3621  */
3622 static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
3623                       float_status *s)
3624 {
3625     uint64_t sign = extract64(f, frac_size + exp_size, 1);
3626     uint64_t exp = extract64(f, frac_size, exp_size);
3627     uint64_t frac = extract64(f, 0, frac_size);
3628 
3629     const uint8_t lookup_table[] = {
3630         127, 125, 123, 121, 119, 117, 116, 114,
3631         112, 110, 109, 107, 105, 104, 102, 100,
3632         99, 97, 96, 94, 93, 91, 90, 88,
3633         87, 85, 84, 83, 81, 80, 79, 77,
3634         76, 75, 74, 72, 71, 70, 69, 68,
3635         66, 65, 64, 63, 62, 61, 60, 59,
3636         58, 57, 56, 55, 54, 53, 52, 51,
3637         50, 49, 48, 47, 46, 45, 44, 43,
3638         42, 41, 40, 40, 39, 38, 37, 36,
3639         35, 35, 34, 33, 32, 31, 31, 30,
3640         29, 28, 28, 27, 26, 25, 25, 24,
3641         23, 23, 22, 21, 21, 20, 19, 19,
3642         18, 17, 17, 16, 15, 15, 14, 14,
3643         13, 12, 12, 11, 11, 10, 9, 9,
3644         8, 8, 7, 7, 6, 5, 5, 4,
3645         4, 3, 3, 2, 2, 1, 1, 0
3646     };
3647     const int precision = 7;
3648 
3649     if (exp == 0 && frac != 0) { /* subnormal */
3650         /* Normalize the subnormal. */
3651         while (extract64(frac, frac_size - 1, 1) == 0) {
3652             exp--;
3653             frac <<= 1;
3654         }
3655 
3656         frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3657 
3658         if (exp != 0 && exp != UINT64_MAX) {
3659             /*
3660              * Overflow to inf or max value of same sign,
3661              * depending on sign and rounding mode.
3662              */
3663             s->float_exception_flags |= (float_flag_inexact |
3664                                          float_flag_overflow);
3665 
3666             if ((s->float_rounding_mode == float_round_to_zero) ||
3667                 ((s->float_rounding_mode == float_round_down) && !sign) ||
3668                 ((s->float_rounding_mode == float_round_up) && sign)) {
3669                 /* Return greatest/negative finite value. */
3670                 return (sign << (exp_size + frac_size)) |
3671                     (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
3672             } else {
3673                 /* Return +-inf. */
3674                 return (sign << (exp_size + frac_size)) |
3675                     MAKE_64BIT_MASK(frac_size, exp_size);
3676             }
3677         }
3678     }
3679 
3680     int idx = frac >> (frac_size - precision);
3681     uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3682                             (frac_size - precision);
3683     uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;
3684 
3685     if (out_exp == 0 || out_exp == UINT64_MAX) {
3686         /*
3687          * The result is subnormal, but don't raise the underflow exception,
3688          * because there's no additional loss of precision.
3689          */
3690         out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
3691         if (out_exp == UINT64_MAX) {
3692             out_frac >>= 1;
3693             out_exp = 0;
3694         }
3695     }
3696 
3697     uint64_t val = 0;
3698     val = deposit64(val, 0, frac_size, out_frac);
3699     val = deposit64(val, frac_size, exp_size, out_exp);
3700     val = deposit64(val, frac_size + exp_size, 1, sign);
3701     return val;
3702 }
3703 
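/*
 * frec7 uses the same table-driven scheme with out_exp = 2 * B + ~exp,
 * i.e. 2 * B - 1 - exp with B the exponent bias; e.g. for float32 2.0,
 * idx = 0, lookup_table[0] = 127 and out_exp = 125, giving roughly 0.498.
 * Subnormal inputs whose adjusted exponent drops below -1 would overflow,
 * so they return either the largest finite value or infinity of the same
 * sign depending on the rounding mode, raising overflow and inexact.
 * Result exponents of 0 or -1 are turned into a subnormal output by
 * shifting the fraction right with the implicit bit made explicit.
 */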
3704 static float16 frec7_h(float16 f, float_status *s)
3705 {
3706     int exp_size = 5, frac_size = 10;
3707     bool sign = float16_is_neg(f);
3708 
3709     /* frec7(+-inf) = +-0 */
3710     if (float16_is_infinity(f)) {
3711         return float16_set_sign(float16_zero, sign);
3712     }
3713 
3714     /* frec7(+-0) = +-inf */
3715     if (float16_is_zero(f)) {
3716         s->float_exception_flags |= float_flag_divbyzero;
3717         return float16_set_sign(float16_infinity, sign);
3718     }
3719 
3720     /* frec7(sNaN) = canonical NaN */
3721     if (float16_is_signaling_nan(f, s)) {
3722         s->float_exception_flags |= float_flag_invalid;
3723         return float16_default_nan(s);
3724     }
3725 
3726     /* frec7(qNaN) = canonical NaN */
3727     if (float16_is_quiet_nan(f, s)) {
3728         return float16_default_nan(s);
3729     }
3730 
3731     /* +-normal, +-subnormal */
3732     uint64_t val = frec7(f, exp_size, frac_size, s);
3733     return make_float16(val);
3734 }
3735 
3736 static float32 frec7_s(float32 f, float_status *s)
3737 {
3738     int exp_size = 8, frac_size = 23;
3739     bool sign = float32_is_neg(f);
3740 
3741     /* frec7(+-inf) = +-0 */
3742     if (float32_is_infinity(f)) {
3743         return float32_set_sign(float32_zero, sign);
3744     }
3745 
3746     /* frec7(+-0) = +-inf */
3747     if (float32_is_zero(f)) {
3748         s->float_exception_flags |= float_flag_divbyzero;
3749         return float32_set_sign(float32_infinity, sign);
3750     }
3751 
3752     /* frec7(sNaN) = canonical NaN */
3753     if (float32_is_signaling_nan(f, s)) {
3754         s->float_exception_flags |= float_flag_invalid;
3755         return float32_default_nan(s);
3756     }
3757 
3758     /* frec7(qNaN) = canonical NaN */
3759     if (float32_is_quiet_nan(f, s)) {
3760         return float32_default_nan(s);
3761     }
3762 
3763     /* +-normal, +-subnormal */
3764     uint64_t val = frec7(f, exp_size, frac_size, s);
3765     return make_float32(val);
3766 }
3767 
3768 static float64 frec7_d(float64 f, float_status *s)
3769 {
3770     int exp_size = 11, frac_size = 52;
3771     bool sign = float64_is_neg(f);
3772 
3773     /* frec7(+-inf) = +-0 */
3774     if (float64_is_infinity(f)) {
3775         return float64_set_sign(float64_zero, sign);
3776     }
3777 
3778     /* frec7(+-0) = +-inf */
3779     if (float64_is_zero(f)) {
3780         s->float_exception_flags |= float_flag_divbyzero;
3781         return float64_set_sign(float64_infinity, sign);
3782     }
3783 
3784     /* frec7(sNaN) = canonical NaN */
3785     if (float64_is_signaling_nan(f, s)) {
3786         s->float_exception_flags |= float_flag_invalid;
3787         return float64_default_nan(s);
3788     }
3789 
3790     /* frec7(qNaN) = canonical NaN */
3791     if (float64_is_quiet_nan(f, s)) {
3792         return float64_default_nan(s);
3793     }
3794 
3795     /* +-normal, +-subnormal */
3796     uint64_t val = frec7(f, exp_size, frac_size, s);
3797     return make_float64(val);
3798 }
3799 
3800 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
3801 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
3802 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
3803 GEN_VEXT_V_ENV(vfrec7_v_h)
3804 GEN_VEXT_V_ENV(vfrec7_v_w)
3805 GEN_VEXT_V_ENV(vfrec7_v_d)
3806 
3807 /* Vector Floating-Point MIN/MAX Instructions */
3808 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
3809 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
3810 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
3811 GEN_VEXT_VV_ENV(vfmin_vv_h)
3812 GEN_VEXT_VV_ENV(vfmin_vv_w)
3813 GEN_VEXT_VV_ENV(vfmin_vv_d)
3814 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
3815 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
3816 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
3817 GEN_VEXT_VF(vfmin_vf_h)
3818 GEN_VEXT_VF(vfmin_vf_w)
3819 GEN_VEXT_VF(vfmin_vf_d)
3820 
3821 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
3822 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
3823 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
3824 GEN_VEXT_VV_ENV(vfmax_vv_h)
3825 GEN_VEXT_VV_ENV(vfmax_vv_w)
3826 GEN_VEXT_VV_ENV(vfmax_vv_d)
3827 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
3828 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
3829 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
3830 GEN_VEXT_VF(vfmax_vf_h)
3831 GEN_VEXT_VF(vfmax_vf_w)
3832 GEN_VEXT_VF(vfmax_vf_d)
3833 
3834 /* Vector Floating-Point Sign-Injection Instructions */
3835 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
3836 {
3837     return deposit64(b, 0, 15, a);
3838 }
3839 
3840 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
3841 {
3842     return deposit64(b, 0, 31, a);
3843 }
3844 
3845 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
3846 {
3847     return deposit64(b, 0, 63, a);
3848 }
3849 
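/*
 * Sign injection works on raw bit patterns and never raises FP flags (the
 * float_status argument only exists to fit the OPFVV2/OPFVF2 signatures).
 * The result takes its magnitude bits from a (= vs2) and its sign from b
 * (vs1 or the scalar); fsgnjn uses the complement of that sign and fsgnjx
 * XORs the two signs.  E.g. fsgnj32(0x3f800000 (+1.0f), 0x80000000 (-0.0f))
 * yields 0xbf800000 (-1.0f).
 */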
3850 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
3851 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
3852 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
3853 GEN_VEXT_VV_ENV(vfsgnj_vv_h)
3854 GEN_VEXT_VV_ENV(vfsgnj_vv_w)
3855 GEN_VEXT_VV_ENV(vfsgnj_vv_d)
3856 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
3857 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
3858 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
3859 GEN_VEXT_VF(vfsgnj_vf_h)
3860 GEN_VEXT_VF(vfsgnj_vf_w)
3861 GEN_VEXT_VF(vfsgnj_vf_d)
3862 
3863 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
3864 {
3865     return deposit64(~b, 0, 15, a);
3866 }
3867 
3868 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
3869 {
3870     return deposit64(~b, 0, 31, a);
3871 }
3872 
3873 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
3874 {
3875     return deposit64(~b, 0, 63, a);
3876 }
3877 
3878 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
3879 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
3880 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
3881 GEN_VEXT_VV_ENV(vfsgnjn_vv_h)
3882 GEN_VEXT_VV_ENV(vfsgnjn_vv_w)
3883 GEN_VEXT_VV_ENV(vfsgnjn_vv_d)
3884 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
3885 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
3886 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
3887 GEN_VEXT_VF(vfsgnjn_vf_h)
3888 GEN_VEXT_VF(vfsgnjn_vf_w)
3889 GEN_VEXT_VF(vfsgnjn_vf_d)
3890 
3891 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
3892 {
3893     return deposit64(b ^ a, 0, 15, a);
3894 }
3895 
3896 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
3897 {
3898     return deposit64(b ^ a, 0, 31, a);
3899 }
3900 
3901 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
3902 {
3903     return deposit64(b ^ a, 0, 63, a);
3904 }
3905 
3906 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
3907 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
3908 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
3909 GEN_VEXT_VV_ENV(vfsgnjx_vv_h)
3910 GEN_VEXT_VV_ENV(vfsgnjx_vv_w)
3911 GEN_VEXT_VV_ENV(vfsgnjx_vv_d)
3912 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
3913 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
3914 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
3915 GEN_VEXT_VF(vfsgnjx_vf_h)
3916 GEN_VEXT_VF(vfsgnjx_vf_w)
3917 GEN_VEXT_VF(vfsgnjx_vf_d)
3918 
3919 /* Vector Floating-Point Compare Instructions */
3920 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP)            \
3921 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
3922                   CPURISCVState *env, uint32_t desc)          \
3923 {                                                             \
3924     uint32_t vm = vext_vm(desc);                              \
3925     uint32_t vl = env->vl;                                    \
3926     uint32_t i;                                               \
3927                                                               \
3928     for (i = env->vstart; i < vl; i++) {                      \
3929         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
3930         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
3931         if (!vm && !vext_elem_mask(v0, i)) {                  \
3932             continue;                                         \
3933         }                                                     \
3934         vext_set_elem_mask(vd, i,                             \
3935                            DO_OP(s2, s1, &env->fp_status));   \
3936     }                                                         \
3937     env->vstart = 0;                                          \
3938 }
3939 
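/*
 * Compare instructions write one mask bit per active element with
 * vext_set_elem_mask(); masked-off elements keep their previous mask bit.
 * vmfeq/vmfne use the quiet comparisons (only a signaling NaN raises
 * invalid), while vmflt/vmfle/vmfgt/vmfge below use signaling comparisons,
 * which raise invalid for any NaN operand.
 */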
3940 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
3941 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
3942 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
3943 
3944 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP)                      \
3945 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,       \
3946                   CPURISCVState *env, uint32_t desc)                \
3947 {                                                                   \
3948     uint32_t vm = vext_vm(desc);                                    \
3949     uint32_t vl = env->vl;                                          \
3950     uint32_t i;                                                     \
3951                                                                     \
3952     for (i = env->vstart; i < vl; i++) {                            \
3953         ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
3954         if (!vm && !vext_elem_mask(v0, i)) {                        \
3955             continue;                                               \
3956         }                                                           \
3957         vext_set_elem_mask(vd, i,                                   \
3958                            DO_OP(s2, (ETYPE)s1, &env->fp_status));  \
3959     }                                                               \
3960     env->vstart = 0;                                                \
3961 }
3962 
3963 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
3964 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
3965 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
3966 
3967 static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
3968 {
3969     FloatRelation compare = float16_compare_quiet(a, b, s);
3970     return compare != float_relation_equal;
3971 }
3972 
3973 static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
3974 {
3975     FloatRelation compare = float32_compare_quiet(a, b, s);
3976     return compare != float_relation_equal;
3977 }
3978 
3979 static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
3980 {
3981     FloatRelation compare = float64_compare_quiet(a, b, s);
3982     return compare != float_relation_equal;
3983 }
3984 
3985 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
3986 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
3987 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
3988 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
3989 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
3990 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
3991 
3992 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
3993 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
3994 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
3995 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
3996 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
3997 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
3998 
3999 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
4000 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
4001 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
4002 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
4003 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
4004 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
4005 
4006 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
4007 {
4008     FloatRelation compare = float16_compare(a, b, s);
4009     return compare == float_relation_greater;
4010 }
4011 
4012 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
4013 {
4014     FloatRelation compare = float32_compare(a, b, s);
4015     return compare == float_relation_greater;
4016 }
4017 
4018 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
4019 {
4020     FloatRelation compare = float64_compare(a, b, s);
4021     return compare == float_relation_greater;
4022 }
4023 
4024 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
4025 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
4026 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4027 
4028 static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4029 {
4030     FloatRelation compare = float16_compare(a, b, s);
4031     return compare == float_relation_greater ||
4032            compare == float_relation_equal;
4033 }
4034 
4035 static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
4036 {
4037     FloatRelation compare = float32_compare(a, b, s);
4038     return compare == float_relation_greater ||
4039            compare == float_relation_equal;
4040 }
4041 
4042 static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
4043 {
4044     FloatRelation compare = float64_compare(a, b, s);
4045     return compare == float_relation_greater ||
4046            compare == float_relation_equal;
4047 }
4048 
4049 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
4050 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
4051 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
4052 
4053 /* Vector Floating-Point Classify Instruction */
4054 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP)         \
4055 static void do_##NAME(void *vd, void *vs2, int i)      \
4056 {                                                      \
4057     TX2 s2 = *((T2 *)vs2 + HS2(i));                    \
4058     *((TD *)vd + HD(i)) = OP(s2);                      \
4059 }
4060 
4061 #define GEN_VEXT_V(NAME)                               \
4062 void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
4063                   CPURISCVState *env, uint32_t desc)   \
4064 {                                                      \
4065     uint32_t vm = vext_vm(desc);                       \
4066     uint32_t vl = env->vl;                             \
4067     uint32_t i;                                        \
4068                                                        \
4069     for (i = env->vstart; i < vl; i++) {               \
4070         if (!vm && !vext_elem_mask(v0, i)) {           \
4071             continue;                                  \
4072         }                                              \
4073         do_##NAME(vd, vs2, i);                         \
4074     }                                                  \
4075     env->vstart = 0;                                   \
4076 }
4077 
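/*
 * fclass returns a 10-bit mask with exactly one bit set:
 *   bit 0: -inf        bit 5: +subnormal
 *   bit 1: -normal     bit 6: +normal
 *   bit 2: -subnormal  bit 7: +inf
 *   bit 3: -0          bit 8: signaling NaN
 *   bit 4: +0          bit 9: quiet NaN
 * The zero-initialized float_status below only supplies the
 * snan_bit_is_one setting (zero, the IEEE convention used by RISC-V)
 * consulted by the *_is_quiet_nan() helpers.
 */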
4078 target_ulong fclass_h(uint64_t frs1)
4079 {
4080     float16 f = frs1;
4081     bool sign = float16_is_neg(f);
4082 
4083     if (float16_is_infinity(f)) {
4084         return sign ? 1 << 0 : 1 << 7;
4085     } else if (float16_is_zero(f)) {
4086         return sign ? 1 << 3 : 1 << 4;
4087     } else if (float16_is_zero_or_denormal(f)) {
4088         return sign ? 1 << 2 : 1 << 5;
4089     } else if (float16_is_any_nan(f)) {
4090         float_status s = { }; /* for snan_bit_is_one */
4091         return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4092     } else {
4093         return sign ? 1 << 1 : 1 << 6;
4094     }
4095 }
4096 
4097 target_ulong fclass_s(uint64_t frs1)
4098 {
4099     float32 f = frs1;
4100     bool sign = float32_is_neg(f);
4101 
4102     if (float32_is_infinity(f)) {
4103         return sign ? 1 << 0 : 1 << 7;
4104     } else if (float32_is_zero(f)) {
4105         return sign ? 1 << 3 : 1 << 4;
4106     } else if (float32_is_zero_or_denormal(f)) {
4107         return sign ? 1 << 2 : 1 << 5;
4108     } else if (float32_is_any_nan(f)) {
4109         float_status s = { }; /* for snan_bit_is_one */
4110         return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4111     } else {
4112         return sign ? 1 << 1 : 1 << 6;
4113     }
4114 }
4115 
4116 target_ulong fclass_d(uint64_t frs1)
4117 {
4118     float64 f = frs1;
4119     bool sign = float64_is_neg(f);
4120 
4121     if (float64_is_infinity(f)) {
4122         return sign ? 1 << 0 : 1 << 7;
4123     } else if (float64_is_zero(f)) {
4124         return sign ? 1 << 3 : 1 << 4;
4125     } else if (float64_is_zero_or_denormal(f)) {
4126         return sign ? 1 << 2 : 1 << 5;
4127     } else if (float64_is_any_nan(f)) {
4128         float_status s = { }; /* for snan_bit_is_one */
4129         return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4130     } else {
4131         return sign ? 1 << 1 : 1 << 6;
4132     }
4133 }
4134 
4135 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
4136 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
4137 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
4138 GEN_VEXT_V(vfclass_v_h)
4139 GEN_VEXT_V(vfclass_v_w)
4140 GEN_VEXT_V(vfclass_v_d)
4141 
4142 /* Vector Floating-Point Merge Instruction */
4143 #define GEN_VFMERGE_VF(NAME, ETYPE, H)                        \
4144 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4145                   CPURISCVState *env, uint32_t desc)          \
4146 {                                                             \
4147     uint32_t vm = vext_vm(desc);                              \
4148     uint32_t vl = env->vl;                                    \
4149     uint32_t i;                                               \
4150                                                               \
4151     for (i = env->vstart; i < vl; i++) {                      \
4152         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
4153         *((ETYPE *)vd + H(i))                                 \
4154           = (!vm && !vext_elem_mask(v0, i) ? s2 : s1);        \
4155     }                                                         \
4156     env->vstart = 0;                                          \
4157 }
4158 
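/*
 * vfmerge copies bit patterns and therefore raises no FP exceptions: an
 * element keeps the old vs2 value only when the instruction is masked
 * (vm == 0) and its mask bit is clear; otherwise it receives the scalar s1.
 */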
4159 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
4160 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
4161 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
4162 
4163 /* Single-Width Floating-Point/Integer Type-Convert Instructions */
4164 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
4165 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
4166 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
4167 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
4168 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h)
4169 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w)
4170 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d)
4171 
4172 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
4173 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
4174 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
4175 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
4176 GEN_VEXT_V_ENV(vfcvt_x_f_v_h)
4177 GEN_VEXT_V_ENV(vfcvt_x_f_v_w)
4178 GEN_VEXT_V_ENV(vfcvt_x_f_v_d)
4179 
4180 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
4181 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
4182 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
4183 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
4184 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h)
4185 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w)
4186 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d)
4187 
4188 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
4189 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
4190 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
4191 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
4192 GEN_VEXT_V_ENV(vfcvt_f_x_v_h)
4193 GEN_VEXT_V_ENV(vfcvt_f_x_v_w)
4194 GEN_VEXT_V_ENV(vfcvt_f_x_v_d)
4195 
4196 /* Widening Floating-Point/Integer Type-Convert Instructions */
4197 /* (TD, T2, TX2) */
4198 #define WOP_UU_B uint16_t, uint8_t,  uint8_t
4199 #define WOP_UU_H uint32_t, uint16_t, uint16_t
4200 #define WOP_UU_W uint64_t, uint32_t, uint32_t
4201 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. */
4202 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
4203 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
4204 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h)
4205 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w)
4206 
4207 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
4208 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
4209 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
4210 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h)
4211 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w)
4212 
4213 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */
4214 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
4215 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
4216 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
4217 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b)
4218 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h)
4219 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w)
4220 
4221 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
4222 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
4223 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
4224 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
4225 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b)
4226 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h)
4227 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w)
4228 
4229 /*
4230  * vfwcvt.f.f.v vd, vs2, vm
4231  * Convert single-width float to double-width float.
4232  */
4233 static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4234 {
4235     return float16_to_float32(a, true, s);
4236 }
4237 
4238 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
4239 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
4240 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h)
4241 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w)
4242 
4243 /* Narrowing Floating-Point/Integer Type-Convert Instructions */
4244 /* (TD, T2, TX2) */
4245 #define NOP_UU_B uint8_t,  uint16_t, uint32_t
4246 #define NOP_UU_H uint16_t, uint32_t, uint32_t
4247 #define NOP_UU_W uint32_t, uint64_t, uint64_t
4248 /* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
4249 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
4250 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
4251 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
4252 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b)
4253 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h)
4254 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w)
4255 
4256 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
4257 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
4258 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
4259 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
4260 GEN_VEXT_V_ENV(vfncvt_x_f_w_b)
4261 GEN_VEXT_V_ENV(vfncvt_x_f_w_h)
4262 GEN_VEXT_V_ENV(vfncvt_x_f_w_w)
4263 
4264 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */
4265 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
4266 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
4267 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h)
4268 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w)
4269 
4270 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
4271 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
4272 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
4273 GEN_VEXT_V_ENV(vfncvt_f_x_w_h)
4274 GEN_VEXT_V_ENV(vfncvt_f_x_w_w)
4275 
4276 /* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float. */
4277 static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4278 {
4279     return float32_to_float16(a, true, s);
4280 }
4281 
4282 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
4283 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
4284 GEN_VEXT_V_ENV(vfncvt_f_f_w_h)
4285 GEN_VEXT_V_ENV(vfncvt_f_f_w_w)
4286 
4287 /*
4288  *** Vector Reduction Operations
4289  */
4290 /* Vector Single-Width Integer Reduction Instructions */
4291 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP)          \
4292 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
4293         void *vs2, CPURISCVState *env, uint32_t desc)     \
4294 {                                                         \
4295     uint32_t vm = vext_vm(desc);                          \
4296     uint32_t vl = env->vl;                                \
4297     uint32_t i;                                           \
4298     TD s1 =  *((TD *)vs1 + HD(0));                        \
4299                                                           \
4300     for (i = env->vstart; i < vl; i++) {                  \
4301         TS2 s2 = *((TS2 *)vs2 + HS2(i));                  \
4302         if (!vm && !vext_elem_mask(v0, i)) {              \
4303             continue;                                     \
4304         }                                                 \
4305         s1 = OP(s1, (TD)s2);                              \
4306     }                                                     \
4307     *((TD *)vd + HD(0)) = s1;                             \
4308     env->vstart = 0;                                      \
4309 }
4310 
4311 /* vd[0] = sum(vs1[0], vs2[*]) */
4312 GEN_VEXT_RED(vredsum_vs_b, int8_t,  int8_t,  H1, H1, DO_ADD)
4313 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
4314 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
4315 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)
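/*
 * Worked example of the reduction pattern, assuming vl = 4, all elements
 * active, vs1[0] = 10 and vs2 = {1, 2, 3, 4}: the accumulator starts at
 * vs1[0] and folds in each active vs2 element, so vredsum.vs leaves
 * vd[0] = 10 + 1 + 2 + 3 + 4 = 20.  Elements of vd other than vd[0] are
 * not written by these helpers.
 */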
4316 
4317 /* vd[0] = maxu(vs1[0], vs2[*]) */
4318 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t,  uint8_t,  H1, H1, DO_MAX)
4319 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
4320 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
4321 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)
4322 
4323 /* vd[0] = max(vs1[0], vs2[*]) */
4324 GEN_VEXT_RED(vredmax_vs_b, int8_t,  int8_t,  H1, H1, DO_MAX)
4325 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
4326 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
4327 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)
4328 
4329 /* vd[0] = minu(vs1[0], vs2[*]) */
4330 GEN_VEXT_RED(vredminu_vs_b, uint8_t,  uint8_t,  H1, H1, DO_MIN)
4331 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
4332 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
4333 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)
4334 
4335 /* vd[0] = min(vs1[0], vs2[*]) */
4336 GEN_VEXT_RED(vredmin_vs_b, int8_t,  int8_t,  H1, H1, DO_MIN)
4337 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
4338 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
4339 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)
4340 
4341 /* vd[0] = and(vs1[0], vs2[*]) */
4342 GEN_VEXT_RED(vredand_vs_b, int8_t,  int8_t,  H1, H1, DO_AND)
4343 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
4344 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
4345 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)
4346 
4347 /* vd[0] = or(vs1[0], vs2[*]) */
4348 GEN_VEXT_RED(vredor_vs_b, int8_t,  int8_t,  H1, H1, DO_OR)
4349 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
4350 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
4351 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)
4352 
4353 /* vd[0] = xor(vs1[0], vs2[*]) */
4354 GEN_VEXT_RED(vredxor_vs_b, int8_t,  int8_t,  H1, H1, DO_XOR)
4355 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
4356 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
4357 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)
4358 
4359 /* Vector Widening Integer Reduction Instructions */
4360 /* signed sum reduction into double-width accumulator */
4361 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t,  H2, H1, DO_ADD)
4362 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
4363 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)
4364 
4365 /* Unsigned sum reduction into double-width accumulator */
4366 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t,  H2, H1, DO_ADD)
4367 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
4368 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
4369 
4370 /* Vector Single-Width Floating-Point Reduction Instructions */
4371 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP)          \
4372 void HELPER(NAME)(void *vd, void *v0, void *vs1,           \
4373                   void *vs2, CPURISCVState *env,           \
4374                   uint32_t desc)                           \
4375 {                                                          \
4376     uint32_t vm = vext_vm(desc);                           \
4377     uint32_t vl = env->vl;                                 \
4378     uint32_t i;                                            \
4379     TD s1 =  *((TD *)vs1 + HD(0));                         \
4380                                                            \
4381     for (i = env->vstart; i < vl; i++) {                   \
4382         TS2 s2 = *((TS2 *)vs2 + HS2(i));                   \
4383         if (!vm && !vext_elem_mask(v0, i)) {               \
4384             continue;                                      \
4385         }                                                  \
4386         s1 = OP(s1, (TD)s2, &env->fp_status);              \
4387     }                                                      \
4388     *((TD *)vd + HD(0)) = s1;                              \
4389     env->vstart = 0;                                       \
4390 }
4391 
4392 /* Unordered sum */
4393 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4394 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4395 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
4396 
4397 /* Maximum value */
4398 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number)
4399 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number)
4400 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number)
4401 
4402 /* Minimum value */
4403 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number)
4404 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number)
4405 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number)
4406 
4407 /* Vector Widening Floating-Point Reduction Instructions */
4408 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
4409 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
4410                             void *vs2, CPURISCVState *env, uint32_t desc)
4411 {
4412     uint32_t vm = vext_vm(desc);
4413     uint32_t vl = env->vl;
4414     uint32_t i;
4415     uint32_t s1 =  *((uint32_t *)vs1 + H4(0));
4416 
4417     for (i = env->vstart; i < vl; i++) {
4418         uint16_t s2 = *((uint16_t *)vs2 + H2(i));
4419         if (!vm && !vext_elem_mask(v0, i)) {
4420             continue;
4421         }
4422         s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status),
4423                          &env->fp_status);
4424     }
4425     *((uint32_t *)vd + H4(0)) = s1;
4426     env->vstart = 0;
4427 }
4428 
4429 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
4430                             void *vs2, CPURISCVState *env, uint32_t desc)
4431 {
4432     uint32_t vm = vext_vm(desc);
4433     uint32_t vl = env->vl;
4434     uint32_t i;
4435     uint64_t s1 =  *((uint64_t *)vs1);
4436 
4437     for (i = env->vstart; i < vl; i++) {
4438         uint32_t s2 = *((uint32_t *)vs2 + H4(i));
4439         if (!vm && !vext_elem_mask(v0, i)) {
4440             continue;
4441         }
4442         s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status),
4443                          &env->fp_status);
4444     }
4445     *((uint64_t *)vd) = s1;
4446     env->vstart = 0;
4447 }
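/*
 * In both widening sums above each SEW source element is first promoted
 * to 2*SEW (float16_to_float32 or float32_to_float64) and then added in
 * the 2*SEW format, so rounding can differ from summing at SEW precision
 * and widening the final result once.
 */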
4448 
4449 /*
4450  *** Vector Mask Operations
4451  */
4452 /* Vector Mask-Register Logical Instructions */
4453 #define GEN_VEXT_MASK_VV(NAME, OP)                        \
4454 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
4455                   void *vs2, CPURISCVState *env,          \
4456                   uint32_t desc)                          \
4457 {                                                         \
4458     uint32_t vl = env->vl;                                \
4459     uint32_t i;                                           \
4460     int a, b;                                             \
4461                                                           \
4462     for (i = env->vstart; i < vl; i++) {                  \
4463         a = vext_elem_mask(vs1, i);                       \
4464         b = vext_elem_mask(vs2, i);                       \
4465         vext_set_elem_mask(vd, i, OP(b, a));              \
4466     }                                                     \
4467     env->vstart = 0;                                      \
4468 }
4469 
4470 #define DO_NAND(N, M)  (!(N & M))
4471 #define DO_ANDNOT(N, M)  (N & !M)
4472 #define DO_NOR(N, M)  (!(N | M))
4473 #define DO_ORNOT(N, M)  (N | !M)
4474 #define DO_XNOR(N, M)  (!(N ^ M))
4475 
4476 GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
4477 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
4478 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
4479 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
4480 GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
4481 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
4482 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
4483 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
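/*
 * The DO_* macros above rely on vext_elem_mask() returning a canonical
 * 0 or 1, so logical '!' is enough to invert an operand bit.  For
 * illustration, with vs2.mask = {1, 0, 1, 0} and vs1.mask = {1, 1, 0, 0}:
 *   vmand.mm  -> {1, 0, 0, 0}      vmnand.mm -> {0, 1, 1, 1}
 *   vmandn.mm -> {0, 0, 1, 0}      vmorn.mm  -> {1, 0, 1, 1}
 */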
4484 
4485 /* Vector count population in mask vcpop */
4486 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
4487                              uint32_t desc)
4488 {
4489     target_ulong cnt = 0;
4490     uint32_t vm = vext_vm(desc);
4491     uint32_t vl = env->vl;
4492     int i;
4493 
4494     for (i = env->vstart; i < vl; i++) {
4495         if (vm || vext_elem_mask(v0, i)) {
4496             if (vext_elem_mask(vs2, i)) {
4497                 cnt++;
4498             }
4499         }
4500     }
4501     env->vstart = 0;
4502     return cnt;
4503 }
4504 
4505 /* vfirst find-first-set mask bit */
4506 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4507                               uint32_t desc)
4508 {
4509     uint32_t vm = vext_vm(desc);
4510     uint32_t vl = env->vl;
4511     int i;
4512 
4513     for (i = env->vstart; i < vl; i++) {
4514         if (vm || vext_elem_mask(v0, i)) {
4515             if (vext_elem_mask(vs2, i)) {
4516                 return i;
4517             }
4518         }
4519     }
4520     env->vstart = 0;
4521     return -1LL;
4522 }
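/*
 * Example, assuming all elements are active, vl = 5 and
 * vs2.mask = {0, 0, 1, 0, 1}: vcpop.m returns 2 and vfirst.m returns 2
 * (the index of the first set bit).  If no active mask bit is set,
 * vfirst.m returns -1.
 */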
4523 
4524 enum set_mask_type {
4525     ONLY_FIRST = 1,
4526     INCLUDE_FIRST,
4527     BEFORE_FIRST,
4528 };
4529 
4530 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
4531                    uint32_t desc, enum set_mask_type type)
4532 {
4533     uint32_t vm = vext_vm(desc);
4534     uint32_t vl = env->vl;
4535     int i;
4536     bool first_mask_bit = false;
4537 
4538     for (i = env->vstart; i < vl; i++) {
4539         if (!vm && !vext_elem_mask(v0, i)) {
4540             continue;
4541         }
4542         /* write a zero to all following active elements */
4543         if (first_mask_bit) {
4544             vext_set_elem_mask(vd, i, 0);
4545             continue;
4546         }
4547         if (vext_elem_mask(vs2, i)) {
4548             first_mask_bit = true;
4549             if (type == BEFORE_FIRST) {
4550                 vext_set_elem_mask(vd, i, 0);
4551             } else {
4552                 vext_set_elem_mask(vd, i, 1);
4553             }
4554         } else {
4555             if (type == ONLY_FIRST) {
4556                 vext_set_elem_mask(vd, i, 0);
4557             } else {
4558                 vext_set_elem_mask(vd, i, 1);
4559             }
4560         }
4561     }
4562     env->vstart = 0;
4563 }
4564 
4565 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4566                      uint32_t desc)
4567 {
4568     vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
4569 }
4570 
4571 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4572                      uint32_t desc)
4573 {
4574     vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
4575 }
4576 
4577 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4578                      uint32_t desc)
4579 {
4580     vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
4581 }
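/*
 * Worked example for the three forms above, assuming all elements are
 * active, vl = 6 and vs2.mask = {0, 0, 0, 1, 0, 1}:
 *   vmsbf.m (set-before-first)    -> {1, 1, 1, 0, 0, 0}
 *   vmsif.m (set-including-first) -> {1, 1, 1, 1, 0, 0}
 *   vmsof.m (set-only-first)      -> {0, 0, 0, 1, 0, 0}
 */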
4582 
4583 /* Vector Iota Instruction */
4584 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H)                                  \
4585 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env,      \
4586                   uint32_t desc)                                          \
4587 {                                                                         \
4588     uint32_t vm = vext_vm(desc);                                          \
4589     uint32_t vl = env->vl;                                                \
4590     uint32_t sum = 0;                                                     \
4591     int i;                                                                \
4592                                                                           \
4593     for (i = env->vstart; i < vl; i++) {                                  \
4594         if (!vm && !vext_elem_mask(v0, i)) {                              \
4595             continue;                                                     \
4596         }                                                                 \
4597         *((ETYPE *)vd + H(i)) = sum;                                      \
4598         if (vext_elem_mask(vs2, i)) {                                     \
4599             sum++;                                                        \
4600         }                                                                 \
4601     }                                                                     \
4602     env->vstart = 0;                                                      \
4603 }
4604 
4605 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t,  H1)
4606 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
4607 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
4608 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
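/*
 * Example, assuming all elements are active and vs2.mask = {1, 0, 1, 1, 0}:
 * each destination element receives the count of set mask bits that
 * precede it, so viota.m writes vd = {0, 1, 1, 2, 3}.
 */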
4609 
4610 /* Vector Element Index Instruction */
4611 #define GEN_VEXT_VID_V(NAME, ETYPE, H)                                    \
4612 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc)  \
4613 {                                                                         \
4614     uint32_t vm = vext_vm(desc);                                          \
4615     uint32_t vl = env->vl;                                                \
4616     int i;                                                                \
4617                                                                           \
4618     for (i = env->vstart; i < vl; i++) {                                  \
4619         if (!vm && !vext_elem_mask(v0, i)) {                              \
4620             continue;                                                     \
4621         }                                                                 \
4622         *((ETYPE *)vd + H(i)) = i;                                        \
4623     }                                                                     \
4624     env->vstart = 0;                                                      \
4625 }
4626 
4627 GEN_VEXT_VID_V(vid_v_b, uint8_t,  H1)
4628 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
4629 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
4630 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
4631 
4632 /*
4633  *** Vector Permutation Instructions
4634  */
4635 
4636 /* Vector Slide Instructions */
4637 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                              \
4638 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4639                   CPURISCVState *env, uint32_t desc)                      \
4640 {                                                                         \
4641     uint32_t vm = vext_vm(desc);                                          \
4642     uint32_t vl = env->vl;                                                \
4643     target_ulong offset = s1, i_min, i;                                   \
4644                                                                           \
4645     i_min = MAX(env->vstart, offset);                                     \
4646     for (i = i_min; i < vl; i++) {                                        \
4647         if (!vm && !vext_elem_mask(v0, i)) {                              \
4648             continue;                                                     \
4649         }                                                                 \
4650         *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
4651     }                                                                     \
         env->vstart = 0;                                                      \
4652 }
4653 
4654 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
4655 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t,  H1)
4656 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
4657 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
4658 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
4659 
4660 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                            \
4661 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4662                   CPURISCVState *env, uint32_t desc)                      \
4663 {                                                                         \
4664     uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
4665     uint32_t vm = vext_vm(desc);                                          \
4666     uint32_t vl = env->vl;                                                \
4667     target_ulong i_max, i;                                                \
4668                                                                           \
4669     i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart);       \
4670     for (i = env->vstart; i < i_max; ++i) {                               \
4671         if (vm || vext_elem_mask(v0, i)) {                                \
4672             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));          \
4673         }                                                                 \
4674     }                                                                     \
4675                                                                           \
4676     for (i = i_max; i < vl; ++i) {                                        \
4677         if (vm || vext_elem_mask(v0, i)) {                                \
4678             *((ETYPE *)vd + H(i)) = 0;                                    \
4679         }                                                                 \
4680     }                                                                     \
4681                                                                           \
4682     env->vstart = 0;                                                      \
4683 }
4684 
4685 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
4686 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t,  H1)
4687 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
4688 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
4689 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
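/*
 * Illustration, assuming vl == vlmax == 4, x[rs1] == 1 and
 * vs2 = {a, b, c, d}:
 *   vslideup.vx   leaves vd[0] untouched and sets vd[1..3] = {a, b, c};
 *   vslidedown.vx yields vd = {b, c, d, 0}, since source elements at or
 *   beyond vlmax read as zero.
 */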
4690 
4691 #define GEN_VEXT_VSLIDE1UP(ESZ, H)                                          \
4692 static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
4693                      CPURISCVState *env, uint32_t desc)                     \
4694 {                                                                           \
4695     typedef uint##ESZ##_t ETYPE;                                            \
4696     uint32_t vm = vext_vm(desc);                                            \
4697     uint32_t vl = env->vl;                                                  \
4698     uint32_t i;                                                             \
4699                                                                             \
4700     for (i = env->vstart; i < vl; i++) {                                    \
4701         if (!vm && !vext_elem_mask(v0, i)) {                                \
4702             continue;                                                       \
4703         }                                                                   \
4704         if (i == 0) {                                                       \
4705             *((ETYPE *)vd + H(i)) = s1;                                     \
4706         } else {                                                            \
4707             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));             \
4708         }                                                                   \
4709     }                                                                       \
4710     env->vstart = 0;                                                        \
4711 }
4712 
4713 GEN_VEXT_VSLIDE1UP(8,  H1)
4714 GEN_VEXT_VSLIDE1UP(16, H2)
4715 GEN_VEXT_VSLIDE1UP(32, H4)
4716 GEN_VEXT_VSLIDE1UP(64, H8)
4717 
4718 #define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ)                          \
4719 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4720                   CPURISCVState *env, uint32_t desc)              \
4721 {                                                                 \
4722     vslide1up_##ESZ(vd, v0, s1, vs2, env, desc);                  \
4723 }
4724 
4725 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
4726 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
4727 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
4728 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
4729 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
4730 
4731 #define GEN_VEXT_VSLIDE1DOWN(ESZ, H)                                          \
4732 static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
4733                        CPURISCVState *env, uint32_t desc)                     \
4734 {                                                                             \
4735     typedef uint##ESZ##_t ETYPE;                                              \
4736     uint32_t vm = vext_vm(desc);                                              \
4737     uint32_t vl = env->vl;                                                    \
4738     uint32_t i;                                                               \
4739                                                                               \
4740     for (i = env->vstart; i < vl; i++) {                                      \
4741         if (!vm && !vext_elem_mask(v0, i)) {                                  \
4742             continue;                                                         \
4743         }                                                                     \
4744         if (i == vl - 1) {                                                    \
4745             *((ETYPE *)vd + H(i)) = s1;                                       \
4746         } else {                                                              \
4747             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));               \
4748         }                                                                     \
4749     }                                                                         \
4750     env->vstart = 0;                                                          \
4751 }
4752 
4753 GEN_VEXT_VSLIDE1DOWN(8,  H1)
4754 GEN_VEXT_VSLIDE1DOWN(16, H2)
4755 GEN_VEXT_VSLIDE1DOWN(32, H4)
4756 GEN_VEXT_VSLIDE1DOWN(64, H8)
4757 
4758 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ)                        \
4759 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4760                   CPURISCVState *env, uint32_t desc)              \
4761 {                                                                 \
4762     vslide1down_##ESZ(vd, v0, s1, vs2, env, desc);                \
4763 }
4764 
4765 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
4766 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
4767 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
4768 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
4769 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
4770 
4771 /* Vector Floating-Point Slide Instructions */
4772 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ)                     \
4773 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4774                   CPURISCVState *env, uint32_t desc)          \
4775 {                                                             \
4776     vslide1up_##ESZ(vd, v0, s1, vs2, env, desc);              \
4777 }
4778 
4779 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
4780 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
4781 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
4782 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)
4783 
4784 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ)                   \
4785 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4786                   CPURISCVState *env, uint32_t desc)          \
4787 {                                                             \
4788     vslide1down_##ESZ(vd, v0, s1, vs2, env, desc);            \
4789 }
4790 
4791 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
4792 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
4793 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
4794 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
4795 
4796 /* Vector Register Gather Instruction */
4797 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2)                    \
4798 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
4799                   CPURISCVState *env, uint32_t desc)                      \
4800 {                                                                         \
4801     uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2)));             \
4802     uint32_t vm = vext_vm(desc);                                          \
4803     uint32_t vl = env->vl;                                                \
4804     uint64_t index;                                                       \
4805     uint32_t i;                                                           \
4806                                                                           \
4807     for (i = env->vstart; i < vl; i++) {                                  \
4808         if (!vm && !vext_elem_mask(v0, i)) {                              \
4809             continue;                                                     \
4810         }                                                                 \
4811         index = *((TS1 *)vs1 + HS1(i));                                   \
4812         if (index >= vlmax) {                                             \
4813             *((TS2 *)vd + HS2(i)) = 0;                                    \
4814         } else {                                                          \
4815             *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index));           \
4816         }                                                                 \
4817     }                                                                     \
4818     env->vstart = 0;                                                      \
4819 }
4820 
4821 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
4822 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t,  uint8_t,  H1, H1)
4823 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
4824 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
4825 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)
4826 
4827 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t,  H2, H1)
4828 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
4829 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
4830 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
4831 
4832 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                              \
4833 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4834                   CPURISCVState *env, uint32_t desc)                      \
4835 {                                                                         \
4836     uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
4837     uint32_t vm = vext_vm(desc);                                          \
4838     uint32_t vl = env->vl;                                                \
4839     uint64_t index = s1;                                                  \
4840     uint32_t i;                                                           \
4841                                                                           \
4842     for (i = env->vstart; i < vl; i++) {                                  \
4843         if (!vm && !vext_elem_mask(v0, i)) {                              \
4844             continue;                                                     \
4845         }                                                                 \
4846         if (index >= vlmax) {                                             \
4847             *((ETYPE *)vd + H(i)) = 0;                                    \
4848         } else {                                                          \
4849             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
4850         }                                                                 \
4851     }                                                                     \
4852     env->vstart = 0;                                                      \
4853 }
4854 
4855 /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
4856 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t,  H1)
4857 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
4858 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
4859 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
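/*
 * Example for the gather forms, assuming vl == vlmax == 4, all elements
 * active and vs2 = {a, b, c, d}:
 *   vrgather.vv with vs1 = {3, 0, 5, 1} gives vd = {d, a, 0, b}
 *   (the out-of-range index 5 selects zero);
 *   vrgather.vx with x[rs1] = 2 splats vs2[2]: vd = {c, c, c, c}.
 */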
4860 
4861 /* Vector Compress Instruction */
4862 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
4863 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
4864                   CPURISCVState *env, uint32_t desc)                      \
4865 {                                                                         \
4866     uint32_t vl = env->vl;                                                \
4867     uint32_t num = 0, i;                                                  \
4868                                                                           \
4869     for (i = env->vstart; i < vl; i++) {                                  \
4870         if (!vext_elem_mask(vs1, i)) {                                    \
4871             continue;                                                     \
4872         }                                                                 \
4873         *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
4874         num++;                                                            \
4875     }                                                                     \
4876     env->vstart = 0;                                                      \
4877 }
4878 
4879 /* Compress into vd elements of vs2 where vs1 is enabled */
4880 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t,  H1)
4881 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
4882 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
4883 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
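/*
 * Example, assuming vl = 4, vs1.mask = {1, 0, 1, 1} and vs2 = {a, b, c, d}:
 * the enabled source elements are packed to the front of vd, giving
 * vd[0..2] = {a, c, d}; the remaining elements of vd are not written by
 * this helper.
 */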
4884 
4885 /* Vector Whole Register Move */
4886 void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
4887 {
4888     /* EEW = SEW */
4889     uint32_t maxsz = simd_maxsz(desc);
4890     uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
4891     uint32_t startb = env->vstart * sewb;
4892     uint32_t i = startb;
4893 
4894     memcpy((uint8_t *)vd + H1(i),
4895            (uint8_t *)vs2 + H1(i),
4896            maxsz - startb);
4897 
4898     env->vstart = 0;
4899 }
4900 
4901 /* Vector Integer Extension */
4902 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)            \
4903 void HELPER(NAME)(void *vd, void *v0, void *vs2,                 \
4904                   CPURISCVState *env, uint32_t desc)             \
4905 {                                                                \
4906     uint32_t vl = env->vl;                                       \
4907     uint32_t vm = vext_vm(desc);                                 \
4908     uint32_t i;                                                  \
4909                                                                  \
4910     for (i = env->vstart; i < vl; i++) {                         \
4911         if (!vm && !vext_elem_mask(v0, i)) {                     \
4912             continue;                                            \
4913         }                                                        \
4914         *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));       \
4915     }                                                            \
4916     env->vstart = 0;                                             \
4917 }
4918 
4919 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t,  H2, H1)
4920 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
4921 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
4922 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t,  H4, H1)
4923 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
4924 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t,  H8, H1)
4925 
4926 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t,  H2, H1)
4927 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
4928 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
4929 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t,  H4, H1)
4930 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
4931 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t,  H8, H1)
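/*
 * Example: vzext.vf2 with SEW = 16 reads each source element as uint8_t
 * and zero-extends it, so vs2[i] = 0x80 becomes vd[i] = 0x0080, while
 * vsext.vf2 sign-extends the same byte to 0xFF80.
 */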
4932