xref: /openbmc/qemu/target/riscv/vector_helper.c (revision e848a1e5)
1 /*
2  * RISC-V Vector Extension Helpers for QEMU.
3  *
4  * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2 or later, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License along with
16  * this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include "qemu/osdep.h"
20 #include "qemu/host-utils.h"
21 #include "qemu/bitops.h"
22 #include "cpu.h"
23 #include "exec/memop.h"
24 #include "exec/exec-all.h"
25 #include "exec/helper-proto.h"
26 #include "fpu/softfloat.h"
27 #include "tcg/tcg-gvec-desc.h"
28 #include "internals.h"
29 #include <math.h>
30 
31 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
32                             target_ulong s2)
33 {
34     int vlmax, vl;
35     RISCVCPU *cpu = env_archcpu(env);
36     uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
37     uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
38     uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
39     bool vill = FIELD_EX64(s2, VTYPE, VILL);
40     target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED);
41 
42     if (lmul & 4) {
43         /* Fractional LMUL. */
44         if (lmul == 4 ||
45             cpu->cfg.elen >> (8 - lmul) < sew) {
46             vill = true;
47         }
48     }
49 
50     if ((sew > cpu->cfg.elen)
51         || vill
52         || (ediv != 0)
53         || (reserved != 0)) {
54         /* only set vill bit. */
55         env->vtype = FIELD_DP64(0, VTYPE, VILL, 1);
56         env->vl = 0;
57         env->vstart = 0;
58         return 0;
59     }
60 
61     vlmax = vext_get_vlmax(cpu, s2);
62     if (s1 <= vlmax) {
63         vl = s1;
64     } else {
65         vl = vlmax;
66     }
67     env->vl = vl;
68     env->vtype = s2;
69     env->vstart = 0;
70     return vl;
71 }
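/*
 * Worked example (illustrative values, not from the source): with
 * VLEN = 128 bits, a vtype requesting SEW = 32 (vsew = 2) and LMUL = 2
 * (vlmul = 001) gives VLMAX = LMUL * VLEN / SEW = 2 * 128 / 32 = 8.
 * A requested AVL of s1 = 10 is clamped to vl = 8, while s1 = 5 keeps
 * vl = 5.  If SEW exceeded cpu->cfg.elen, the vill path above would
 * instead zero vl and set only VTYPE.VILL.
 */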
72 
73 /*
74  * Note that vector data is stored in host-endian 64-bit chunks,
75  * so addressing units smaller than that needs a host-endian fixup.
76  */
77 #ifdef HOST_WORDS_BIGENDIAN
78 #define H1(x)   ((x) ^ 7)
79 #define H1_2(x) ((x) ^ 6)
80 #define H1_4(x) ((x) ^ 4)
81 #define H2(x)   ((x) ^ 3)
82 #define H4(x)   ((x) ^ 1)
83 #define H8(x)   ((x))
84 #else
85 #define H1(x)   (x)
86 #define H1_2(x) (x)
87 #define H1_4(x) (x)
88 #define H2(x)   (x)
89 #define H4(x)   (x)
90 #define H8(x)   (x)
91 #endif
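/*
 * Illustrative mapping (big-endian host): the XOR keeps guest element 0
 * in the least-significant byte of each host 64-bit chunk, so
 * H1(0) = 7, H1(1) = 6, ..., H1(7) = 0 for byte elements and
 * H2(0) = 3, ..., H2(3) = 0 for halfword elements.  On little-endian
 * hosts the macros are identity mappings and no fixup is needed.
 */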
92 
93 static inline uint32_t vext_nf(uint32_t desc)
94 {
95     return FIELD_EX32(simd_data(desc), VDATA, NF);
96 }
97 
98 static inline uint32_t vext_vm(uint32_t desc)
99 {
100     return FIELD_EX32(simd_data(desc), VDATA, VM);
101 }
102 
103 /*
104  * Encode LMUL to lmul as follows:
105  *     LMUL    vlmul    lmul
106  *      1       000       0
107  *      2       001       1
108  *      4       010       2
109  *      8       011       3
110  *      -       100       -
111  *     1/8      101      -3
112  *     1/4      110      -2
113  *     1/2      111      -1
114  */
115 static inline int32_t vext_lmul(uint32_t desc)
116 {
117     return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
118 }
119 
120 /*
121  * Get the maximum number of elements that can be operated on.
122  *
123  * esz: log2 of element size in bytes.
124  */
125 static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
126 {
127     /*
128      * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
129      * so vlen in bytes (vlenb) is encoded as maxsz.
130      */
131     uint32_t vlenb = simd_maxsz(desc);
132 
133     /* Return VLMAX */
134     int scale = vext_lmul(desc) - esz;
135     return scale < 0 ? vlenb >> -scale : vlenb << scale;
136 }
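/*
 * Worked example (assumed values): VLEN = 128 bits gives vlenb = 16.
 * For 32-bit elements esz = 2, so with LMUL = 2 (lmul = 1) the scale is
 * 1 - 2 = -1 and VLMAX = 16 >> 1 = 8 elements; with LMUL = 1/2
 * (lmul = -1) the scale is -3 and VLMAX = 16 >> 3 = 2 elements.
 */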
137 
138 /*
139  * This function checks the watchpoint before the real load operation.
140  *
141  * In softmmu mode, the TLB API probe_access is enough for the watchpoint check.
142  * In user mode, there is no watchpoint support for now.
143  *
144  * It will trigger an exception if there is no mapping in the TLB
145  * and the page table walk can't fill the TLB entry. Then the guest
146  * software can return here after processing the exception, or never return.
147  */
148 static void probe_pages(CPURISCVState *env, target_ulong addr,
149                         target_ulong len, uintptr_t ra,
150                         MMUAccessType access_type)
151 {
152     target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
153     target_ulong curlen = MIN(pagelen, len);
154 
155     probe_access(env, addr, curlen, access_type,
156                  cpu_mmu_index(env, false), ra);
157     if (len > curlen) {
158         addr += curlen;
159         curlen = len - curlen;
160         probe_access(env, addr, curlen, access_type,
161                      cpu_mmu_index(env, false), ra);
162     }
163 }
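/*
 * Example of the two-page split (illustrative, 4 KiB pages assumed):
 * for addr = 0x1ff8 and len = 16, pagelen = 8, so the first
 * probe_access() covers [0x1ff8, 0x2000) and the second covers
 * [0x2000, 0x2008).  Either call may raise a guest page-fault
 * exception, unwinding through ra.
 */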
164 
165 static inline void vext_set_elem_mask(void *v0, int index,
166                                       uint8_t value)
167 {
168     int idx = index / 64;
169     int pos = index % 64;
170     uint64_t old = ((uint64_t *)v0)[idx];
171     ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
172 }
173 
174 /*
175  * Earlier designs (pre-0.9) had a varying number of bits
176  * per mask value (MLEN). In the 0.9 design, MLEN=1.
177  * (Section 4.5)
178  */
179 static inline int vext_elem_mask(void *v0, int index)
180 {
181     int idx = index / 64;
182     int pos = index % 64;
183     return (((uint64_t *)v0)[idx] >> pos) & 1;
184 }
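/*
 * With MLEN = 1, element i owns exactly one mask bit: bit (i % 64) of
 * the (i / 64)-th host uint64_t of v0.  For example, element 70 is
 * bit 6 of the second 64-bit word.
 */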
185 
186 /* elements operations for load and store */
187 typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
188                                uint32_t idx, void *vd, uintptr_t retaddr);
189 
190 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
191 static void NAME(CPURISCVState *env, abi_ptr addr,         \
192                  uint32_t idx, void *vd, uintptr_t retaddr)\
193 {                                                          \
194     ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
195     *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
196 }
197 
198 GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
199 GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
200 GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
201 GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)
202 
203 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
204 static void NAME(CPURISCVState *env, abi_ptr addr,         \
205                  uint32_t idx, void *vd, uintptr_t retaddr)\
206 {                                                          \
207     ETYPE data = *((ETYPE *)vd + H(idx));                  \
208     cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
209 }
210 
211 GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
212 GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
213 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
214 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
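/*
 * For reference, GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl) expands to
 * roughly the following (mechanical expansion, shown as a sketch):
 *
 *   static void lde_w(CPURISCVState *env, abi_ptr addr,
 *                     uint32_t idx, void *vd, uintptr_t retaddr)
 *   {
 *       int32_t *cur = ((int32_t *)vd + H4(idx));
 *       *cur = cpu_ldl_data_ra(env, addr, retaddr);
 *   }
 *
 * i.e. one guest 32-bit load into element idx of the destination
 * register, with retaddr used for exception unwinding.
 */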
215 
216 /*
217  *** stride: access vector element from strided memory
218  */
219 static void
220 vext_ldst_stride(void *vd, void *v0, target_ulong base,
221                  target_ulong stride, CPURISCVState *env,
222                  uint32_t desc, uint32_t vm,
223                  vext_ldst_elem_fn *ldst_elem,
224                  uint32_t esz, uintptr_t ra, MMUAccessType access_type)
225 {
226     uint32_t i, k;
227     uint32_t nf = vext_nf(desc);
228     uint32_t max_elems = vext_max_elems(desc, esz);
229 
230     for (i = env->vstart; i < env->vl; i++, env->vstart++) {
231         if (!vm && !vext_elem_mask(v0, i)) {
232             continue;
233         }
234 
235         k = 0;
236         while (k < nf) {
237             target_ulong addr = base + stride * i + (k << esz);
238             ldst_elem(env, addr, i + k * max_elems, vd, ra);
239             k++;
240         }
241     }
242     env->vstart = 0;
243 }
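/*
 * Address pattern (illustrative values): for a segment load with
 * nf = 2, esz = 2 (32-bit elements) and stride = 12, element i of
 * field k is read from base + 12 * i + (k << 2) and written to flat
 * destination index i + k * max_elems, i.e. each field lands in its
 * own register group after vd.
 */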
244 
245 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
246 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
247                   target_ulong stride, CPURISCVState *env,              \
248                   uint32_t desc)                                        \
249 {                                                                       \
250     uint32_t vm = vext_vm(desc);                                        \
251     vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
252                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);      \
253 }
254 
255 GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b)
256 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
257 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
258 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)
259 
260 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                       \
261 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
262                   target_ulong stride, CPURISCVState *env,              \
263                   uint32_t desc)                                        \
264 {                                                                       \
265     uint32_t vm = vext_vm(desc);                                        \
266     vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
267                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);     \
268 }
269 
270 GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b)
271 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
272 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
273 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
274 
275 /*
276  *** unit-stride: access elements stored contiguously in memory
277  */
278 
279 /* unmasked unit-stride load and store operation */
280 static void
281 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
282              vext_ldst_elem_fn *ldst_elem,
283              uint32_t esz, uintptr_t ra, MMUAccessType access_type)
284 {
285     uint32_t i, k;
286     uint32_t nf = vext_nf(desc);
287     uint32_t max_elems = vext_max_elems(desc, esz);
288 
289     /* load/store bytes from/to guest memory */
290     for (i = env->vstart; i < env->vl; i++, env->vstart++) {
291         k = 0;
292         while (k < nf) {
293             target_ulong addr = base + ((i * nf + k) << esz);
294             ldst_elem(env, addr, i + k * max_elems, vd, ra);
295             k++;
296         }
297     }
298     env->vstart = 0;
299 }
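/*
 * Here the fields of a segment are contiguous in memory: for nf = 2 and
 * esz = 2 the accesses hit base + 0, base + 4, base + 8, base + 12, ...
 * since element i of field k sits at base + ((i * nf + k) << esz).
 * With nf = 1 this degenerates to a plain unit-stride access.
 */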
300 
301 /*
302  * The masked unit-stride load and store operations are a special case of the
303  * strided access, with stride = NF * sizeof(ETYPE).
304  */
305 
306 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN)                            \
307 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
308                          CPURISCVState *env, uint32_t desc)             \
309 {                                                                       \
310     uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
311     vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
312                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);      \
313 }                                                                       \
314                                                                         \
315 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
316                   CPURISCVState *env, uint32_t desc)                    \
317 {                                                                       \
318     vext_ldst_us(vd, base, env, desc, LOAD_FN,                          \
319                  ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);          \
320 }
321 
322 GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b)
323 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
324 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
325 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)
326 
327 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN)                           \
328 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
329                          CPURISCVState *env, uint32_t desc)             \
330 {                                                                       \
331     uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
332     vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,  \
333                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);     \
334 }                                                                       \
335                                                                         \
336 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
337                   CPURISCVState *env, uint32_t desc)                    \
338 {                                                                       \
339     vext_ldst_us(vd, base, env, desc, STORE_FN,                         \
340                  ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);         \
341 }
342 
343 GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b)
344 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
345 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
346 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)
347 
348 /*
349  *** index: access vector element from indexed memory
350  */
351 typedef target_ulong vext_get_index_addr(target_ulong base,
352         uint32_t idx, void *vs2);
353 
354 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
355 static target_ulong NAME(target_ulong base,            \
356                          uint32_t idx, void *vs2)      \
357 {                                                      \
358     return (base + *((ETYPE *)vs2 + H(idx)));          \
359 }
360 
361 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t,  H1)
362 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
363 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
364 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
365 
366 static inline void
367 vext_ldst_index(void *vd, void *v0, target_ulong base,
368                 void *vs2, CPURISCVState *env, uint32_t desc,
369                 vext_get_index_addr get_index_addr,
370                 vext_ldst_elem_fn *ldst_elem,
371                 uint32_t esz, uintptr_t ra, MMUAccessType access_type)
372 {
373     uint32_t i, k;
374     uint32_t nf = vext_nf(desc);
375     uint32_t vm = vext_vm(desc);
376     uint32_t max_elems = vext_max_elems(desc, esz);
377 
378     /* load/store bytes from/to guest memory */
379     for (i = env->vstart; i < env->vl; i++, env->vstart++) {
380         if (!vm && !vext_elem_mask(v0, i)) {
381             continue;
382         }
383 
384         k = 0;
385         while (k < nf) {
386             abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz);
387             ldst_elem(env, addr, i + k * max_elems, vd, ra);
388             k++;
389         }
390     }
391     env->vstart = 0;
392 }
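/*
 * Illustrative index calculation: each access adds an (unsigned) element
 * of vs2 to the base, i.e. element i of field k uses
 * get_index_addr(base, i, vs2) + (k << esz).  With idx_h and
 * vs2[1] = 0x40, element 1 of a 32-bit segment load with nf = 2 touches
 * base + 0x40 and base + 0x44 (assumed values).
 */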
393 
394 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)                  \
395 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                   \
396                   void *vs2, CPURISCVState *env, uint32_t desc)            \
397 {                                                                          \
398     vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,                \
399                     LOAD_FN, ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
400 }
401 
402 GEN_VEXT_LD_INDEX(vlxei8_8_v,   int8_t,  idx_b, lde_b)
403 GEN_VEXT_LD_INDEX(vlxei8_16_v,  int16_t, idx_b, lde_h)
404 GEN_VEXT_LD_INDEX(vlxei8_32_v,  int32_t, idx_b, lde_w)
405 GEN_VEXT_LD_INDEX(vlxei8_64_v,  int64_t, idx_b, lde_d)
406 GEN_VEXT_LD_INDEX(vlxei16_8_v,  int8_t,  idx_h, lde_b)
407 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
408 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
409 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
410 GEN_VEXT_LD_INDEX(vlxei32_8_v,  int8_t,  idx_w, lde_b)
411 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
412 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
413 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
414 GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b)
415 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
416 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
417 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)
418 
419 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)       \
420 void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
421                   void *vs2, CPURISCVState *env, uint32_t desc)  \
422 {                                                                \
423     vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
424                     STORE_FN, ctzl(sizeof(ETYPE)),               \
425                     GETPC(), MMU_DATA_STORE);                    \
426 }
427 
428 GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
429 GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
430 GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
431 GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
432 GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
433 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
434 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
435 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
436 GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
437 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
438 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
439 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
440 GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
441 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
442 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
443 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)
444 
445 /*
446  *** unit-stride fault-only-first load instructions
447  */
448 static inline void
449 vext_ldff(void *vd, void *v0, target_ulong base,
450           CPURISCVState *env, uint32_t desc,
451           vext_ldst_elem_fn *ldst_elem,
452           uint32_t esz, uintptr_t ra)
453 {
454     void *host;
455     uint32_t i, k, vl = 0;
456     uint32_t nf = vext_nf(desc);
457     uint32_t vm = vext_vm(desc);
458     uint32_t max_elems = vext_max_elems(desc, esz);
459     target_ulong addr, offset, remain;
460 
461     /* probe every access */
462     for (i = env->vstart; i < env->vl; i++) {
463         if (!vm && !vext_elem_mask(v0, i)) {
464             continue;
465         }
466         addr = base + i * (nf << esz);
467         if (i == 0) {
468             probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
469         } else {
470             /* if it triggers an exception, no need to check watchpoint */
471             remain = nf << esz;
472             while (remain > 0) {
473                 offset = -(addr | TARGET_PAGE_MASK);
474                 host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
475                                          cpu_mmu_index(env, false));
476                 if (host) {
477 #ifdef CONFIG_USER_ONLY
478                     if (page_check_range(addr, nf << esz, PAGE_READ) < 0) {
479                         vl = i;
480                         goto ProbeSuccess;
481                     }
482 #else
483                     probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
484 #endif
485                 } else {
486                     vl = i;
487                     goto ProbeSuccess;
488                 }
489                 if (remain <= offset) {
490                     break;
491                 }
492                 remain -= offset;
493                 addr += offset;
494             }
495         }
496     }
497 ProbeSuccess:
498     /* load bytes from guest memory */
499     if (vl != 0) {
500         env->vl = vl;
501     }
502     for (i = env->vstart; i < env->vl; i++) {
503         k = 0;
504         if (!vm && !vext_elem_mask(v0, i)) {
505             continue;
506         }
507         while (k < nf) {
508             target_ulong addr = base + ((i * nf + k) << esz);
509             ldst_elem(env, addr, i + k * max_elems, vd, ra);
510             k++;
511         }
512     }
513     env->vstart = 0;
514 }
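/*
 * Fault-only-first behaviour implemented above (sketch): element 0 is
 * probed with probe_pages() and may trap as usual; for any later
 * element whose page cannot be translated, vl is truncated to that
 * element's index instead of raising an exception, and only elements
 * below the new vl are then loaded.
 */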
515 
516 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
517 void HELPER(NAME)(void *vd, void *v0, target_ulong base,  \
518                   CPURISCVState *env, uint32_t desc)      \
519 {                                                         \
520     vext_ldff(vd, v0, base, env, desc, LOAD_FN,           \
521               ctzl(sizeof(ETYPE)), GETPC());              \
522 }
523 
524 GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b)
525 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
526 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
527 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
528 
529 #define DO_SWAP(N, M) (M)
530 #define DO_AND(N, M)  (N & M)
531 #define DO_XOR(N, M)  (N ^ M)
532 #define DO_OR(N, M)   (N | M)
533 #define DO_ADD(N, M)  (N + M)
534 
535 /* Signed min/max */
536 #define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
537 #define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))
538 
539 /* Unsigned min/max */
540 #define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
541 #define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)
542 
543 /*
544  *** load and store whole register instructions
545  */
546 static void
547 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
548                 vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra,
549                 MMUAccessType access_type)
550 {
551     uint32_t i, k, off, pos;
552     uint32_t nf = vext_nf(desc);
553     uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
554     uint32_t max_elems = vlenb >> esz;
555 
556     k = env->vstart / max_elems;
557     off = env->vstart % max_elems;
558 
559     if (off) {
560         /* load/store rest of elements of the current segment pointed to by vstart */
561         for (pos = off; pos < max_elems; pos++, env->vstart++) {
562             target_ulong addr = base + ((pos + k * max_elems) << esz);
563             ldst_elem(env, addr, pos + k * max_elems, vd, ra);
564         }
565         k++;
566     }
567 
568     /* load/store elements for rest of segments */
569     for (; k < nf; k++) {
570         for (i = 0; i < max_elems; i++, env->vstart++) {
571             target_ulong addr = base + ((i + k * max_elems) << esz);
572             ldst_elem(env, addr, i + k * max_elems, vd, ra);
573         }
574     }
575 
576     env->vstart = 0;
577 }
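/*
 * Example (assuming VLEN = 128, i.e. vlenb = 16): for vl2re32_v we have
 * nf = 2, esz = 2 and max_elems = 4, so the helper loads 8 consecutive
 * 32-bit words from base into the two destination registers, resuming
 * mid-register when a previous trap left a non-zero vstart.
 */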
578 
579 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN)      \
580 void HELPER(NAME)(void *vd, target_ulong base,       \
581                   CPURISCVState *env, uint32_t desc) \
582 {                                                    \
583     vext_ldst_whole(vd, base, env, desc, LOAD_FN,    \
584                     ctzl(sizeof(ETYPE)), GETPC(),    \
585                     MMU_DATA_LOAD);                  \
586 }
587 
588 GEN_VEXT_LD_WHOLE(vl1re8_v,  int8_t,  lde_b)
589 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
590 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
591 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
592 GEN_VEXT_LD_WHOLE(vl2re8_v,  int8_t,  lde_b)
593 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
594 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
595 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
596 GEN_VEXT_LD_WHOLE(vl4re8_v,  int8_t,  lde_b)
597 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
598 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
599 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
600 GEN_VEXT_LD_WHOLE(vl8re8_v,  int8_t,  lde_b)
601 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
602 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
603 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)
604 
605 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN)     \
606 void HELPER(NAME)(void *vd, target_ulong base,       \
607                   CPURISCVState *env, uint32_t desc) \
608 {                                                    \
609     vext_ldst_whole(vd, base, env, desc, STORE_FN,   \
610                     ctzl(sizeof(ETYPE)), GETPC(),    \
611                     MMU_DATA_STORE);                 \
612 }
613 
614 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
615 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
616 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
617 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
618 
619 /*
620  *** Vector Integer Arithmetic Instructions
621  */
622 
623 /* expand macro args before macro */
624 #define RVVCALL(macro, ...)  macro(__VA_ARGS__)
625 
626 /* (TD, T1, T2, TX1, TX2) */
627 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
628 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
629 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
630 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
631 #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
632 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
633 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
634 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
635 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
636 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
637 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
638 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
639 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
640 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
641 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
642 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
643 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
644 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
645 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
646 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
647 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
648 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
649 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
650 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
651 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
652 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
653 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
654 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
655 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
656 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
657 
658 /* operation of two vector elements */
659 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
660 
661 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
662 static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
663 {                                                               \
664     TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
665     TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
666     *((TD *)vd + HD(i)) = OP(s2, s1);                           \
667 }
668 #define DO_SUB(N, M) (N - M)
669 #define DO_RSUB(N, M) (M - N)
670 
671 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
672 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
673 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
674 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
675 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
676 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
677 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
678 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
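/*
 * As an illustration, RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1,
 * DO_ADD) expands to roughly:
 *
 *   static void do_vadd_vv_b(void *vd, void *vs1, void *vs2, int i)
 *   {
 *       int8_t s1 = *((int8_t *)vs1 + H1(i));
 *       int8_t s2 = *((int8_t *)vs2 + H1(i));
 *       *((int8_t *)vd + H1(i)) = (s2 + s1);
 *   }
 *
 * a per-element callback that the generic do_vext_vv() loop below
 * invokes for every active element.
 */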
679 
680 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
681                        CPURISCVState *env, uint32_t desc,
682                        uint32_t esz, uint32_t dsz,
683                        opivv2_fn *fn)
684 {
685     uint32_t vm = vext_vm(desc);
686     uint32_t vl = env->vl;
687     uint32_t i;
688 
689     for (i = env->vstart; i < vl; i++) {
690         if (!vm && !vext_elem_mask(v0, i)) {
691             continue;
692         }
693         fn(vd, vs1, vs2, i);
694     }
695     env->vstart = 0;
696 }
697 
698 /* generate the helpers for OPIVV */
699 #define GEN_VEXT_VV(NAME, ESZ, DSZ)                       \
700 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
701                   void *vs2, CPURISCVState *env,          \
702                   uint32_t desc)                          \
703 {                                                         \
704     do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,     \
705                do_##NAME);                                \
706 }
707 
708 GEN_VEXT_VV(vadd_vv_b, 1, 1)
709 GEN_VEXT_VV(vadd_vv_h, 2, 2)
710 GEN_VEXT_VV(vadd_vv_w, 4, 4)
711 GEN_VEXT_VV(vadd_vv_d, 8, 8)
712 GEN_VEXT_VV(vsub_vv_b, 1, 1)
713 GEN_VEXT_VV(vsub_vv_h, 2, 2)
714 GEN_VEXT_VV(vsub_vv_w, 4, 4)
715 GEN_VEXT_VV(vsub_vv_d, 8, 8)
716 
717 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
718 
719 /*
720  * (T1)s1 gives the real operand type.
721  * (TX1)(T1)s1 expands the operand type for widening or narrowing operations.
722  */
723 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
724 static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
725 {                                                                   \
726     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
727     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
728 }
729 
730 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
731 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
732 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
733 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
734 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
735 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
736 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
737 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
738 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
739 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
740 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
741 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
742 
743 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
744                        CPURISCVState *env, uint32_t desc,
745                        uint32_t esz, uint32_t dsz,
746                        opivx2_fn fn)
747 {
748     uint32_t vm = vext_vm(desc);
749     uint32_t vl = env->vl;
750     uint32_t i;
751 
752     for (i = env->vstart; i < vl; i++) {
753         if (!vm && !vext_elem_mask(v0, i)) {
754             continue;
755         }
756         fn(vd, s1, vs2, i);
757     }
758     env->vstart = 0;
759 }
760 
761 /* generate the helpers for OPIVX */
762 #define GEN_VEXT_VX(NAME, ESZ, DSZ)                       \
763 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
764                   void *vs2, CPURISCVState *env,          \
765                   uint32_t desc)                          \
766 {                                                         \
767     do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ,      \
768                do_##NAME);                                \
769 }
770 
771 GEN_VEXT_VX(vadd_vx_b, 1, 1)
772 GEN_VEXT_VX(vadd_vx_h, 2, 2)
773 GEN_VEXT_VX(vadd_vx_w, 4, 4)
774 GEN_VEXT_VX(vadd_vx_d, 8, 8)
775 GEN_VEXT_VX(vsub_vx_b, 1, 1)
776 GEN_VEXT_VX(vsub_vx_h, 2, 2)
777 GEN_VEXT_VX(vsub_vx_w, 4, 4)
778 GEN_VEXT_VX(vsub_vx_d, 8, 8)
779 GEN_VEXT_VX(vrsub_vx_b, 1, 1)
780 GEN_VEXT_VX(vrsub_vx_h, 2, 2)
781 GEN_VEXT_VX(vrsub_vx_w, 4, 4)
782 GEN_VEXT_VX(vrsub_vx_d, 8, 8)
783 
784 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
785 {
786     intptr_t oprsz = simd_oprsz(desc);
787     intptr_t i;
788 
789     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
790         *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
791     }
792 }
793 
794 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
795 {
796     intptr_t oprsz = simd_oprsz(desc);
797     intptr_t i;
798 
799     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
800         *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
801     }
802 }
803 
804 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
805 {
806     intptr_t oprsz = simd_oprsz(desc);
807     intptr_t i;
808 
809     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
810         *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
811     }
812 }
813 
814 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
815 {
816     intptr_t oprsz = simd_oprsz(desc);
817     intptr_t i;
818 
819     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
820         *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
821     }
822 }
823 
824 /* Vector Widening Integer Add/Subtract */
825 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
826 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
827 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
828 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
829 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
830 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
831 #define WOP_WUUU_B  uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
832 #define WOP_WUUU_H  uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
833 #define WOP_WUUU_W  uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
834 #define WOP_WSSS_B  int16_t, int8_t, int16_t, int16_t, int16_t
835 #define WOP_WSSS_H  int32_t, int16_t, int32_t, int32_t, int32_t
836 #define WOP_WSSS_W  int64_t, int32_t, int64_t, int64_t, int64_t
837 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
838 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
839 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
840 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
841 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
842 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
843 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
844 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
845 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
846 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
847 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
848 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
849 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
850 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
851 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
852 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
853 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
854 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
855 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
856 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
857 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
858 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
859 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
860 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
861 GEN_VEXT_VV(vwaddu_vv_b, 1, 2)
862 GEN_VEXT_VV(vwaddu_vv_h, 2, 4)
863 GEN_VEXT_VV(vwaddu_vv_w, 4, 8)
864 GEN_VEXT_VV(vwsubu_vv_b, 1, 2)
865 GEN_VEXT_VV(vwsubu_vv_h, 2, 4)
866 GEN_VEXT_VV(vwsubu_vv_w, 4, 8)
867 GEN_VEXT_VV(vwadd_vv_b, 1, 2)
868 GEN_VEXT_VV(vwadd_vv_h, 2, 4)
869 GEN_VEXT_VV(vwadd_vv_w, 4, 8)
870 GEN_VEXT_VV(vwsub_vv_b, 1, 2)
871 GEN_VEXT_VV(vwsub_vv_h, 2, 4)
872 GEN_VEXT_VV(vwsub_vv_w, 4, 8)
873 GEN_VEXT_VV(vwaddu_wv_b, 1, 2)
874 GEN_VEXT_VV(vwaddu_wv_h, 2, 4)
875 GEN_VEXT_VV(vwaddu_wv_w, 4, 8)
876 GEN_VEXT_VV(vwsubu_wv_b, 1, 2)
877 GEN_VEXT_VV(vwsubu_wv_h, 2, 4)
878 GEN_VEXT_VV(vwsubu_wv_w, 4, 8)
879 GEN_VEXT_VV(vwadd_wv_b, 1, 2)
880 GEN_VEXT_VV(vwadd_wv_h, 2, 4)
881 GEN_VEXT_VV(vwadd_wv_w, 4, 8)
882 GEN_VEXT_VV(vwsub_wv_b, 1, 2)
883 GEN_VEXT_VV(vwsub_wv_h, 2, 4)
884 GEN_VEXT_VV(vwsub_wv_w, 4, 8)
885 
886 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
887 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
888 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
889 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
890 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
891 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
892 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
893 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
894 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
895 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
896 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
897 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
898 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
899 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
900 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
901 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
902 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
903 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
904 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
905 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
906 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
907 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
908 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
909 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
910 GEN_VEXT_VX(vwaddu_vx_b, 1, 2)
911 GEN_VEXT_VX(vwaddu_vx_h, 2, 4)
912 GEN_VEXT_VX(vwaddu_vx_w, 4, 8)
913 GEN_VEXT_VX(vwsubu_vx_b, 1, 2)
914 GEN_VEXT_VX(vwsubu_vx_h, 2, 4)
915 GEN_VEXT_VX(vwsubu_vx_w, 4, 8)
916 GEN_VEXT_VX(vwadd_vx_b, 1, 2)
917 GEN_VEXT_VX(vwadd_vx_h, 2, 4)
918 GEN_VEXT_VX(vwadd_vx_w, 4, 8)
919 GEN_VEXT_VX(vwsub_vx_b, 1, 2)
920 GEN_VEXT_VX(vwsub_vx_h, 2, 4)
921 GEN_VEXT_VX(vwsub_vx_w, 4, 8)
922 GEN_VEXT_VX(vwaddu_wx_b, 1, 2)
923 GEN_VEXT_VX(vwaddu_wx_h, 2, 4)
924 GEN_VEXT_VX(vwaddu_wx_w, 4, 8)
925 GEN_VEXT_VX(vwsubu_wx_b, 1, 2)
926 GEN_VEXT_VX(vwsubu_wx_h, 2, 4)
927 GEN_VEXT_VX(vwsubu_wx_w, 4, 8)
928 GEN_VEXT_VX(vwadd_wx_b, 1, 2)
929 GEN_VEXT_VX(vwadd_wx_h, 2, 4)
930 GEN_VEXT_VX(vwadd_wx_w, 4, 8)
931 GEN_VEXT_VX(vwsub_wx_b, 1, 2)
932 GEN_VEXT_VX(vwsub_wx_h, 2, 4)
933 GEN_VEXT_VX(vwsub_wx_w, 4, 8)
934 
935 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
936 #define DO_VADC(N, M, C) (N + M + C)
937 #define DO_VSBC(N, M, C) (N - M - C)
938 
939 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP)              \
940 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
941                   CPURISCVState *env, uint32_t desc)          \
942 {                                                             \
943     uint32_t vl = env->vl;                                    \
944     uint32_t i;                                               \
945                                                               \
946     for (i = env->vstart; i < vl; i++) {                      \
947         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
948         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
949         ETYPE carry = vext_elem_mask(v0, i);                  \
950                                                               \
951         *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry);         \
952     }                                                         \
953     env->vstart = 0;                                          \
954 }
955 
956 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t,  H1, DO_VADC)
957 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
958 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
959 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)
960 
961 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t,  H1, DO_VSBC)
962 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
963 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
964 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
965 
966 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP)                         \
967 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
968                   CPURISCVState *env, uint32_t desc)                     \
969 {                                                                        \
970     uint32_t vl = env->vl;                                               \
971     uint32_t i;                                                          \
972                                                                          \
973     for (i = env->vstart; i < vl; i++) {                                 \
974         ETYPE s2 = *((ETYPE *)vs2 + H(i));                               \
975         ETYPE carry = vext_elem_mask(v0, i);                             \
976                                                                          \
977         *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
978     }                                                                    \
979     env->vstart = 0;                                                     \
980 }
981 
982 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t,  H1, DO_VADC)
983 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
984 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
985 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)
986 
987 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t,  H1, DO_VSBC)
988 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
989 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
990 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
991 
992 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N :           \
993                           (__typeof(N))(N + M) < N)
994 #define DO_MSBC(N, M, C) (C ? N <= M : N < M)
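/*
 * Worked example for DO_MADC with uint8_t operands (assumed values):
 * for N = 200 and M = 100, (uint8_t)(N + M) = 44 < 200, so the
 * carry-out is 1 without carry-in; with carry-in,
 * (uint8_t)(N + M + 1) = 45 <= 200, so it is again 1.  For N = 1,
 * M = 2 both forms yield 0.
 */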
995 
996 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
997 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
998                   CPURISCVState *env, uint32_t desc)          \
999 {                                                             \
1000     uint32_t vl = env->vl;                                    \
1001     uint32_t vm = vext_vm(desc);                              \
1002     uint32_t i;                                               \
1003                                                               \
1004     for (i = env->vstart; i < vl; i++) {                      \
1005         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
1006         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
1007         ETYPE carry = !vm && vext_elem_mask(v0, i);           \
1008         vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry));      \
1009     }                                                         \
1010     env->vstart = 0;                                          \
1011 }
1012 
1013 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t,  H1, DO_MADC)
1014 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
1015 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
1016 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)
1017 
1018 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t,  H1, DO_MSBC)
1019 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
1020 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
1021 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
1022 
1023 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)               \
1024 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
1025                   void *vs2, CPURISCVState *env, uint32_t desc) \
1026 {                                                               \
1027     uint32_t vl = env->vl;                                      \
1028     uint32_t vm = vext_vm(desc);                                \
1029     uint32_t i;                                                 \
1030                                                                 \
1031     for (i = env->vstart; i < vl; i++) {                        \
1032         ETYPE s2 = *((ETYPE *)vs2 + H(i));                      \
1033         ETYPE carry = !vm && vext_elem_mask(v0, i);             \
1034         vext_set_elem_mask(vd, i,                               \
1035                 DO_OP(s2, (ETYPE)(target_long)s1, carry));      \
1036     }                                                           \
1037     env->vstart = 0;                                            \
1038 }
1039 
1040 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t,  H1, DO_MADC)
1041 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
1042 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
1043 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)
1044 
1045 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t,  H1, DO_MSBC)
1046 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
1047 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
1048 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
1049 
1050 /* Vector Bitwise Logical Instructions */
1051 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
1052 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
1053 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
1054 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
1055 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
1056 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
1057 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
1058 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
1059 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
1060 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
1061 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
1062 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
1063 GEN_VEXT_VV(vand_vv_b, 1, 1)
1064 GEN_VEXT_VV(vand_vv_h, 2, 2)
1065 GEN_VEXT_VV(vand_vv_w, 4, 4)
1066 GEN_VEXT_VV(vand_vv_d, 8, 8)
1067 GEN_VEXT_VV(vor_vv_b, 1, 1)
1068 GEN_VEXT_VV(vor_vv_h, 2, 2)
1069 GEN_VEXT_VV(vor_vv_w, 4, 4)
1070 GEN_VEXT_VV(vor_vv_d, 8, 8)
1071 GEN_VEXT_VV(vxor_vv_b, 1, 1)
1072 GEN_VEXT_VV(vxor_vv_h, 2, 2)
1073 GEN_VEXT_VV(vxor_vv_w, 4, 4)
1074 GEN_VEXT_VV(vxor_vv_d, 8, 8)
1075 
1076 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
1077 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
1078 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
1079 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
1080 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
1081 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
1082 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
1083 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
1084 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
1085 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
1086 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
1087 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
1088 GEN_VEXT_VX(vand_vx_b, 1, 1)
1089 GEN_VEXT_VX(vand_vx_h, 2, 2)
1090 GEN_VEXT_VX(vand_vx_w, 4, 4)
1091 GEN_VEXT_VX(vand_vx_d, 8, 8)
1092 GEN_VEXT_VX(vor_vx_b, 1, 1)
1093 GEN_VEXT_VX(vor_vx_h, 2, 2)
1094 GEN_VEXT_VX(vor_vx_w, 4, 4)
1095 GEN_VEXT_VX(vor_vx_d, 8, 8)
1096 GEN_VEXT_VX(vxor_vx_b, 1, 1)
1097 GEN_VEXT_VX(vxor_vx_h, 2, 2)
1098 GEN_VEXT_VX(vxor_vx_w, 4, 4)
1099 GEN_VEXT_VX(vxor_vx_d, 8, 8)
1100 
1101 /* Vector Single-Width Bit Shift Instructions */
1102 #define DO_SLL(N, M)  (N << (M))
1103 #define DO_SRL(N, M)  (N >> (M))
1104 
1105 /* generate the helpers for shift instructions with two vector operands */
1106 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK)             \
1107 void HELPER(NAME)(void *vd, void *v0, void *vs1,                          \
1108                   void *vs2, CPURISCVState *env, uint32_t desc)           \
1109 {                                                                         \
1110     uint32_t vm = vext_vm(desc);                                          \
1111     uint32_t vl = env->vl;                                                \
1112     uint32_t i;                                                           \
1113                                                                           \
1114     for (i = env->vstart; i < vl; i++) {                                  \
1115         if (!vm && !vext_elem_mask(v0, i)) {                              \
1116             continue;                                                     \
1117         }                                                                 \
1118         TS1 s1 = *((TS1 *)vs1 + HS1(i));                                  \
1119         TS2 s2 = *((TS2 *)vs2 + HS2(i));                                  \
1120         *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);                        \
1121     }                                                                     \
1122     env->vstart = 0;                                                      \
1123 }
1124 
1125 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t,  uint8_t, H1, H1, DO_SLL, 0x7)
1126 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
1127 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
1128 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)
1129 
1130 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1131 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1132 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1133 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1134 
1135 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t,  int8_t, H1, H1, DO_SRL, 0x7)
1136 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
1137 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1138 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
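/*
 * Note that the arithmetic right shifts reuse DO_SRL: TS2 is a signed
 * type there, so ">>" performs a sign-propagating shift on the hosts
 * QEMU supports.  For example, vsra_vv_b with s2 = -8 (0xf8) and a
 * shift amount of 1 yields -4 (0xfc), whereas vsrl_vv_b on the same
 * bit pattern yields 0x7c.
 */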
1139 
1140 /* generate the helpers for shift instructions with one vector and one scalar */
1141 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
1142 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,      \
1143         void *vs2, CPURISCVState *env, uint32_t desc)       \
1144 {                                                           \
1145     uint32_t vm = vext_vm(desc);                            \
1146     uint32_t vl = env->vl;                                  \
1147     uint32_t i;                                             \
1148                                                             \
1149     for (i = env->vstart; i < vl; i++) {                    \
1150         if (!vm && !vext_elem_mask(v0, i)) {                \
1151             continue;                                       \
1152         }                                                   \
1153         TS2 s2 = *((TS2 *)vs2 + HS2(i));                    \
1154         *((TD *)vd + HD(i)) = OP(s2, s1 & MASK);            \
1155     }                                                       \
1156     env->vstart = 0;                                        \
1157 }
1158 
1159 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
1160 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
1161 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
1162 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)
1163 
1164 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1165 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1166 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1167 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1168 
1169 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
1170 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
1171 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1172 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1173 
1174 /* Vector Narrowing Integer Right Shift Instructions */
1175 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t,  uint16_t, H1, H2, DO_SRL, 0xf)
1176 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1177 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1178 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t,  int16_t, H1, H2, DO_SRL, 0xf)
1179 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1180 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1181 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1182 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1183 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1184 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
1185 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1186 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1187 
1188 /* Vector Integer Comparison Instructions */
1189 #define DO_MSEQ(N, M) (N == M)
1190 #define DO_MSNE(N, M) (N != M)
1191 #define DO_MSLT(N, M) (N < M)
1192 #define DO_MSLE(N, M) (N <= M)
1193 #define DO_MSGT(N, M) (N > M)
1194 
1195 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                \
1196 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
1197                   CPURISCVState *env, uint32_t desc)          \
1198 {                                                             \
1199     uint32_t vm = vext_vm(desc);                              \
1200     uint32_t vl = env->vl;                                    \
1201     uint32_t i;                                               \
1202                                                               \
1203     for (i = env->vstart; i < vl; i++) {                      \
1204         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
1205         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
1206         if (!vm && !vext_elem_mask(v0, i)) {                  \
1207             continue;                                         \
1208         }                                                     \
1209         vext_set_elem_mask(vd, i, DO_OP(s2, s1));             \
1210     }                                                         \
1211     env->vstart = 0;                                          \
1212 }
1213 
1214 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
1215 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
1216 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
1217 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
1218 
1219 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t,  H1, DO_MSNE)
1220 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
1221 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
1222 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
1223 
1224 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t,  H1, DO_MSLT)
1225 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
1226 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
1227 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
1228 
1229 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t,  H1, DO_MSLT)
1230 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
1231 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
1232 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
1233 
1234 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t,  H1, DO_MSLE)
1235 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
1236 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
1237 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
1238 
1239 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t,  H1, DO_MSLE)
1240 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
1241 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
1242 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1243 
1244 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP)                      \
1245 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
1246                   CPURISCVState *env, uint32_t desc)                \
1247 {                                                                   \
1248     uint32_t vm = vext_vm(desc);                                    \
1249     uint32_t vl = env->vl;                                          \
1250     uint32_t i;                                                     \
1251                                                                     \
1252     for (i = env->vstart; i < vl; i++) {                            \
1253         ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
1254         if (!vm && !vext_elem_mask(v0, i)) {                        \
1255             continue;                                               \
1256         }                                                           \
1257         vext_set_elem_mask(vd, i,                                   \
1258                 DO_OP(s2, (ETYPE)(target_long)s1));                 \
1259     }                                                               \
1260     env->vstart = 0;                                                \
1261 }
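/*
 * The (ETYPE)(target_long) cast reinterprets the XLEN-wide scalar as
 * signed before converting it to the element type, so rs1 is
 * sign-extended (or truncated) to SEW bits before the comparison.
 */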
1262 
1263 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t,  H1, DO_MSEQ)
1264 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
1265 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
1266 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
1267 
1268 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t,  H1, DO_MSNE)
1269 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
1270 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
1271 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
1272 
1273 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t,  H1, DO_MSLT)
1274 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
1275 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
1276 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
1277 
1278 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t,  H1, DO_MSLT)
1279 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
1280 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
1281 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
1282 
1283 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t,  H1, DO_MSLE)
1284 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
1285 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
1286 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
1287 
1288 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t,  H1, DO_MSLE)
1289 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
1290 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
1291 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
1292 
1293 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t,  H1, DO_MSGT)
1294 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
1295 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
1296 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
1297 
1298 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t,  H1, DO_MSGT)
1299 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
1300 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
1301 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
1302 
1303 /* Vector Integer Min/Max Instructions */
1304 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1305 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1306 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1307 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1308 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1309 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1310 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1311 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1312 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1313 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1314 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1315 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1316 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1317 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1318 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1319 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
1320 GEN_VEXT_VV(vminu_vv_b, 1, 1)
1321 GEN_VEXT_VV(vminu_vv_h, 2, 2)
1322 GEN_VEXT_VV(vminu_vv_w, 4, 4)
1323 GEN_VEXT_VV(vminu_vv_d, 8, 8)
1324 GEN_VEXT_VV(vmin_vv_b, 1, 1)
1325 GEN_VEXT_VV(vmin_vv_h, 2, 2)
1326 GEN_VEXT_VV(vmin_vv_w, 4, 4)
1327 GEN_VEXT_VV(vmin_vv_d, 8, 8)
1328 GEN_VEXT_VV(vmaxu_vv_b, 1, 1)
1329 GEN_VEXT_VV(vmaxu_vv_h, 2, 2)
1330 GEN_VEXT_VV(vmaxu_vv_w, 4, 4)
1331 GEN_VEXT_VV(vmaxu_vv_d, 8, 8)
1332 GEN_VEXT_VV(vmax_vv_b, 1, 1)
1333 GEN_VEXT_VV(vmax_vv_h, 2, 2)
1334 GEN_VEXT_VV(vmax_vv_w, 4, 4)
1335 GEN_VEXT_VV(vmax_vv_d, 8, 8)
1336 
1337 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1338 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1339 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1340 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1341 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1342 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1343 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1344 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1345 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1346 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1347 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1348 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1349 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1350 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1351 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1352 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
1353 GEN_VEXT_VX(vminu_vx_b, 1, 1)
1354 GEN_VEXT_VX(vminu_vx_h, 2, 2)
1355 GEN_VEXT_VX(vminu_vx_w, 4, 4)
1356 GEN_VEXT_VX(vminu_vx_d, 8, 8)
1357 GEN_VEXT_VX(vmin_vx_b, 1, 1)
1358 GEN_VEXT_VX(vmin_vx_h, 2, 2)
1359 GEN_VEXT_VX(vmin_vx_w, 4, 4)
1360 GEN_VEXT_VX(vmin_vx_d, 8, 8)
1361 GEN_VEXT_VX(vmaxu_vx_b, 1, 1)
1362 GEN_VEXT_VX(vmaxu_vx_h, 2, 2)
1363 GEN_VEXT_VX(vmaxu_vx_w, 4, 4)
1364 GEN_VEXT_VX(vmaxu_vx_d, 8, 8)
1365 GEN_VEXT_VX(vmax_vx_b, 1, 1)
1366 GEN_VEXT_VX(vmax_vx_h, 2, 2)
1367 GEN_VEXT_VX(vmax_vx_w, 4, 4)
1368 GEN_VEXT_VX(vmax_vx_d, 8, 8)
1369 
1370 /* Vector Single-Width Integer Multiply Instructions */
1371 #define DO_MUL(N, M) (N * M)
1372 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1373 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1374 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1375 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
1376 GEN_VEXT_VV(vmul_vv_b, 1, 1)
1377 GEN_VEXT_VV(vmul_vv_h, 2, 2)
1378 GEN_VEXT_VV(vmul_vv_w, 4, 4)
1379 GEN_VEXT_VV(vmul_vv_d, 8, 8)
1380 
1381 static int8_t do_mulh_b(int8_t s2, int8_t s1)
1382 {
1383     return (int16_t)s2 * (int16_t)s1 >> 8;
1384 }
1385 
1386 static int16_t do_mulh_h(int16_t s2, int16_t s1)
1387 {
1388     return (int32_t)s2 * (int32_t)s1 >> 16;
1389 }
1390 
1391 static int32_t do_mulh_w(int32_t s2, int32_t s1)
1392 {
1393     return (int64_t)s2 * (int64_t)s1 >> 32;
1394 }
1395 
1396 static int64_t do_mulh_d(int64_t s2, int64_t s1)
1397 {
1398     uint64_t hi_64, lo_64;
1399 
1400     muls64(&lo_64, &hi_64, s1, s2);
1401     return hi_64;
1402 }
1403 
1404 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1405 {
1406     return (uint16_t)s2 * (uint16_t)s1 >> 8;
1407 }
1408 
1409 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1410 {
1411     return (uint32_t)s2 * (uint32_t)s1 >> 16;
1412 }
1413 
1414 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1415 {
1416     return (uint64_t)s2 * (uint64_t)s1 >> 32;
1417 }
1418 
1419 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1420 {
1421     uint64_t hi_64, lo_64;
1422 
1423     mulu64(&lo_64, &hi_64, s2, s1);
1424     return hi_64;
1425 }
1426 
1427 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1428 {
1429     return (int16_t)s2 * (uint16_t)s1 >> 8;
1430 }
1431 
1432 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1433 {
1434     return (int32_t)s2 * (uint32_t)s1 >> 16;
1435 }
1436 
1437 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1438 {
1439     return (int64_t)s2 * (uint64_t)s1 >> 32;
1440 }
1441 
1442 /*
1443  * Let  A = signed operand,
1444  *      B = unsigned operand,
1445  *      P = mulu64(A, B), unsigned product of A's bit pattern and B,
1446  *      SP = A * B, the signed product.
1447  *
1448  * When A < 0, mulu64 reads A's bit pattern as A + 2 ** 64, so
1449  *      P  = (A + 2 ** 64) * B
1450  *         = A * B + 2 ** 64 * B
1451  * and therefore
1452  *      SP = A * B
1453  *         = P - 2 ** 64 * B
1454  * When A >= 0, SP = P.
1455  *
1456  * Subtracting 2 ** 64 * B only affects the high 64 bits of the
1457  * 128-bit product, hence
1458  *      HI_P -= (A < 0 ? B : 0)
1459  */
1460 
1461 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1462 {
1463     uint64_t hi_64, lo_64;
1464 
1465     mulu64(&lo_64, &hi_64, s2, s1);
1466 
1467     hi_64 -= s2 < 0 ? s1 : 0;
1468     return hi_64;
1469 }
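/*
 * For example, with s2 = -2 and s1 = 3: mulu64 sees s2 as 2 ** 64 - 2,
 * so hi_64 = 2 and lo_64 = 2 ** 64 - 6.  Since s2 < 0 we subtract
 * s1 = 3 from hi_64, giving -1, which together with lo_64 is exactly
 * the 128-bit signed product -6.
 */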
1470 
1471 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1472 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1473 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1474 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1475 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1476 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1477 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1478 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1479 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1480 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1481 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1482 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
1483 GEN_VEXT_VV(vmulh_vv_b, 1, 1)
1484 GEN_VEXT_VV(vmulh_vv_h, 2, 2)
1485 GEN_VEXT_VV(vmulh_vv_w, 4, 4)
1486 GEN_VEXT_VV(vmulh_vv_d, 8, 8)
1487 GEN_VEXT_VV(vmulhu_vv_b, 1, 1)
1488 GEN_VEXT_VV(vmulhu_vv_h, 2, 2)
1489 GEN_VEXT_VV(vmulhu_vv_w, 4, 4)
1490 GEN_VEXT_VV(vmulhu_vv_d, 8, 8)
1491 GEN_VEXT_VV(vmulhsu_vv_b, 1, 1)
1492 GEN_VEXT_VV(vmulhsu_vv_h, 2, 2)
1493 GEN_VEXT_VV(vmulhsu_vv_w, 4, 4)
1494 GEN_VEXT_VV(vmulhsu_vv_d, 8, 8)
1495 
1496 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1497 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1498 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1499 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1500 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1501 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1502 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1503 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1504 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1505 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1506 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1507 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1508 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1509 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1510 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1511 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
1512 GEN_VEXT_VX(vmul_vx_b, 1, 1)
1513 GEN_VEXT_VX(vmul_vx_h, 2, 2)
1514 GEN_VEXT_VX(vmul_vx_w, 4, 4)
1515 GEN_VEXT_VX(vmul_vx_d, 8, 8)
1516 GEN_VEXT_VX(vmulh_vx_b, 1, 1)
1517 GEN_VEXT_VX(vmulh_vx_h, 2, 2)
1518 GEN_VEXT_VX(vmulh_vx_w, 4, 4)
1519 GEN_VEXT_VX(vmulh_vx_d, 8, 8)
1520 GEN_VEXT_VX(vmulhu_vx_b, 1, 1)
1521 GEN_VEXT_VX(vmulhu_vx_h, 2, 2)
1522 GEN_VEXT_VX(vmulhu_vx_w, 4, 4)
1523 GEN_VEXT_VX(vmulhu_vx_d, 8, 8)
1524 GEN_VEXT_VX(vmulhsu_vx_b, 1, 1)
1525 GEN_VEXT_VX(vmulhsu_vx_h, 2, 2)
1526 GEN_VEXT_VX(vmulhsu_vx_w, 4, 4)
1527 GEN_VEXT_VX(vmulhsu_vx_d, 8, 8)
1528 
1529 /* Vector Integer Divide Instructions */
1530 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1531 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
1532 #define DO_DIV(N, M)  (unlikely(M == 0) ? (__typeof(N))(-1) :\
1533         unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
1534 #define DO_REM(N, M)  (unlikely(M == 0) ? N :\
1535         unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
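/*
 * These macros encode the RISC-V results for the two corner cases.
 * Division by zero returns all ones for the quotient and the unchanged
 * dividend for the remainder.  Signed overflow (N is the most negative
 * value, detected by N == -N, with M == -1) returns the dividend for
 * DO_DIV and 0 for DO_REM.  Neither case raises an exception.
 */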
1536 
1537 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1538 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1539 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1540 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1541 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1542 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1543 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1544 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1545 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1546 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1547 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1548 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1549 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1550 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1551 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1552 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
1553 GEN_VEXT_VV(vdivu_vv_b, 1, 1)
1554 GEN_VEXT_VV(vdivu_vv_h, 2, 2)
1555 GEN_VEXT_VV(vdivu_vv_w, 4, 4)
1556 GEN_VEXT_VV(vdivu_vv_d, 8, 8)
1557 GEN_VEXT_VV(vdiv_vv_b, 1, 1)
1558 GEN_VEXT_VV(vdiv_vv_h, 2, 2)
1559 GEN_VEXT_VV(vdiv_vv_w, 4, 4)
1560 GEN_VEXT_VV(vdiv_vv_d, 8, 8)
1561 GEN_VEXT_VV(vremu_vv_b, 1, 1)
1562 GEN_VEXT_VV(vremu_vv_h, 2, 2)
1563 GEN_VEXT_VV(vremu_vv_w, 4, 4)
1564 GEN_VEXT_VV(vremu_vv_d, 8, 8)
1565 GEN_VEXT_VV(vrem_vv_b, 1, 1)
1566 GEN_VEXT_VV(vrem_vv_h, 2, 2)
1567 GEN_VEXT_VV(vrem_vv_w, 4, 4)
1568 GEN_VEXT_VV(vrem_vv_d, 8, 8)
1569 
1570 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1571 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1572 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1573 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1574 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1575 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1576 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1577 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1578 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1579 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1580 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1581 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1582 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1583 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1584 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1585 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
1586 GEN_VEXT_VX(vdivu_vx_b, 1, 1)
1587 GEN_VEXT_VX(vdivu_vx_h, 2, 2)
1588 GEN_VEXT_VX(vdivu_vx_w, 4, 4)
1589 GEN_VEXT_VX(vdivu_vx_d, 8, 8)
1590 GEN_VEXT_VX(vdiv_vx_b, 1, 1)
1591 GEN_VEXT_VX(vdiv_vx_h, 2, 2)
1592 GEN_VEXT_VX(vdiv_vx_w, 4, 4)
1593 GEN_VEXT_VX(vdiv_vx_d, 8, 8)
1594 GEN_VEXT_VX(vremu_vx_b, 1, 1)
1595 GEN_VEXT_VX(vremu_vx_h, 2, 2)
1596 GEN_VEXT_VX(vremu_vx_w, 4, 4)
1597 GEN_VEXT_VX(vremu_vx_d, 8, 8)
1598 GEN_VEXT_VX(vrem_vx_b, 1, 1)
1599 GEN_VEXT_VX(vrem_vx_h, 2, 2)
1600 GEN_VEXT_VX(vrem_vx_w, 4, 4)
1601 GEN_VEXT_VX(vrem_vx_d, 8, 8)
1602 
1603 /* Vector Widening Integer Multiply Instructions */
1604 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1605 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1606 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1607 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1608 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1609 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1610 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1611 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1612 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
1613 GEN_VEXT_VV(vwmul_vv_b, 1, 2)
1614 GEN_VEXT_VV(vwmul_vv_h, 2, 4)
1615 GEN_VEXT_VV(vwmul_vv_w, 4, 8)
1616 GEN_VEXT_VV(vwmulu_vv_b, 1, 2)
1617 GEN_VEXT_VV(vwmulu_vv_h, 2, 4)
1618 GEN_VEXT_VV(vwmulu_vv_w, 4, 8)
1619 GEN_VEXT_VV(vwmulsu_vv_b, 1, 2)
1620 GEN_VEXT_VV(vwmulsu_vv_h, 2, 4)
1621 GEN_VEXT_VV(vwmulsu_vv_w, 4, 8)
1622 
1623 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1624 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1625 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1626 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1627 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1628 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1629 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1630 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1631 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
1632 GEN_VEXT_VX(vwmul_vx_b, 1, 2)
1633 GEN_VEXT_VX(vwmul_vx_h, 2, 4)
1634 GEN_VEXT_VX(vwmul_vx_w, 4, 8)
1635 GEN_VEXT_VX(vwmulu_vx_b, 1, 2)
1636 GEN_VEXT_VX(vwmulu_vx_h, 2, 4)
1637 GEN_VEXT_VX(vwmulu_vx_w, 4, 8)
1638 GEN_VEXT_VX(vwmulsu_vx_b, 1, 2)
1639 GEN_VEXT_VX(vwmulsu_vx_h, 2, 4)
1640 GEN_VEXT_VX(vwmulsu_vx_w, 4, 8)
1641 
1642 /* Vector Single-Width Integer Multiply-Add Instructions */
1643 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
1644 static void do_##NAME(void *vd, void *vs1, void *vs2, int i)       \
1645 {                                                                  \
1646     TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
1647     TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
1648     TD d = *((TD *)vd + HD(i));                                    \
1649     *((TD *)vd + HD(i)) = OP(s2, s1, d);                           \
1650 }
1651 
1652 #define DO_MACC(N, M, D) (M * N + D)
1653 #define DO_NMSAC(N, M, D) (-(M * N) + D)
1654 #define DO_MADD(N, M, D) (M * D + N)
1655 #define DO_NMSUB(N, M, D) (-(M * D) + N)
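/*
 * In these macros N is the vs2 element, M is the vs1 element (or the
 * scalar rs1) and D is the current vd element, matching the
 * OP(s2, s1, d) call above.  MACC/NMSAC accumulate into vd
 * (vd = +/-(vs1 * vs2) + vd), while MADD/NMSUB overwrite vd with
 * +/-(vs1 * vd) + vs2.
 */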
1656 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1657 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1658 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1659 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1660 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1661 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1662 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1663 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1664 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1665 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1666 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1667 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1668 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1669 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1670 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1671 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
1672 GEN_VEXT_VV(vmacc_vv_b, 1, 1)
1673 GEN_VEXT_VV(vmacc_vv_h, 2, 2)
1674 GEN_VEXT_VV(vmacc_vv_w, 4, 4)
1675 GEN_VEXT_VV(vmacc_vv_d, 8, 8)
1676 GEN_VEXT_VV(vnmsac_vv_b, 1, 1)
1677 GEN_VEXT_VV(vnmsac_vv_h, 2, 2)
1678 GEN_VEXT_VV(vnmsac_vv_w, 4, 4)
1679 GEN_VEXT_VV(vnmsac_vv_d, 8, 8)
1680 GEN_VEXT_VV(vmadd_vv_b, 1, 1)
1681 GEN_VEXT_VV(vmadd_vv_h, 2, 2)
1682 GEN_VEXT_VV(vmadd_vv_w, 4, 4)
1683 GEN_VEXT_VV(vmadd_vv_d, 8, 8)
1684 GEN_VEXT_VV(vnmsub_vv_b, 1, 1)
1685 GEN_VEXT_VV(vnmsub_vv_h, 2, 2)
1686 GEN_VEXT_VV(vnmsub_vv_w, 4, 4)
1687 GEN_VEXT_VV(vnmsub_vv_d, 8, 8)
1688 
1689 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
1690 static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
1691 {                                                                   \
1692     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
1693     TD d = *((TD *)vd + HD(i));                                     \
1694     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d);                   \
1695 }
1696 
1697 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1698 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1699 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1700 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1701 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1702 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1703 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1704 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1705 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1706 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1707 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1708 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1709 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1710 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1711 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1712 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
1713 GEN_VEXT_VX(vmacc_vx_b, 1, 1)
1714 GEN_VEXT_VX(vmacc_vx_h, 2, 2)
1715 GEN_VEXT_VX(vmacc_vx_w, 4, 4)
1716 GEN_VEXT_VX(vmacc_vx_d, 8, 8)
1717 GEN_VEXT_VX(vnmsac_vx_b, 1, 1)
1718 GEN_VEXT_VX(vnmsac_vx_h, 2, 2)
1719 GEN_VEXT_VX(vnmsac_vx_w, 4, 4)
1720 GEN_VEXT_VX(vnmsac_vx_d, 8, 8)
1721 GEN_VEXT_VX(vmadd_vx_b, 1, 1)
1722 GEN_VEXT_VX(vmadd_vx_h, 2, 2)
1723 GEN_VEXT_VX(vmadd_vx_w, 4, 4)
1724 GEN_VEXT_VX(vmadd_vx_d, 8, 8)
1725 GEN_VEXT_VX(vnmsub_vx_b, 1, 1)
1726 GEN_VEXT_VX(vnmsub_vx_h, 2, 2)
1727 GEN_VEXT_VX(vnmsub_vx_w, 4, 4)
1728 GEN_VEXT_VX(vnmsub_vx_d, 8, 8)
1729 
1730 /* Vector Widening Integer Multiply-Add Instructions */
1731 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1732 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1733 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1734 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1735 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1736 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1737 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1738 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1739 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
1740 GEN_VEXT_VV(vwmaccu_vv_b, 1, 2)
1741 GEN_VEXT_VV(vwmaccu_vv_h, 2, 4)
1742 GEN_VEXT_VV(vwmaccu_vv_w, 4, 8)
1743 GEN_VEXT_VV(vwmacc_vv_b, 1, 2)
1744 GEN_VEXT_VV(vwmacc_vv_h, 2, 4)
1745 GEN_VEXT_VV(vwmacc_vv_w, 4, 8)
1746 GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2)
1747 GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4)
1748 GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8)
1749 
1750 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
1751 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
1752 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
1753 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
1754 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
1755 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
1756 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
1757 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
1758 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
1759 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
1760 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
1761 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
1762 GEN_VEXT_VX(vwmaccu_vx_b, 1, 2)
1763 GEN_VEXT_VX(vwmaccu_vx_h, 2, 4)
1764 GEN_VEXT_VX(vwmaccu_vx_w, 4, 8)
1765 GEN_VEXT_VX(vwmacc_vx_b, 1, 2)
1766 GEN_VEXT_VX(vwmacc_vx_h, 2, 4)
1767 GEN_VEXT_VX(vwmacc_vx_w, 4, 8)
1768 GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2)
1769 GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4)
1770 GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8)
1771 GEN_VEXT_VX(vwmaccus_vx_b, 1, 2)
1772 GEN_VEXT_VX(vwmaccus_vx_h, 2, 4)
1773 GEN_VEXT_VX(vwmaccus_vx_w, 4, 8)
1774 
1775 /* Vector Integer Merge and Move Instructions */
1776 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H)                              \
1777 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env,           \
1778                   uint32_t desc)                                     \
1779 {                                                                    \
1780     uint32_t vl = env->vl;                                           \
1781     uint32_t i;                                                      \
1782                                                                      \
1783     for (i = env->vstart; i < vl; i++) {                             \
1784         ETYPE s1 = *((ETYPE *)vs1 + H(i));                           \
1785         *((ETYPE *)vd + H(i)) = s1;                                  \
1786     }                                                                \
1787     env->vstart = 0;                                                 \
1788 }
1789 
1790 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t,  H1)
1791 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
1792 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
1793 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
1794 
1795 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H)                              \
1796 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env,         \
1797                   uint32_t desc)                                     \
1798 {                                                                    \
1799     uint32_t vl = env->vl;                                           \
1800     uint32_t i;                                                      \
1801                                                                      \
1802     for (i = env->vstart; i < vl; i++) {                             \
1803         *((ETYPE *)vd + H(i)) = (ETYPE)s1;                           \
1804     }                                                                \
1805     env->vstart = 0;                                                 \
1806 }
1807 
1808 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t,  H1)
1809 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
1810 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
1811 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
1812 
1813 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H)                           \
1814 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,          \
1815                   CPURISCVState *env, uint32_t desc)                 \
1816 {                                                                    \
1817     uint32_t vl = env->vl;                                           \
1818     uint32_t i;                                                      \
1819                                                                      \
1820     for (i = env->vstart; i < vl; i++) {                             \
1821         ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1);            \
1822         *((ETYPE *)vd + H(i)) = *(vt + H(i));                        \
1823     }                                                                \
1824     env->vstart = 0;                                                 \
1825 }
1826 
1827 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t,  H1)
1828 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
1829 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
1830 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
1831 
1832 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H)                           \
1833 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,               \
1834                   void *vs2, CPURISCVState *env, uint32_t desc)      \
1835 {                                                                    \
1836     uint32_t vl = env->vl;                                           \
1837     uint32_t i;                                                      \
1838                                                                      \
1839     for (i = env->vstart; i < vl; i++) {                             \
1840         ETYPE s2 = *((ETYPE *)vs2 + H(i));                           \
1841         ETYPE d = (!vext_elem_mask(v0, i) ? s2 :                     \
1842                    (ETYPE)(target_long)s1);                          \
1843         *((ETYPE *)vd + H(i)) = d;                                   \
1844     }                                                                \
1845     env->vstart = 0;                                                 \
1846 }
1847 
1848 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t,  H1)
1849 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
1850 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
1851 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
1852 
1853 /*
1854  *** Vector Fixed-Point Arithmetic Instructions
1855  */
1856 
1857 /* Vector Single-Width Saturating Add and Subtract */
1858 
1859 /*
1860  * Fixed-point instructions generally need a rounding mode and saturation,
1861  * so define the common macros for fixed point here.
1862  */
1863 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
1864                           CPURISCVState *env, int vxrm);
1865 
1866 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)     \
1867 static inline void                                                  \
1868 do_##NAME(void *vd, void *vs1, void *vs2, int i,                    \
1869           CPURISCVState *env, int vxrm)                             \
1870 {                                                                   \
1871     TX1 s1 = *((T1 *)vs1 + HS1(i));                                 \
1872     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
1873     *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1);                    \
1874 }
1875 
1876 static inline void
1877 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
1878              CPURISCVState *env,
1879              uint32_t vl, uint32_t vm, int vxrm,
1880              opivv2_rm_fn *fn)
1881 {
1882     for (uint32_t i = env->vstart; i < vl; i++) {
1883         if (!vm && !vext_elem_mask(v0, i)) {
1884             continue;
1885         }
1886         fn(vd, vs1, vs2, i, env, vxrm);
1887     }
1888     env->vstart = 0;
1889 }
1890 
1891 static inline void
1892 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
1893              CPURISCVState *env,
1894              uint32_t desc, uint32_t esz, uint32_t dsz,
1895              opivv2_rm_fn *fn)
1896 {
1897     uint32_t vm = vext_vm(desc);
1898     uint32_t vl = env->vl;
1899 
1900     switch (env->vxrm) {
1901     case 0: /* rnu */
1902         vext_vv_rm_1(vd, v0, vs1, vs2,
1903                      env, vl, vm, 0, fn);
1904         break;
1905     case 1: /* rne */
1906         vext_vv_rm_1(vd, v0, vs1, vs2,
1907                      env, vl, vm, 1, fn);
1908         break;
1909     case 2: /* rdn */
1910         vext_vv_rm_1(vd, v0, vs1, vs2,
1911                      env, vl, vm, 2, fn);
1912         break;
1913     default: /* rod */
1914         vext_vv_rm_1(vd, v0, vs1, vs2,
1915                      env, vl, vm, 3, fn);
1916         break;
1917     }
1918 }
1919 
1920 /* generate helpers for fixed point instructions with OPIVV format */
1921 #define GEN_VEXT_VV_RM(NAME, ESZ, DSZ)                          \
1922 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,     \
1923                   CPURISCVState *env, uint32_t desc)            \
1924 {                                                               \
1925     vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,         \
1926                  do_##NAME);                                    \
1927 }
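/*
 * Each fixed-point helper below is built from two pieces:
 * RVVCALL(OPIVV2_RM, NAME, ...) instantiates the per-element do_NAME()
 * function around one of the saturating/rounding primitives, and
 * GEN_VEXT_VV_RM(NAME, ...) wraps it in the HELPER(NAME) entry point,
 * which resolves env->vxrm once and then applies do_NAME() to every
 * unmasked element from vstart to vl.
 */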
1928 
1929 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
1930 {
1931     uint8_t res = a + b;
1932     if (res < a) {
1933         res = UINT8_MAX;
1934         env->vxsat = 0x1;
1935     }
1936     return res;
1937 }
1938 
1939 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
1940                                uint16_t b)
1941 {
1942     uint16_t res = a + b;
1943     if (res < a) {
1944         res = UINT16_MAX;
1945         env->vxsat = 0x1;
1946     }
1947     return res;
1948 }
1949 
1950 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
1951                                uint32_t b)
1952 {
1953     uint32_t res = a + b;
1954     if (res < a) {
1955         res = UINT32_MAX;
1956         env->vxsat = 0x1;
1957     }
1958     return res;
1959 }
1960 
1961 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
1962                                uint64_t b)
1963 {
1964     uint64_t res = a + b;
1965     if (res < a) {
1966         res = UINT64_MAX;
1967         env->vxsat = 0x1;
1968     }
1969     return res;
1970 }
1971 
1972 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
1973 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
1974 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
1975 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
1976 GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1)
1977 GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2)
1978 GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4)
1979 GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8)
1980 
1981 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
1982                           CPURISCVState *env, int vxrm);
1983 
1984 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)          \
1985 static inline void                                                  \
1986 do_##NAME(void *vd, target_long s1, void *vs2, int i,               \
1987           CPURISCVState *env, int vxrm)                             \
1988 {                                                                   \
1989     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
1990     *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1);           \
1991 }
1992 
1993 static inline void
1994 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
1995              CPURISCVState *env,
1996              uint32_t vl, uint32_t vm, int vxrm,
1997              opivx2_rm_fn *fn)
1998 {
1999     for (uint32_t i = env->vstart; i < vl; i++) {
2000         if (!vm && !vext_elem_mask(v0, i)) {
2001             continue;
2002         }
2003         fn(vd, s1, vs2, i, env, vxrm);
2004     }
2005     env->vstart = 0;
2006 }
2007 
2008 static inline void
2009 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2010              CPURISCVState *env,
2011              uint32_t desc, uint32_t esz, uint32_t dsz,
2012              opivx2_rm_fn *fn)
2013 {
2014     uint32_t vm = vext_vm(desc);
2015     uint32_t vl = env->vl;
2016 
2017     switch (env->vxrm) {
2018     case 0: /* rnu */
2019         vext_vx_rm_1(vd, v0, s1, vs2,
2020                      env, vl, vm, 0, fn);
2021         break;
2022     case 1: /* rne */
2023         vext_vx_rm_1(vd, v0, s1, vs2,
2024                      env, vl, vm, 1, fn);
2025         break;
2026     case 2: /* rdn */
2027         vext_vx_rm_1(vd, v0, s1, vs2,
2028                      env, vl, vm, 2, fn);
2029         break;
2030     default: /* rod */
2031         vext_vx_rm_1(vd, v0, s1, vs2,
2032                      env, vl, vm, 3, fn);
2033         break;
2034     }
2035 }
2036 
2037 /* generate helpers for fixed point instructions with OPIVX format */
2038 #define GEN_VEXT_VX_RM(NAME, ESZ, DSZ)                    \
2039 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
2040         void *vs2, CPURISCVState *env, uint32_t desc)     \
2041 {                                                         \
2042     vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ,    \
2043                  do_##NAME);                              \
2044 }
2045 
2046 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2047 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2048 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2049 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
2050 GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1)
2051 GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2)
2052 GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4)
2053 GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8)
2054 
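/*
 * (res ^ a) & (res ^ b) has its sign bit set exactly when a and b have
 * the same sign but res does not, i.e. when a + b overflowed; the
 * result is then clamped towards the sign of the operands and vxsat
 * is set.
 */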
2055 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2056 {
2057     int8_t res = a + b;
2058     if ((res ^ a) & (res ^ b) & INT8_MIN) {
2059         res = a > 0 ? INT8_MAX : INT8_MIN;
2060         env->vxsat = 0x1;
2061     }
2062     return res;
2063 }
2064 
2065 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2066 {
2067     int16_t res = a + b;
2068     if ((res ^ a) & (res ^ b) & INT16_MIN) {
2069         res = a > 0 ? INT16_MAX : INT16_MIN;
2070         env->vxsat = 0x1;
2071     }
2072     return res;
2073 }
2074 
2075 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2076 {
2077     int32_t res = a + b;
2078     if ((res ^ a) & (res ^ b) & INT32_MIN) {
2079         res = a > 0 ? INT32_MAX : INT32_MIN;
2080         env->vxsat = 0x1;
2081     }
2082     return res;
2083 }
2084 
2085 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2086 {
2087     int64_t res = a + b;
2088     if ((res ^ a) & (res ^ b) & INT64_MIN) {
2089         res = a > 0 ? INT64_MAX : INT64_MIN;
2090         env->vxsat = 0x1;
2091     }
2092     return res;
2093 }
2094 
2095 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2096 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2097 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2098 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
2099 GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1)
2100 GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2)
2101 GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4)
2102 GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8)
2103 
2104 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2105 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2106 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2107 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
2108 GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1)
2109 GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2)
2110 GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4)
2111 GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8)
2112 
2113 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2114 {
2115     uint8_t res = a - b;
2116     if (res > a) {
2117         res = 0;
2118         env->vxsat = 0x1;
2119     }
2120     return res;
2121 }
2122 
2123 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2124                                uint16_t b)
2125 {
2126     uint16_t res = a - b;
2127     if (res > a) {
2128         res = 0;
2129         env->vxsat = 0x1;
2130     }
2131     return res;
2132 }
2133 
2134 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2135                                uint32_t b)
2136 {
2137     uint32_t res = a - b;
2138     if (res > a) {
2139         res = 0;
2140         env->vxsat = 0x1;
2141     }
2142     return res;
2143 }
2144 
2145 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2146                                uint64_t b)
2147 {
2148     uint64_t res = a - b;
2149     if (res > a) {
2150         res = 0;
2151         env->vxsat = 0x1;
2152     }
2153     return res;
2154 }
2155 
2156 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2157 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2158 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2159 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
2160 GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1)
2161 GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2)
2162 GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4)
2163 GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8)
2164 
2165 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2166 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2167 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2168 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
2169 GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1)
2170 GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2)
2171 GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4)
2172 GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8)
2173 
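/*
 * For subtraction the test uses (a ^ b): a - b can only overflow when
 * the operands have opposite signs and res takes the sign of b, so
 * (res ^ a) & (a ^ b) has its sign bit set exactly in that case and
 * the result is clamped towards the sign of a.
 */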
2174 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2175 {
2176     int8_t res = a - b;
2177     if ((res ^ a) & (a ^ b) & INT8_MIN) {
2178         res = a >= 0 ? INT8_MAX : INT8_MIN;
2179         env->vxsat = 0x1;
2180     }
2181     return res;
2182 }
2183 
2184 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2185 {
2186     int16_t res = a - b;
2187     if ((res ^ a) & (a ^ b) & INT16_MIN) {
2188         res = a >= 0 ? INT16_MAX : INT16_MIN;
2189         env->vxsat = 0x1;
2190     }
2191     return res;
2192 }
2193 
2194 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2195 {
2196     int32_t res = a - b;
2197     if ((res ^ a) & (a ^ b) & INT32_MIN) {
2198         res = a >= 0 ? INT32_MAX : INT32_MIN;
2199         env->vxsat = 0x1;
2200     }
2201     return res;
2202 }
2203 
2204 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2205 {
2206     int64_t res = a - b;
2207     if ((res ^ a) & (a ^ b) & INT64_MIN) {
2208         res = a >= 0 ? INT64_MAX : INT64_MIN;
2209         env->vxsat = 0x1;
2210     }
2211     return res;
2212 }
2213 
2214 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2215 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2216 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2217 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
2218 GEN_VEXT_VV_RM(vssub_vv_b, 1, 1)
2219 GEN_VEXT_VV_RM(vssub_vv_h, 2, 2)
2220 GEN_VEXT_VV_RM(vssub_vv_w, 4, 4)
2221 GEN_VEXT_VV_RM(vssub_vv_d, 8, 8)
2222 
2223 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2224 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2225 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2226 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
2227 GEN_VEXT_VX_RM(vssub_vx_b, 1, 1)
2228 GEN_VEXT_VX_RM(vssub_vx_h, 2, 2)
2229 GEN_VEXT_VX_RM(vssub_vx_w, 4, 4)
2230 GEN_VEXT_VX_RM(vssub_vx_d, 8, 8)
2231 
2232 /* Vector Single-Width Averaging Add and Subtract */
2233 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2234 {
2235     uint8_t d = extract64(v, shift, 1);
2236     uint8_t d1;
2237     uint64_t D1, D2;
2238 
2239     if (shift == 0 || shift > 64) {
2240         return 0;
2241     }
2242 
2243     d1 = extract64(v, shift - 1, 1);
2244     D1 = extract64(v, 0, shift);
2245     if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2246         return d1;
2247     } else if (vxrm == 1) { /* round-to-nearest-even */
2248         if (shift > 1) {
2249             D2 = extract64(v, 0, shift - 1);
2250             return d1 & ((D2 != 0) | d);
2251         } else {
2252             return d1 & d;
2253         }
2254     } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2255         return !d & (D1 != 0);
2256     }
2257     return 0; /* round-down (truncate) */
2258 }
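/*
 * For example, rounding v = 0b0110 with shift = 2 drops the bits 0b10:
 * d (the new LSB, bit 2) is 1, d1 (the most significant dropped bit)
 * is 1 and D2 (the remaining dropped bits) is 0.  rnu returns d1 = 1;
 * rne returns d1 & ((D2 != 0) | d) = 1 because the new LSB is odd;
 * rdn returns 0; rod returns !d & (D1 != 0) = 0 as the LSB is already
 * odd.  The caller adds this value to the shifted result.
 */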
2259 
2260 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2261 {
2262     int64_t res = (int64_t)a + b;
2263     uint8_t round = get_round(vxrm, res, 1);
2264 
2265     return (res >> 1) + round;
2266 }
2267 
2268 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2269 {
2270     int64_t res = a + b;
2271     uint8_t round = get_round(vxrm, res, 1);
2272     int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2273 
2274     /* With signed overflow, bit 64 is the inverse of bit 63. */
2275     return ((res >> 1) ^ over) + round;
2276 }
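/*
 * The 65-bit sum a + b cannot be represented directly: res holds its
 * low 64 bits, and when the addition overflowed the true bit 64 is the
 * complement of res's sign bit.  `over` is nonzero (sign bit only) in
 * exactly that case, so xoring it into the arithmetic right shift
 * restores the correct top bit of the average before rounding.
 */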
2277 
2278 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2279 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2280 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2281 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
2282 GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1)
2283 GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2)
2284 GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4)
2285 GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8)
2286 
2287 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2288 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2289 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2290 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
2291 GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1)
2292 GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2)
2293 GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4)
2294 GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8)
2295 
2296 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2297                                uint32_t a, uint32_t b)
2298 {
2299     uint64_t res = (uint64_t)a + b;
2300     uint8_t round = get_round(vxrm, res, 1);
2301 
2302     return (res >> 1) + round;
2303 }
2304 
2305 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
2306                                uint64_t a, uint64_t b)
2307 {
2308     uint64_t res = a + b;
2309     uint8_t round = get_round(vxrm, res, 1);
2310     uint64_t over = (uint64_t)(res < a) << 63;
2311 
2312     return ((res >> 1) | over) + round;
2313 }
2314 
2315 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
2316 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
2317 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
2318 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
2319 GEN_VEXT_VV_RM(vaaddu_vv_b, 1, 1)
2320 GEN_VEXT_VV_RM(vaaddu_vv_h, 2, 2)
2321 GEN_VEXT_VV_RM(vaaddu_vv_w, 4, 4)
2322 GEN_VEXT_VV_RM(vaaddu_vv_d, 8, 8)
2323 
2324 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
2325 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
2326 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
2327 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
2328 GEN_VEXT_VX_RM(vaaddu_vx_b, 1, 1)
2329 GEN_VEXT_VX_RM(vaaddu_vx_h, 2, 2)
2330 GEN_VEXT_VX_RM(vaaddu_vx_w, 4, 4)
2331 GEN_VEXT_VX_RM(vaaddu_vx_d, 8, 8)
2332 
2333 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2334 {
2335     int64_t res = (int64_t)a - b;
2336     uint8_t round = get_round(vxrm, res, 1);
2337 
2338     return (res >> 1) + round;
2339 }
2340 
2341 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2342 {
2343     int64_t res = (int64_t)a - b;
2344     uint8_t round = get_round(vxrm, res, 1);
2345     int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2346 
2347     /* With signed overflow, bit 64 is the inverse of bit 63. */
2348     return ((res >> 1) ^ over) + round;
2349 }
2350 
2351 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2352 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2353 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2354 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
2355 GEN_VEXT_VV_RM(vasub_vv_b, 1, 1)
2356 GEN_VEXT_VV_RM(vasub_vv_h, 2, 2)
2357 GEN_VEXT_VV_RM(vasub_vv_w, 4, 4)
2358 GEN_VEXT_VV_RM(vasub_vv_d, 8, 8)
2359 
2360 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2361 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2362 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2363 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
2364 GEN_VEXT_VX_RM(vasub_vx_b, 1, 1)
2365 GEN_VEXT_VX_RM(vasub_vx_h, 2, 2)
2366 GEN_VEXT_VX_RM(vasub_vx_w, 4, 4)
2367 GEN_VEXT_VX_RM(vasub_vx_d, 8, 8)
2368 
2369 static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2370                                uint32_t a, uint32_t b)
2371 {
2372     int64_t res = (int64_t)a - b;
2373     uint8_t round = get_round(vxrm, res, 1);
2374 
2375     return (res >> 1) + round;
2376 }
2377 
2378 static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
2379                                uint64_t a, uint64_t b)
2380 {
2381     uint64_t res = (uint64_t)a - b;
2382     uint8_t round = get_round(vxrm, res, 1);
2383     uint64_t over = (uint64_t)(res > a) << 63;
2384 
2385     return ((res >> 1) | over) + round;
2386 }
2387 
2388 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
2389 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
2390 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
2391 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
2392 GEN_VEXT_VV_RM(vasubu_vv_b, 1, 1)
2393 GEN_VEXT_VV_RM(vasubu_vv_h, 2, 2)
2394 GEN_VEXT_VV_RM(vasubu_vv_w, 4, 4)
2395 GEN_VEXT_VV_RM(vasubu_vv_d, 8, 8)
2396 
2397 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
2398 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
2399 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
2400 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
2401 GEN_VEXT_VX_RM(vasubu_vx_b, 1, 1)
2402 GEN_VEXT_VX_RM(vasubu_vx_h, 2, 2)
2403 GEN_VEXT_VX_RM(vasubu_vx_w, 4, 4)
2404 GEN_VEXT_VX_RM(vasubu_vx_d, 8, 8)
2405 
2406 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2407 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2408 {
2409     uint8_t round;
2410     int16_t res;
2411 
2412     res = (int16_t)a * (int16_t)b;
2413     round = get_round(vxrm, res, 7);
2414     res   = (res >> 7) + round;
2415 
2416     if (res > INT8_MAX) {
2417         env->vxsat = 0x1;
2418         return INT8_MAX;
2419     } else if (res < INT8_MIN) {
2420         env->vxsat = 0x1;
2421         return INT8_MIN;
2422     } else {
2423         return res;
2424     }
2425 }
2426 
2427 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2428 {
2429     uint8_t round;
2430     int32_t res;
2431 
2432     res = (int32_t)a * (int32_t)b;
2433     round = get_round(vxrm, res, 15);
2434     res   = (res >> 15) + round;
2435 
2436     if (res > INT16_MAX) {
2437         env->vxsat = 0x1;
2438         return INT16_MAX;
2439     } else if (res < INT16_MIN) {
2440         env->vxsat = 0x1;
2441         return INT16_MIN;
2442     } else {
2443         return res;
2444     }
2445 }
2446 
2447 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2448 {
2449     uint8_t round;
2450     int64_t res;
2451 
2452     res = (int64_t)a * (int64_t)b;
2453     round = get_round(vxrm, res, 31);
2454     res   = (res >> 31) + round;
2455 
2456     if (res > INT32_MAX) {
2457         env->vxsat = 0x1;
2458         return INT32_MAX;
2459     } else if (res < INT32_MIN) {
2460         env->vxsat = 0x1;
2461         return INT32_MIN;
2462     } else {
2463         return res;
2464     }
2465 }
2466 
2467 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2468 {
2469     uint8_t round;
2470     uint64_t hi_64, lo_64;
2471     int64_t res;
2472 
2473     if (a == INT64_MIN && b == INT64_MIN) {
2474         env->vxsat = 1;
2475         return INT64_MAX;
2476     }
2477 
2478     muls64(&lo_64, &hi_64, a, b);
2479     round = get_round(vxrm, lo_64, 63);
2480     /*
2481      * Cannot overflow, as there are always
2482      * 2 sign bits after multiply.
2483      */
2484     res = (hi_64 << 1) | (lo_64 >> 63);
2485     if (round) {
2486         if (res == INT64_MAX) {
2487             env->vxsat = 1;
2488         } else {
2489             res += 1;
2490         }
2491     }
2492     return res;
2493 }
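/*
 * With the INT64_MIN * INT64_MIN case filtered out above, |a * b| is
 * strictly less than 2 ** 126, so bits 127 and 126 of the 128-bit
 * product are both copies of the sign and the shift-by-one into res
 * discards only the redundant top bit.
 */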
2494 
2495 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2496 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2497 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2498 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
2499 GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1)
2500 GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2)
2501 GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4)
2502 GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8)
2503 
2504 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2505 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2506 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2507 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
2508 GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1)
2509 GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2)
2510 GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4)
2511 GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8)
2512 
2513 /* Vector Single-Width Scaling Shift Instructions */
2514 static inline uint8_t
2515 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2516 {
2517     uint8_t round, shift = b & 0x7;
2518     uint8_t res;
2519 
2520     round = get_round(vxrm, a, shift);
2521     res   = (a >> shift)  + round;
2522     return res;
2523 }
2524 static inline uint16_t
2525 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2526 {
2527     uint8_t round, shift = b & 0xf;
2528     uint16_t res;
2529 
2530     round = get_round(vxrm, a, shift);
2531     res   = (a >> shift)  + round;
2532     return res;
2533 }
2534 static inline uint32_t
2535 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2536 {
2537     uint8_t round, shift = b & 0x1f;
2538     uint32_t res;
2539 
2540     round = get_round(vxrm, a, shift);
2541     res   = (a >> shift)  + round;
2542     return res;
2543 }
2544 static inline uint64_t
2545 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2546 {
2547     uint8_t round, shift = b & 0x3f;
2548     uint64_t res;
2549 
2550     round = get_round(vxrm, a, shift);
2551     res   = (a >> shift)  + round;
2552     return res;
2553 }
2554 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2555 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2556 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2557 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
2558 GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1)
2559 GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2)
2560 GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4)
2561 GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8)
2562 
2563 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2564 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2565 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2566 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
2567 GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1)
2568 GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2)
2569 GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4)
2570 GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8)
2571 
2572 static inline int8_t
2573 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2574 {
2575     uint8_t round, shift = b & 0x7;
2576     int8_t res;
2577 
2578     round = get_round(vxrm, a, shift);
2579     res   = (a >> shift)  + round;
2580     return res;
2581 }
2582 static inline int16_t
2583 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2584 {
2585     uint8_t round, shift = b & 0xf;
2586     int16_t res;
2587 
2588     round = get_round(vxrm, a, shift);
2589     res   = (a >> shift)  + round;
2590     return res;
2591 }
2592 static inline int32_t
2593 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2594 {
2595     uint8_t round, shift = b & 0x1f;
2596     int32_t res;
2597 
2598     round = get_round(vxrm, a, shift);
2599     res   = (a >> shift)  + round;
2600     return res;
2601 }
2602 static inline int64_t
2603 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2604 {
2605     uint8_t round, shift = b & 0x3f;
2606     int64_t res;
2607 
2608     round = get_round(vxrm, a, shift);
2609     res   = (a >> shift)  + round;
2610     return res;
2611 }
2612 
2613 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2614 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2615 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2616 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
2617 GEN_VEXT_VV_RM(vssra_vv_b, 1, 1)
2618 GEN_VEXT_VV_RM(vssra_vv_h, 2, 2)
2619 GEN_VEXT_VV_RM(vssra_vv_w, 4, 4)
2620 GEN_VEXT_VV_RM(vssra_vv_d, 8, 8)
2621 
2622 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2623 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2624 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2625 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
2626 GEN_VEXT_VX_RM(vssra_vx_b, 1, 1)
2627 GEN_VEXT_VX_RM(vssra_vx_h, 2, 2)
2628 GEN_VEXT_VX_RM(vssra_vx_w, 4, 4)
2629 GEN_VEXT_VX_RM(vssra_vx_d, 8, 8)
2630 
2631 /* Vector Narrowing Fixed-Point Clip Instructions */
2632 static inline int8_t
2633 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2634 {
2635     uint8_t round, shift = b & 0xf;
2636     int16_t res;
2637 
2638     round = get_round(vxrm, a, shift);
2639     res   = (a >> shift)  + round;
2640     if (res > INT8_MAX) {
2641         env->vxsat = 0x1;
2642         return INT8_MAX;
2643     } else if (res < INT8_MIN) {
2644         env->vxsat = 0x1;
2645         return INT8_MIN;
2646     } else {
2647         return res;
2648     }
2649 }
2650 
2651 static inline int16_t
2652 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2653 {
2654     uint8_t round, shift = b & 0x1f;
2655     int32_t res;
2656 
2657     round = get_round(vxrm, a, shift);
2658     res   = (a >> shift)  + round;
2659     if (res > INT16_MAX) {
2660         env->vxsat = 0x1;
2661         return INT16_MAX;
2662     } else if (res < INT16_MIN) {
2663         env->vxsat = 0x1;
2664         return INT16_MIN;
2665     } else {
2666         return res;
2667     }
2668 }
2669 
2670 static inline int32_t
2671 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2672 {
2673     uint8_t round, shift = b & 0x3f;
2674     int64_t res;
2675 
2676     round = get_round(vxrm, a, shift);
2677     res   = (a >> shift)  + round;
2678     if (res > INT32_MAX) {
2679         env->vxsat = 0x1;
2680         return INT32_MAX;
2681     } else if (res < INT32_MIN) {
2682         env->vxsat = 0x1;
2683         return INT32_MIN;
2684     } else {
2685         return res;
2686     }
2687 }
2688 
2689 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
2690 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
2691 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
2692 GEN_VEXT_VV_RM(vnclip_wv_b, 1, 1)
2693 GEN_VEXT_VV_RM(vnclip_wv_h, 2, 2)
2694 GEN_VEXT_VV_RM(vnclip_wv_w, 4, 4)
2695 
2696 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
2697 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
2698 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
2699 GEN_VEXT_VX_RM(vnclip_wx_b, 1, 1)
2700 GEN_VEXT_VX_RM(vnclip_wx_h, 2, 2)
2701 GEN_VEXT_VX_RM(vnclip_wx_w, 4, 4)
2702 
2703 static inline uint8_t
2704 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2705 {
2706     uint8_t round, shift = b & 0xf;
2707     uint16_t res;
2708 
2709     round = get_round(vxrm, a, shift);
2710     res   = (a >> shift)  + round;
2711     if (res > UINT8_MAX) {
2712         env->vxsat = 0x1;
2713         return UINT8_MAX;
2714     } else {
2715         return res;
2716     }
2717 }
2718 
2719 static inline uint16_t
2720 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
2721 {
2722     uint8_t round, shift = b & 0x1f;
2723     uint32_t res;
2724 
2725     round = get_round(vxrm, a, shift);
2726     res   = (a >> shift)  + round;
2727     if (res > UINT16_MAX) {
2728         env->vxsat = 0x1;
2729         return UINT16_MAX;
2730     } else {
2731         return res;
2732     }
2733 }
2734 
2735 static inline uint32_t
2736 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
2737 {
2738     uint8_t round, shift = b & 0x3f;
2739     uint64_t res;
2740 
2741     round = get_round(vxrm, a, shift);
2742     res   = (a >> shift)  + round;
2743     if (res > UINT32_MAX) {
2744         env->vxsat = 0x1;
2745         return UINT32_MAX;
2746     } else {
2747         return res;
2748     }
2749 }
2750 
2751 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
2752 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
2753 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
2754 GEN_VEXT_VV_RM(vnclipu_wv_b, 1, 1)
2755 GEN_VEXT_VV_RM(vnclipu_wv_h, 2, 2)
2756 GEN_VEXT_VV_RM(vnclipu_wv_w, 4, 4)
2757 
2758 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
2759 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
2760 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
2761 GEN_VEXT_VX_RM(vnclipu_wx_b, 1, 1)
2762 GEN_VEXT_VX_RM(vnclipu_wx_h, 2, 2)
2763 GEN_VEXT_VX_RM(vnclipu_wx_w, 4, 4)
2764 
2765 /*
2766  *** Vector Floating-Point Arithmetic Instructions
2767  */
2768 /* Vector Single-Width Floating-Point Add/Subtract Instructions */
2769 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
2770 static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
2771                       CPURISCVState *env)                      \
2772 {                                                              \
2773     TX1 s1 = *((T1 *)vs1 + HS1(i));                            \
2774     TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
2775     *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status);         \
2776 }
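/*
 * In OPFVV2, TD/T1/T2 are the destination and source element types,
 * TX1/TX2 the types the source elements are converted to before the
 * call, HD/HS1/HS2 the host-endian index helpers, and OP a softfloat
 * routine invoked as OP(vs2 element, vs1 element, &env->fp_status).
 */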
2777 
2778 #define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ)                   \
2779 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
2780                   void *vs2, CPURISCVState *env,          \
2781                   uint32_t desc)                          \
2782 {                                                         \
2783     uint32_t vm = vext_vm(desc);                          \
2784     uint32_t vl = env->vl;                                \
2785     uint32_t i;                                           \
2786                                                           \
2787     for (i = env->vstart; i < vl; i++) {                  \
2788         if (!vm && !vext_elem_mask(v0, i)) {              \
2789             continue;                                     \
2790         }                                                 \
2791         do_##NAME(vd, vs1, vs2, i, env);                  \
2792     }                                                     \
2793     env->vstart = 0;                                      \
2794 }
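/*
 * The generated helper starts at env->vstart, skips (i.e. leaves
 * unchanged) any element whose mask bit is clear when vm == 0, and
 * resets vstart to 0 once the loop has completed.
 */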
2795 
2796 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
2797 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
2798 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
2799 GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2)
2800 GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4)
2801 GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8)
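/*
 * For reference, the RVVCALL(OPFVV2, vfadd_vv_h, ...) line above
 * roughly expands to the worker below (OP_UUU_H supplies uint16_t for
 * every element type, as defined earlier in this file):
 *
 *     static void do_vfadd_vv_h(void *vd, void *vs1, void *vs2, int i,
 *                               CPURISCVState *env)
 *     {
 *         uint16_t s1 = *((uint16_t *)vs1 + H2(i));
 *         uint16_t s2 = *((uint16_t *)vs2 + H2(i));
 *         *((uint16_t *)vd + H2(i)) = float16_add(s2, s1, &env->fp_status);
 *     }
 */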
2802 
2803 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)        \
2804 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
2805                       CPURISCVState *env)                      \
2806 {                                                              \
2807     TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
2808     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
2809 }
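/*
 * For the vector-scalar (_vf) forms the f-register operand arrives as
 * a uint64_t; the (TX1)(T1)s1 cast narrows it to the element width
 * before it is handed to the softfloat routine.
 */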
2810 
2811 #define GEN_VEXT_VF(NAME, ESZ, DSZ)                       \
2812 void HELPER(NAME)(void *vd, void *v0, uint64_t s1,        \
2813                   void *vs2, CPURISCVState *env,          \
2814                   uint32_t desc)                          \
2815 {                                                         \
2816     uint32_t vm = vext_vm(desc);                          \
2817     uint32_t vl = env->vl;                                \
2818     uint32_t i;                                           \
2819                                                           \
2820     for (i = env->vstart; i < vl; i++) {                  \
2821         if (!vm && !vext_elem_mask(v0, i)) {              \
2822             continue;                                     \
2823         }                                                 \
2824         do_##NAME(vd, s1, vs2, i, env);                   \
2825     }                                                     \
2826     env->vstart = 0;                                      \
2827 }
2828 
2829 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
2830 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
2831 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
2832 GEN_VEXT_VF(vfadd_vf_h, 2, 2)
2833 GEN_VEXT_VF(vfadd_vf_w, 4, 4)
2834 GEN_VEXT_VF(vfadd_vf_d, 8, 8)
2835 
2836 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
2837 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
2838 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
2839 GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2)
2840 GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4)
2841 GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8)
2842 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
2843 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
2844 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
2845 GEN_VEXT_VF(vfsub_vf_h, 2, 2)
2846 GEN_VEXT_VF(vfsub_vf_w, 4, 4)
2847 GEN_VEXT_VF(vfsub_vf_d, 8, 8)
2848 
2849 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
2850 {
2851     return float16_sub(b, a, s);
2852 }
2853 
2854 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
2855 {
2856     return float32_sub(b, a, s);
2857 }
2858 
2859 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
2860 {
2861     return float64_sub(b, a, s);
2862 }
2863 
2864 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
2865 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
2866 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
2867 GEN_VEXT_VF(vfrsub_vf_h, 2, 2)
2868 GEN_VEXT_VF(vfrsub_vf_w, 4, 4)
2869 GEN_VEXT_VF(vfrsub_vf_d, 8, 8)
2870 
2871 /* Vector Widening Floating-Point Add/Subtract Instructions */
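/*
 * The widening helpers convert both single-width inputs up to the
 * double-width format (the "true" argument selects IEEE half-precision
 * for the float16 conversions) and operate at the wider precision.
 */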
2872 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
2873 {
2874     return float32_add(float16_to_float32(a, true, s),
2875             float16_to_float32(b, true, s), s);
2876 }
2877 
2878 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
2879 {
2880     return float64_add(float32_to_float64(a, s),
2881             float32_to_float64(b, s), s);
2883 }
2884 
2885 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
2886 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
2887 GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4)
2888 GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8)
2889 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
2890 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
2891 GEN_VEXT_VF(vfwadd_vf_h, 2, 4)
2892 GEN_VEXT_VF(vfwadd_vf_w, 4, 8)
2893 
2894 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
2895 {
2896     return float32_sub(float16_to_float32(a, true, s),
2897             float16_to_float32(b, true, s), s);
2898 }
2899 
2900 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
2901 {
2902     return float64_sub(float32_to_float64(a, s),
2903             float32_to_float64(b, s), s);
2905 }
2906 
2907 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
2908 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
2909 GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4)
2910 GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8)
2911 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
2912 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
2913 GEN_VEXT_VF(vfwsub_vf_h, 2, 4)
2914 GEN_VEXT_VF(vfwsub_vf_w, 4, 8)
2915 
2916 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
2917 {
2918     return float32_add(a, float16_to_float32(b, true, s), s);
2919 }
2920 
2921 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
2922 {
2923     return float64_add(a, float32_to_float64(b, s), s);
2924 }
2925 
2926 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
2927 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
2928 GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4)
2929 GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8)
2930 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
2931 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
2932 GEN_VEXT_VF(vfwadd_wf_h, 2, 4)
2933 GEN_VEXT_VF(vfwadd_wf_w, 4, 8)
2934 
2935 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
2936 {
2937     return float32_sub(a, float16_to_float32(b, true, s), s);
2938 }
2939 
2940 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
2941 {
2942     return float64_sub(a, float32_to_float64(b, s), s);
2943 }
2944 
2945 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
2946 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
2947 GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4)
2948 GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8)
2949 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
2950 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
2951 GEN_VEXT_VF(vfwsub_wf_h, 2, 4)
2952 GEN_VEXT_VF(vfwsub_wf_w, 4, 8)
2953 
2954 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */
2955 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
2956 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
2957 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
2958 GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2)
2959 GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4)
2960 GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8)
2961 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
2962 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
2963 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
2964 GEN_VEXT_VF(vfmul_vf_h, 2, 2)
2965 GEN_VEXT_VF(vfmul_vf_w, 4, 4)
2966 GEN_VEXT_VF(vfmul_vf_d, 8, 8)
2967 
2968 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
2969 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
2970 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
2971 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2)
2972 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4)
2973 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8)
2974 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
2975 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
2976 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
2977 GEN_VEXT_VF(vfdiv_vf_h, 2, 2)
2978 GEN_VEXT_VF(vfdiv_vf_w, 4, 4)
2979 GEN_VEXT_VF(vfdiv_vf_d, 8, 8)
2980 
2981 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
2982 {
2983     return float16_div(b, a, s);
2984 }
2985 
2986 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
2987 {
2988     return float32_div(b, a, s);
2989 }
2990 
2991 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
2992 {
2993     return float64_div(b, a, s);
2994 }
2995 
2996 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
2997 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
2998 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
2999 GEN_VEXT_VF(vfrdiv_vf_h, 2, 2)
3000 GEN_VEXT_VF(vfrdiv_vf_w, 4, 4)
3001 GEN_VEXT_VF(vfrdiv_vf_d, 8, 8)
3002 
3003 /* Vector Widening Floating-Point Multiply */
3004 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3005 {
3006     return float32_mul(float16_to_float32(a, true, s),
3007             float16_to_float32(b, true, s), s);
3008 }
3009 
3010 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3011 {
3012     return float64_mul(float32_to_float64(a, s),
3013             float32_to_float64(b, s), s);
3015 }

3016 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3017 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
3018 GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4)
3019 GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8)
3020 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3021 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
3022 GEN_VEXT_VF(vfwmul_vf_h, 2, 4)
3023 GEN_VEXT_VF(vfwmul_vf_w, 4, 8)
3024 
3025 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
3026 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)       \
3027 static void do_##NAME(void *vd, void *vs1, void *vs2, int i,       \
3028         CPURISCVState *env)                                        \
3029 {                                                                  \
3030     TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
3031     TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
3032     TD d = *((TD *)vd + HD(i));                                    \
3033     *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status);          \
3034 }
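/*
 * OPFVV3 also reads the current destination element and passes it as
 * the accumulator, so OP is invoked as
 * OP(vs2 element, vs1 element, vd element, &env->fp_status).
 */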
3035 
3036 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3037 {
3038     return float16_muladd(a, b, d, 0, s);
3039 }
3040 
3041 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3042 {
3043     return float32_muladd(a, b, d, 0, s);
3044 }
3045 
3046 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3047 {
3048     return float64_muladd(a, b, d, 0, s);
3049 }
3050 
3051 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3052 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3053 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
3054 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2)
3055 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4)
3056 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8)
3057 
3058 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)           \
3059 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i,    \
3060         CPURISCVState *env)                                       \
3061 {                                                                 \
3062     TX2 s2 = *((T2 *)vs2 + HS2(i));                               \
3063     TD d = *((TD *)vd + HD(i));                                   \
3064     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
3065 }
3066 
3067 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3068 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3069 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
3070 GEN_VEXT_VF(vfmacc_vf_h, 2, 2)
3071 GEN_VEXT_VF(vfmacc_vf_w, 4, 4)
3072 GEN_VEXT_VF(vfmacc_vf_d, 8, 8)
3073 
3074 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3075 {
3076     return float16_muladd(a, b, d,
3077             float_muladd_negate_c | float_muladd_negate_product, s);
3078 }
3079 
3080 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3081 {
3082     return float32_muladd(a, b, d,
3083             float_muladd_negate_c | float_muladd_negate_product, s);
3084 }
3085 
3086 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3087 {
3088     return float64_muladd(a, b, d,
3089             float_muladd_negate_c | float_muladd_negate_product, s);
3090 }
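/*
 * float_muladd_negate_product negates a * b and float_muladd_negate_c
 * negates the addend, so vfnmacc computes -(vs1 * vs2) - vd.
 */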
3091 
3092 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3093 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3094 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
3095 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2)
3096 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4)
3097 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8)
3098 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3099 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3100 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
3101 GEN_VEXT_VF(vfnmacc_vf_h, 2, 2)
3102 GEN_VEXT_VF(vfnmacc_vf_w, 4, 4)
3103 GEN_VEXT_VF(vfnmacc_vf_d, 8, 8)
3104 
3105 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3106 {
3107     return float16_muladd(a, b, d, float_muladd_negate_c, s);
3108 }
3109 
3110 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3111 {
3112     return float32_muladd(a, b, d, float_muladd_negate_c, s);
3113 }
3114 
3115 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3116 {
3117     return float64_muladd(a, b, d, float_muladd_negate_c, s);
3118 }
3119 
3120 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3121 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3122 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
3123 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2)
3124 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4)
3125 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8)
3126 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3127 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3128 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
3129 GEN_VEXT_VF(vfmsac_vf_h, 2, 2)
3130 GEN_VEXT_VF(vfmsac_vf_w, 4, 4)
3131 GEN_VEXT_VF(vfmsac_vf_d, 8, 8)
3132 
3133 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3134 {
3135     return float16_muladd(a, b, d, float_muladd_negate_product, s);
3136 }
3137 
3138 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3139 {
3140     return float32_muladd(a, b, d, float_muladd_negate_product, s);
3141 }
3142 
3143 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3144 {
3145     return float64_muladd(a, b, d, float_muladd_negate_product, s);
3146 }
3147 
3148 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3149 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3150 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
3151 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2)
3152 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4)
3153 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8)
3154 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3155 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3156 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
3157 GEN_VEXT_VF(vfnmsac_vf_h, 2, 2)
3158 GEN_VEXT_VF(vfnmsac_vf_w, 4, 4)
3159 GEN_VEXT_VF(vfnmsac_vf_d, 8, 8)
3160 
3161 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3162 {
3163     return float16_muladd(d, b, a, 0, s);
3164 }
3165 
3166 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3167 {
3168     return float32_muladd(d, b, a, 0, s);
3169 }
3170 
3171 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3172 {
3173     return float64_muladd(d, b, a, 0, s);
3174 }
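/*
 * The "madd" flavours swap the operand roles: muladd(d, b, a) uses the
 * destination as a multiplicand and vs2 as the addend, so vfmadd
 * computes vd = (vd * vs1) + vs2.
 */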
3175 
3176 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3177 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3178 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
3179 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2)
3180 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4)
3181 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8)
3182 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3183 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3184 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
3185 GEN_VEXT_VF(vfmadd_vf_h, 2, 2)
3186 GEN_VEXT_VF(vfmadd_vf_w, 4, 4)
3187 GEN_VEXT_VF(vfmadd_vf_d, 8, 8)
3188 
3189 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3190 {
3191     return float16_muladd(d, b, a,
3192             float_muladd_negate_c | float_muladd_negate_product, s);
3193 }
3194 
3195 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3196 {
3197     return float32_muladd(d, b, a,
3198             float_muladd_negate_c | float_muladd_negate_product, s);
3199 }
3200 
3201 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3202 {
3203     return float64_muladd(d, b, a,
3204             float_muladd_negate_c | float_muladd_negate_product, s);
3205 }
3206 
3207 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3208 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3209 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
3210 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2)
3211 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4)
3212 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8)
3213 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3214 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3215 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
3216 GEN_VEXT_VF(vfnmadd_vf_h, 2, 2)
3217 GEN_VEXT_VF(vfnmadd_vf_w, 4, 4)
3218 GEN_VEXT_VF(vfnmadd_vf_d, 8, 8)
3219 
3220 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3221 {
3222     return float16_muladd(d, b, a, float_muladd_negate_c, s);
3223 }
3224 
3225 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3226 {
3227     return float32_muladd(d, b, a, float_muladd_negate_c, s);
3228 }
3229 
3230 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3231 {
3232     return float64_muladd(d, b, a, float_muladd_negate_c, s);
3233 }
3234 
3235 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3236 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3237 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
3238 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2)
3239 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4)
3240 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8)
3241 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3242 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3243 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
3244 GEN_VEXT_VF(vfmsub_vf_h, 2, 2)
3245 GEN_VEXT_VF(vfmsub_vf_w, 4, 4)
3246 GEN_VEXT_VF(vfmsub_vf_d, 8, 8)
3247 
3248 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3249 {
3250     return float16_muladd(d, b, a, float_muladd_negate_product, s);
3251 }
3252 
3253 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3254 {
3255     return float32_muladd(d, b, a, float_muladd_negate_product, s);
3256 }
3257 
3258 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3259 {
3260     return float64_muladd(d, b, a, float_muladd_negate_product, s);
3261 }
3262 
3263 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3264 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3265 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
3266 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2)
3267 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4)
3268 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8)
3269 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3270 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3271 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
3272 GEN_VEXT_VF(vfnmsub_vf_h, 2, 2)
3273 GEN_VEXT_VF(vfnmsub_vf_w, 4, 4)
3274 GEN_VEXT_VF(vfnmsub_vf_d, 8, 8)
3275 
3276 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3277 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3278 {
3279     return float32_muladd(float16_to_float32(a, true, s),
3280                         float16_to_float32(b, true, s), d, 0, s);
3281 }
3282 
3283 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3284 {
3285     return float64_muladd(float32_to_float64(a, s),
3286                         float32_to_float64(b, s), d, 0, s);
3287 }
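/*
 * The widening FMA helpers convert the single-width sources up first;
 * the accumulator d is already in the double-width format.
 */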
3288 
3289 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3290 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
3291 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4)
3292 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8)
3293 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3294 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
3295 GEN_VEXT_VF(vfwmacc_vf_h, 2, 4)
3296 GEN_VEXT_VF(vfwmacc_vf_w, 4, 8)
3297 
3298 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3299 {
3300     return float32_muladd(float16_to_float32(a, true, s),
3301                         float16_to_float32(b, true, s), d,
3302                         float_muladd_negate_c | float_muladd_negate_product, s);
3303 }
3304 
3305 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3306 {
3307     return float64_muladd(float32_to_float64(a, s),
3308                         float32_to_float64(b, s), d,
3309                         float_muladd_negate_c | float_muladd_negate_product, s);
3310 }
3311 
3312 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3313 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
3314 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4)
3315 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8)
3316 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3317 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
3318 GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4)
3319 GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8)
3320 
3321 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3322 {
3323     return float32_muladd(float16_to_float32(a, true, s),
3324                         float16_to_float32(b, true, s), d,
3325                         float_muladd_negate_c, s);
3326 }
3327 
3328 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3329 {
3330     return float64_muladd(float32_to_float64(a, s),
3331                         float32_to_float64(b, s), d,
3332                         float_muladd_negate_c, s);
3333 }
3334 
3335 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3336 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
3337 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4)
3338 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8)
3339 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3340 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
3341 GEN_VEXT_VF(vfwmsac_vf_h, 2, 4)
3342 GEN_VEXT_VF(vfwmsac_vf_w, 4, 8)
3343 
3344 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3345 {
3346     return float32_muladd(float16_to_float32(a, true, s),
3347                         float16_to_float32(b, true, s), d,
3348                         float_muladd_negate_product, s);
3349 }
3350 
3351 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3352 {
3353     return float64_muladd(float32_to_float64(a, s),
3354                         float32_to_float64(b, s), d,
3355                         float_muladd_negate_product, s);
3356 }
3357 
3358 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3359 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
3360 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4)
3361 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8)
3362 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3363 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
3364 GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4)
3365 GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8)
3366 
3367 /* Vector Floating-Point Square-Root Instruction */
3368 /* (TD, T2, TX2) */
3369 #define OP_UU_H uint16_t, uint16_t, uint16_t
3370 #define OP_UU_W uint32_t, uint32_t, uint32_t
3371 #define OP_UU_D uint64_t, uint64_t, uint64_t
3372 
3373 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP)        \
3374 static void do_##NAME(void *vd, void *vs2, int i,      \
3375         CPURISCVState *env)                            \
3376 {                                                      \
3377     TX2 s2 = *((T2 *)vs2 + HS2(i));                    \
3378     *((TD *)vd + HD(i)) = OP(s2, &env->fp_status);     \
3379 }
3380 
3381 #define GEN_VEXT_V_ENV(NAME, ESZ, DSZ)                 \
3382 void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
3383         CPURISCVState *env, uint32_t desc)             \
3384 {                                                      \
3385     uint32_t vm = vext_vm(desc);                       \
3386     uint32_t vl = env->vl;                             \
3387     uint32_t i;                                        \
3388                                                        \
3389     if (vl == 0) {                                     \
3390         return;                                        \
3391     }                                                  \
3392     for (i = env->vstart; i < vl; i++) {               \
3393         if (!vm && !vext_elem_mask(v0, i)) {           \
3394             continue;                                  \
3395         }                                              \
3396         do_##NAME(vd, vs2, i, env);                    \
3397     }                                                  \
3398     env->vstart = 0;                                   \
3399 }
3400 
3401 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3402 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3403 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
3404 GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2)
3405 GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4)
3406 GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8)
3407 
3408 /*
3409  * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
3410  *
3411  * Adapted from riscv-v-spec recip.c:
3412  * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3413  */
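/*
 * The estimate is looked up in a 128-entry table indexed by the low
 * bit of the (normalized) exponent and the top six significand bits,
 * giving 7 bits of precision; the biased result exponent is
 * (3 * bias - 1 - exp) / 2, computed below with unsigned arithmetic.
 */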
3414 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
3415 {
3416     uint64_t sign = extract64(f, frac_size + exp_size, 1);
3417     uint64_t exp = extract64(f, frac_size, exp_size);
3418     uint64_t frac = extract64(f, 0, frac_size);
3419 
3420     const uint8_t lookup_table[] = {
3421         52, 51, 50, 48, 47, 46, 44, 43,
3422         42, 41, 40, 39, 38, 36, 35, 34,
3423         33, 32, 31, 30, 30, 29, 28, 27,
3424         26, 25, 24, 23, 23, 22, 21, 20,
3425         19, 19, 18, 17, 16, 16, 15, 14,
3426         14, 13, 12, 12, 11, 10, 10, 9,
3427         9, 8, 7, 7, 6, 6, 5, 4,
3428         4, 3, 3, 2, 2, 1, 1, 0,
3429         127, 125, 123, 121, 119, 118, 116, 114,
3430         113, 111, 109, 108, 106, 105, 103, 102,
3431         100, 99, 97, 96, 95, 93, 92, 91,
3432         90, 88, 87, 86, 85, 84, 83, 82,
3433         80, 79, 78, 77, 76, 75, 74, 73,
3434         72, 71, 70, 70, 69, 68, 67, 66,
3435         65, 64, 63, 63, 62, 61, 60, 59,
3436         59, 58, 57, 56, 56, 55, 54, 53
3437     };
3438     const int precision = 7;
3439 
3440     if (exp == 0 && frac != 0) { /* subnormal */
3441         /* Normalize the subnormal. */
3442         while (extract64(frac, frac_size - 1, 1) == 0) {
3443             exp--;
3444             frac <<= 1;
3445         }
3446 
3447         frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3448     }
3449 
3450     int idx = ((exp & 1) << (precision - 1)) |
3451                 (frac >> (frac_size - precision + 1));
3452     uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3453                             (frac_size - precision);
3454     uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;
3455 
3456     uint64_t val = 0;
3457     val = deposit64(val, 0, frac_size, out_frac);
3458     val = deposit64(val, frac_size, exp_size, out_exp);
3459     val = deposit64(val, frac_size + exp_size, 1, sign);
3460     return val;
3461 }
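/*
 * Illustrative worked example (float32): for f = 4.0, exp = 129 and
 * frac = 0, so idx = ((129 & 1) << 6) | 0 = 64, lookup_table[64] = 127,
 * out_frac = 127 << 16 and out_exp = (3 * 127 - 1 - 129) / 2 = 125,
 * i.e. 0x3eff0000 ~= 0.498, a 7-bit estimate of 1 / sqrt(4.0) = 0.5.
 */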
3462 
3463 static float16 frsqrt7_h(float16 f, float_status *s)
3464 {
3465     int exp_size = 5, frac_size = 10;
3466     bool sign = float16_is_neg(f);
3467 
3468     /*
3469      * frsqrt7(sNaN) = canonical NaN
3470      * frsqrt7(-inf) = canonical NaN
3471      * frsqrt7(-normal) = canonical NaN
3472      * frsqrt7(-subnormal) = canonical NaN
3473      */
3474     if (float16_is_signaling_nan(f, s) ||
3475             (float16_is_infinity(f) && sign) ||
3476             (float16_is_normal(f) && sign) ||
3477             (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
3478         s->float_exception_flags |= float_flag_invalid;
3479         return float16_default_nan(s);
3480     }
3481 
3482     /* frsqrt7(qNaN) = canonical NaN */
3483     if (float16_is_quiet_nan(f, s)) {
3484         return float16_default_nan(s);
3485     }
3486 
3487     /* frsqrt7(+-0) = +-inf */
3488     if (float16_is_zero(f)) {
3489         s->float_exception_flags |= float_flag_divbyzero;
3490         return float16_set_sign(float16_infinity, sign);
3491     }
3492 
3493     /* frsqrt7(+inf) = +0 */
3494     if (float16_is_infinity(f) && !sign) {
3495         return float16_set_sign(float16_zero, sign);
3496     }
3497 
3498     /* +normal, +subnormal */
3499     uint64_t val = frsqrt7(f, exp_size, frac_size);
3500     return make_float16(val);
3501 }
3502 
3503 static float32 frsqrt7_s(float32 f, float_status *s)
3504 {
3505     int exp_size = 8, frac_size = 23;
3506     bool sign = float32_is_neg(f);
3507 
3508     /*
3509      * frsqrt7(sNaN) = canonical NaN
3510      * frsqrt7(-inf) = canonical NaN
3511      * frsqrt7(-normal) = canonical NaN
3512      * frsqrt7(-subnormal) = canonical NaN
3513      */
3514     if (float32_is_signaling_nan(f, s) ||
3515             (float32_is_infinity(f) && sign) ||
3516             (float32_is_normal(f) && sign) ||
3517             (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
3518         s->float_exception_flags |= float_flag_invalid;
3519         return float32_default_nan(s);
3520     }
3521 
3522     /* frsqrt7(qNaN) = canonical NaN */
3523     if (float32_is_quiet_nan(f, s)) {
3524         return float32_default_nan(s);
3525     }
3526 
3527     /* frsqrt7(+-0) = +-inf */
3528     if (float32_is_zero(f)) {
3529         s->float_exception_flags |= float_flag_divbyzero;
3530         return float32_set_sign(float32_infinity, sign);
3531     }
3532 
3533     /* frsqrt7(+inf) = +0 */
3534     if (float32_is_infinity(f) && !sign) {
3535         return float32_set_sign(float32_zero, sign);
3536     }
3537 
3538     /* +normal, +subnormal */
3539     uint64_t val = frsqrt7(f, exp_size, frac_size);
3540     return make_float32(val);
3541 }
3542 
3543 static float64 frsqrt7_d(float64 f, float_status *s)
3544 {
3545     int exp_size = 11, frac_size = 52;
3546     bool sign = float64_is_neg(f);
3547 
3548     /*
3549      * frsqrt7(sNaN) = canonical NaN
3550      * frsqrt7(-inf) = canonical NaN
3551      * frsqrt7(-normal) = canonical NaN
3552      * frsqrt7(-subnormal) = canonical NaN
3553      */
3554     if (float64_is_signaling_nan(f, s) ||
3555             (float64_is_infinity(f) && sign) ||
3556             (float64_is_normal(f) && sign) ||
3557             (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
3558         s->float_exception_flags |= float_flag_invalid;
3559         return float64_default_nan(s);
3560     }
3561 
3562     /* frsqrt7(qNaN) = canonical NaN */
3563     if (float64_is_quiet_nan(f, s)) {
3564         return float64_default_nan(s);
3565     }
3566 
3567     /* frsqrt7(+-0) = +-inf */
3568     if (float64_is_zero(f)) {
3569         s->float_exception_flags |= float_flag_divbyzero;
3570         return float64_set_sign(float64_infinity, sign);
3571     }
3572 
3573     /* frsqrt7(+inf) = +0 */
3574     if (float64_is_infinity(f) && !sign) {
3575         return float64_set_sign(float64_zero, sign);
3576     }
3577 
3578     /* +normal, +subnormal */
3579     uint64_t val = frsqrt7(f, exp_size, frac_size);
3580     return make_float64(val);
3581 }
3582 
3583 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
3584 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
3585 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
3586 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2, 2)
3587 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4, 4)
3588 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8, 8)
3589 
3590 /* Vector Floating-Point MIN/MAX Instructions */
3591 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
3592 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
3593 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
3594 GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2)
3595 GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4)
3596 GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8)
3597 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
3598 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
3599 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
3600 GEN_VEXT_VF(vfmin_vf_h, 2, 2)
3601 GEN_VEXT_VF(vfmin_vf_w, 4, 4)
3602 GEN_VEXT_VF(vfmin_vf_d, 8, 8)
3603 
3604 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
3605 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
3606 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
3607 GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2)
3608 GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4)
3609 GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8)
3610 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
3611 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
3612 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
3613 GEN_VEXT_VF(vfmax_vf_h, 2, 2)
3614 GEN_VEXT_VF(vfmax_vf_w, 4, 4)
3615 GEN_VEXT_VF(vfmax_vf_d, 8, 8)
3616 
3617 /* Vector Floating-Point Sign-Injection Instructions */
3618 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
3619 {
3620     return deposit64(b, 0, 15, a);
3621 }
3622 
3623 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
3624 {
3625     return deposit64(b, 0, 31, a);
3626 }
3627 
3628 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
3629 {
3630     return deposit64(b, 0, 63, a);
3631 }
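/*
 * deposit64(b, 0, width - 1, a) keeps the magnitude bits of a and the
 * sign bit of b; the fsgnjn and fsgnjx variants below use ~b and b ^ a
 * instead, inverting or XOR-ing the injected sign.
 */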
3632 
3633 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
3634 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
3635 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
3636 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2)
3637 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4)
3638 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8)
3639 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
3640 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
3641 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
3642 GEN_VEXT_VF(vfsgnj_vf_h, 2, 2)
3643 GEN_VEXT_VF(vfsgnj_vf_w, 4, 4)
3644 GEN_VEXT_VF(vfsgnj_vf_d, 8, 8)
3645 
3646 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
3647 {
3648     return deposit64(~b, 0, 15, a);
3649 }
3650 
3651 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
3652 {
3653     return deposit64(~b, 0, 31, a);
3654 }
3655 
3656 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
3657 {
3658     return deposit64(~b, 0, 63, a);
3659 }
3660 
3661 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
3662 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
3663 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
3664 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2)
3665 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4)
3666 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8)
3667 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
3668 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
3669 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
3670 GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2)
3671 GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4)
3672 GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8)
3673 
3674 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
3675 {
3676     return deposit64(b ^ a, 0, 15, a);
3677 }
3678 
3679 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
3680 {
3681     return deposit64(b ^ a, 0, 31, a);
3682 }
3683 
3684 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
3685 {
3686     return deposit64(b ^ a, 0, 63, a);
3687 }
3688 
3689 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
3690 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
3691 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
3692 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2)
3693 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4)
3694 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8)
3695 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
3696 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
3697 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
3698 GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2)
3699 GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4)
3700 GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8)
3701 
3702 /* Vector Floating-Point Compare Instructions */
3703 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP)            \
3704 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
3705                   CPURISCVState *env, uint32_t desc)          \
3706 {                                                             \
3707     uint32_t vm = vext_vm(desc);                              \
3708     uint32_t vl = env->vl;                                    \
3709     uint32_t i;                                               \
3710                                                               \
3711     for (i = env->vstart; i < vl; i++) {                      \
3712         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
3713         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
3714         if (!vm && !vext_elem_mask(v0, i)) {                  \
3715             continue;                                         \
3716         }                                                     \
3717         vext_set_elem_mask(vd, i,                             \
3718                            DO_OP(s2, s1, &env->fp_status));   \
3719     }                                                         \
3720     env->vstart = 0;                                          \
3721 }
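/*
 * The compare helpers only write mask bits for active elements;
 * elements that are masked off keep their previous value in vd.
 */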
3722 
3723 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
3724 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
3725 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
3726 
3727 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP)                      \
3728 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,       \
3729                   CPURISCVState *env, uint32_t desc)                \
3730 {                                                                   \
3731     uint32_t vm = vext_vm(desc);                                    \
3732     uint32_t vl = env->vl;                                          \
3733     uint32_t i;                                                     \
3734                                                                     \
3735     for (i = env->vstart; i < vl; i++) {                            \
3736         ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
3737         if (!vm && !vext_elem_mask(v0, i)) {                        \
3738             continue;                                               \
3739         }                                                           \
3740         vext_set_elem_mask(vd, i,                                   \
3741                            DO_OP(s2, (ETYPE)s1, &env->fp_status));  \
3742     }                                                               \
3743     env->vstart = 0;                                                \
3744 }
3745 
3746 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
3747 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
3748 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
3749 
3750 static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
3751 {
3752     FloatRelation compare = float16_compare_quiet(a, b, s);
3753     return compare != float_relation_equal;
3754 }
3755 
3756 static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
3757 {
3758     FloatRelation compare = float32_compare_quiet(a, b, s);
3759     return compare != float_relation_equal;
3760 }
3761 
3762 static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
3763 {
3764     FloatRelation compare = float64_compare_quiet(a, b, s);
3765     return compare != float_relation_equal;
3766 }
3767 
3768 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
3769 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
3770 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
3771 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
3772 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
3773 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
3774 
3775 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
3776 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
3777 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
3778 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
3779 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
3780 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
3781 
3782 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
3783 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
3784 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
3785 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
3786 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
3787 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
3788 
3789 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
3790 {
3791     FloatRelation compare = float16_compare(a, b, s);
3792     return compare == float_relation_greater;
3793 }
3794 
3795 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
3796 {
3797     FloatRelation compare = float32_compare(a, b, s);
3798     return compare == float_relation_greater;
3799 }
3800 
3801 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
3802 {
3803     FloatRelation compare = float64_compare(a, b, s);
3804     return compare == float_relation_greater;
3805 }
3806 
3807 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
3808 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
3809 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
3810 
3811 static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
3812 {
3813     FloatRelation compare = float16_compare(a, b, s);
3814     return compare == float_relation_greater ||
3815            compare == float_relation_equal;
3816 }
3817 
3818 static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
3819 {
3820     FloatRelation compare = float32_compare(a, b, s);
3821     return compare == float_relation_greater ||
3822            compare == float_relation_equal;
3823 }
3824 
3825 static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
3826 {
3827     FloatRelation compare = float64_compare(a, b, s);
3828     return compare == float_relation_greater ||
3829            compare == float_relation_equal;
3830 }
3831 
3832 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
3833 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
3834 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
3835 
3836 /* Vector Floating-Point Classify Instruction */
3837 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP)         \
3838 static void do_##NAME(void *vd, void *vs2, int i)      \
3839 {                                                      \
3840     TX2 s2 = *((T2 *)vs2 + HS2(i));                    \
3841     *((TD *)vd + HD(i)) = OP(s2);                      \
3842 }
3843 
3844 #define GEN_VEXT_V(NAME, ESZ, DSZ)                     \
3845 void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
3846                   CPURISCVState *env, uint32_t desc)   \
3847 {                                                      \
3848     uint32_t vm = vext_vm(desc);                       \
3849     uint32_t vl = env->vl;                             \
3850     uint32_t i;                                        \
3851                                                        \
3852     for (i = env->vstart; i < vl; i++) {               \
3853         if (!vm && !vext_elem_mask(v0, i)) {           \
3854             continue;                                  \
3855         }                                              \
3856         do_##NAME(vd, vs2, i);                         \
3857     }                                                  \
3858     env->vstart = 0;                                   \
3859 }
3860 
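/*
 * The fclass helpers return the standard RISC-V fclass encoding:
 * bit 0 = -inf, 1 = -normal, 2 = -subnormal, 3 = -0, 4 = +0,
 * 5 = +subnormal, 6 = +normal, 7 = +inf, 8 = sNaN, 9 = qNaN.
 */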
3861 target_ulong fclass_h(uint64_t frs1)
3862 {
3863     float16 f = frs1;
3864     bool sign = float16_is_neg(f);
3865 
3866     if (float16_is_infinity(f)) {
3867         return sign ? 1 << 0 : 1 << 7;
3868     } else if (float16_is_zero(f)) {
3869         return sign ? 1 << 3 : 1 << 4;
3870     } else if (float16_is_zero_or_denormal(f)) {
3871         return sign ? 1 << 2 : 1 << 5;
3872     } else if (float16_is_any_nan(f)) {
3873         float_status s = { }; /* for snan_bit_is_one */
3874         return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
3875     } else {
3876         return sign ? 1 << 1 : 1 << 6;
3877     }
3878 }
3879 
3880 target_ulong fclass_s(uint64_t frs1)
3881 {
3882     float32 f = frs1;
3883     bool sign = float32_is_neg(f);
3884 
3885     if (float32_is_infinity(f)) {
3886         return sign ? 1 << 0 : 1 << 7;
3887     } else if (float32_is_zero(f)) {
3888         return sign ? 1 << 3 : 1 << 4;
3889     } else if (float32_is_zero_or_denormal(f)) {
3890         return sign ? 1 << 2 : 1 << 5;
3891     } else if (float32_is_any_nan(f)) {
3892         float_status s = { }; /* for snan_bit_is_one */
3893         return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
3894     } else {
3895         return sign ? 1 << 1 : 1 << 6;
3896     }
3897 }
3898 
3899 target_ulong fclass_d(uint64_t frs1)
3900 {
3901     float64 f = frs1;
3902     bool sign = float64_is_neg(f);
3903 
3904     if (float64_is_infinity(f)) {
3905         return sign ? 1 << 0 : 1 << 7;
3906     } else if (float64_is_zero(f)) {
3907         return sign ? 1 << 3 : 1 << 4;
3908     } else if (float64_is_zero_or_denormal(f)) {
3909         return sign ? 1 << 2 : 1 << 5;
3910     } else if (float64_is_any_nan(f)) {
3911         float_status s = { }; /* for snan_bit_is_one */
3912         return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
3913     } else {
3914         return sign ? 1 << 1 : 1 << 6;
3915     }
3916 }
3917 
3918 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
3919 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
3920 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
3921 GEN_VEXT_V(vfclass_v_h, 2, 2)
3922 GEN_VEXT_V(vfclass_v_w, 4, 4)
3923 GEN_VEXT_V(vfclass_v_d, 8, 8)
3924 
3925 /* Vector Floating-Point Merge Instruction */
3926 #define GEN_VFMERGE_VF(NAME, ETYPE, H)                        \
3927 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
3928                   CPURISCVState *env, uint32_t desc)          \
3929 {                                                             \
3930     uint32_t vm = vext_vm(desc);                              \
3931     uint32_t vl = env->vl;                                    \
3932     uint32_t i;                                               \
3933                                                               \
3934     for (i = env->vstart; i < vl; i++) {                      \
3935         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
3936         *((ETYPE *)vd + H(i))                                 \
3937           = (!vm && !vext_elem_mask(v0, i) ? s2 : s1);        \
3938     }                                                         \
3939     env->vstart = 0;                                          \
3940 }
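/*
 * vfmerge writes the scalar s1 to active elements and copies the vs2
 * element when vm == 0 and the mask bit is clear; with vm == 1 every
 * element simply receives s1.
 */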
3941 
3942 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
3943 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
3944 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
3945 
3946 /* Single-Width Floating-Point/Integer Type-Convert Instructions */
3947 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
3948 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
3949 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
3950 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
3951 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2)
3952 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4)
3953 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8)
3954 
3955 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
3956 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
3957 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
3958 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
3959 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2)
3960 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4)
3961 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8)
3962 
3963 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
3964 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
3965 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
3966 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
3967 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2)
3968 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4)
3969 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8)
3970 
3971 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
3972 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
3973 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
3974 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
3975 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2)
3976 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4)
3977 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8)
3978 
3979 /* Widening Floating-Point/Integer Type-Convert Instructions */
3980 /* (TD, T2, TX2) */
3981 #define WOP_UU_B uint16_t, uint8_t,  uint8_t
3982 #define WOP_UU_H uint32_t, uint16_t, uint16_t
3983 #define WOP_UU_W uint64_t, uint32_t, uint32_t
3984 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/
3985 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
3986 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
3987 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4)
3988 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8)
3989 
3990 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
3991 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
3992 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
3993 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4)
3994 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8)
3995 
3996 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */
3997 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
3998 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
3999 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
4000 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 1, 2)
4001 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4)
4002 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8)
4003 
4004 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
4005 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
4006 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
4007 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
4008 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 1, 2)
4009 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4)
4010 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8)
4011 
4012 /*
4013  * vfwcvt.f.f.v vd, vs2, vm
4014  * Convert single-width float to double-width float.
4015  */
4016 static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4017 {
4018     return float16_to_float32(a, true, s);
4019 }
4020 
4021 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
4022 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
4023 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4)
4024 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8)
4025 
4026 /* Narrowing Floating-Point/Integer Type-Convert Instructions */
4027 /* (TD, T2, TX2) */
4028 #define NOP_UU_B uint8_t,  uint16_t, uint32_t
4029 #define NOP_UU_H uint16_t, uint32_t, uint32_t
4030 #define NOP_UU_W uint32_t, uint64_t, uint64_t
4031 /* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer */
4032 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
4033 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
4034 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
4035 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1, 1)
4036 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2, 2)
4037 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4, 4)
4038 
4039 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
4040 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
4041 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
4042 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
4043 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1, 1)
4044 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2, 2)
4045 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4, 4)
4046 
4047 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */
4048 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
4049 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
4050 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2, 2)
4051 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4, 4)
4052 
4053 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
4054 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
4055 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
4056 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2, 2)
4057 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4, 4)
4058 
4059 /* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float. */
4060 static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4061 {
4062     return float32_to_float16(a, true, s);
4063 }
4064 
4065 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
4066 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
4067 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2, 2)
4068 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4, 4)
4069 
4070 /*
4071  *** Vector Reduction Operations
4072  */
4073 /* Vector Single-Width Integer Reduction Instructions */
4074 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP)          \
4075 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
4076         void *vs2, CPURISCVState *env, uint32_t desc)     \
4077 {                                                         \
4078     uint32_t vm = vext_vm(desc);                          \
4079     uint32_t vl = env->vl;                                \
4080     uint32_t i;                                           \
4081     TD s1 =  *((TD *)vs1 + HD(0));                        \
4082                                                           \
4083     for (i = env->vstart; i < vl; i++) {                  \
4084         TS2 s2 = *((TS2 *)vs2 + HS2(i));                  \
4085         if (!vm && !vext_elem_mask(v0, i)) {              \
4086             continue;                                     \
4087         }                                                 \
4088         s1 = OP(s1, (TD)s2);                              \
4089     }                                                     \
4090     *((TD *)vd + HD(0)) = s1;                             \
4091     env->vstart = 0;                                      \
4092 }
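
/*
 * As a rough illustration, GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t,
 * H1, H1, DO_ADD) below expands to approximately the helper sketched
 * here: the scalar accumulator starts at vs1[0], every active element
 * of vs2 is folded in with DO_ADD, and only element 0 of vd is written.
 *
 *   void helper_vredsum_vs_b(void *vd, void *v0, void *vs1,
 *                            void *vs2, CPURISCVState *env, uint32_t desc)
 *   {
 *       int8_t s1 = *((int8_t *)vs1 + H1(0));
 *       for (uint32_t i = env->vstart; i < env->vl; i++) {
 *           if (vext_vm(desc) || vext_elem_mask(v0, i)) {
 *               s1 = DO_ADD(s1, (int8_t)*((int8_t *)vs2 + H1(i)));
 *           }
 *       }
 *       *((int8_t *)vd + H1(0)) = s1;
 *       env->vstart = 0;
 *   }
 */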
4093 
4094 /* vd[0] = sum(vs1[0], vs2[*]) */
4095 GEN_VEXT_RED(vredsum_vs_b, int8_t,  int8_t,  H1, H1, DO_ADD)
4096 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
4097 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
4098 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)
4099 
4100 /* vd[0] = maxu(vs1[0], vs2[*]) */
4101 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t,  uint8_t,  H1, H1, DO_MAX)
4102 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
4103 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
4104 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)
4105 
4106 /* vd[0] = max(vs1[0], vs2[*]) */
4107 GEN_VEXT_RED(vredmax_vs_b, int8_t,  int8_t,  H1, H1, DO_MAX)
4108 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
4109 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
4110 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)
4111 
4112 /* vd[0] = minu(vs1[0], vs2[*]) */
4113 GEN_VEXT_RED(vredminu_vs_b, uint8_t,  uint8_t,  H1, H1, DO_MIN)
4114 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
4115 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
4116 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)
4117 
4118 /* vd[0] = min(vs1[0], vs2[*]) */
4119 GEN_VEXT_RED(vredmin_vs_b, int8_t,  int8_t,  H1, H1, DO_MIN)
4120 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
4121 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
4122 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)
4123 
4124 /* vd[0] = and(vs1[0], vs2[*]) */
4125 GEN_VEXT_RED(vredand_vs_b, int8_t,  int8_t,  H1, H1, DO_AND)
4126 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
4127 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
4128 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)
4129 
4130 /* vd[0] = or(vs1[0], vs2[*]) */
4131 GEN_VEXT_RED(vredor_vs_b, int8_t,  int8_t,  H1, H1, DO_OR)
4132 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
4133 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
4134 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)
4135 
4136 /* vd[0] = xor(vs1[0], vs2[*]) */
4137 GEN_VEXT_RED(vredxor_vs_b, int8_t,  int8_t,  H1, H1, DO_XOR)
4138 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
4139 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
4140 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)
4141 
4142 /* Vector Widening Integer Reduction Instructions */
4143 /* Signed sum reduction into double-width accumulator */
4144 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t,  H2, H1, DO_ADD)
4145 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
4146 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)
4147 
4148 /* Unsigned sum reduction into double-width accumulator */
4149 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t,  H2, H1, DO_ADD)
4150 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
4151 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
4152 
4153 /* Vector Single-Width Floating-Point Reduction Instructions */
4154 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP)          \
4155 void HELPER(NAME)(void *vd, void *v0, void *vs1,           \
4156                   void *vs2, CPURISCVState *env,           \
4157                   uint32_t desc)                           \
4158 {                                                          \
4159     uint32_t vm = vext_vm(desc);                           \
4160     uint32_t vl = env->vl;                                 \
4161     uint32_t i;                                            \
4162     TD s1 =  *((TD *)vs1 + HD(0));                         \
4163                                                            \
4164     for (i = env->vstart; i < vl; i++) {                   \
4165         TS2 s2 = *((TS2 *)vs2 + HS2(i));                   \
4166         if (!vm && !vext_elem_mask(v0, i)) {               \
4167             continue;                                      \
4168         }                                                  \
4169         s1 = OP(s1, (TD)s2, &env->fp_status);              \
4170     }                                                      \
4171     *((TD *)vd + HD(0)) = s1;                              \
4172     env->vstart = 0;                                       \
4173 }
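
/*
 * This mirrors GEN_VEXT_RED above; the only difference is that OP is a
 * softfloat function taking &env->fp_status, so e.g. vfredsum_vs_h folds
 * each active element with:
 *
 *   s1 = float16_add(s1, (uint16_t)s2, &env->fp_status);
 */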
4174 
4175 /* Unordered sum */
4176 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4177 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4178 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
4179 
4180 /* Maximum value */
4181 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number)
4182 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number)
4183 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number)
4184 
4185 /* Minimum value */
4186 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number)
4187 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number)
4188 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number)
4189 
4190 /* Vector Widening Floating-Point Reduction Instructions */
4191 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
4192 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
4193                             void *vs2, CPURISCVState *env, uint32_t desc)
4194 {
4195     uint32_t vm = vext_vm(desc);
4196     uint32_t vl = env->vl;
4197     uint32_t i;
4198     uint32_t s1 =  *((uint32_t *)vs1 + H4(0));
4199 
4200     for (i = env->vstart; i < vl; i++) {
4201         uint16_t s2 = *((uint16_t *)vs2 + H2(i));
4202         if (!vm && !vext_elem_mask(v0, i)) {
4203             continue;
4204         }
4205         s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status),
4206                          &env->fp_status);
4207     }
4208     *((uint32_t *)vd + H4(0)) = s1;
4209     env->vstart = 0;
4210 }
4211 
4212 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
4213                             void *vs2, CPURISCVState *env, uint32_t desc)
4214 {
4215     uint32_t vm = vext_vm(desc);
4216     uint32_t vl = env->vl;
4217     uint32_t i;
4218     uint64_t s1 =  *((uint64_t *)vs1);
4219 
4220     for (i = env->vstart; i < vl; i++) {
4221         uint32_t s2 = *((uint32_t *)vs2 + H4(i));
4222         if (!vm && !vext_elem_mask(v0, i)) {
4223             continue;
4224         }
4225         s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status),
4226                          &env->fp_status);
4227     }
4228     *((uint64_t *)vd) = s1;
4229     env->vstart = 0;
4230 }
4231 
4232 /*
4233  *** Vector Mask Operations
4234  */
4235 /* Vector Mask-Register Logical Instructions */
4236 #define GEN_VEXT_MASK_VV(NAME, OP)                        \
4237 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
4238                   void *vs2, CPURISCVState *env,          \
4239                   uint32_t desc)                          \
4240 {                                                         \
4241     uint32_t vl = env->vl;                                \
4242     uint32_t i;                                           \
4243     int a, b;                                             \
4244                                                           \
4245     for (i = env->vstart; i < vl; i++) {                  \
4246         a = vext_elem_mask(vs1, i);                       \
4247         b = vext_elem_mask(vs2, i);                       \
4248         vext_set_elem_mask(vd, i, OP(b, a));              \
4249     }                                                     \
4250     env->vstart = 0;                                      \
4251 }
4252 
4253 #define DO_NAND(N, M)  (!(N & M))
4254 #define DO_ANDNOT(N, M)  (N & !M)
4255 #define DO_NOR(N, M)  (!(N | M))
4256 #define DO_ORNOT(N, M)  (N | !M)
4257 #define DO_XNOR(N, M)  (!(N ^ M))
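
/*
 * The operands fed to these macros are single mask bits (0 or 1) read via
 * vext_elem_mask(), so the logical '!' acts as a one-bit NOT.  For example:
 *
 *   DO_ANDNOT(1, 0) == (1 & !0)   == 1
 *   DO_XNOR(0, 1)   == (!(0 ^ 1)) == 0
 */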
4258 
4259 GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
4260 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
4261 GEN_VEXT_MASK_VV(vmandnot_mm, DO_ANDNOT)
4262 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
4263 GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
4264 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
4265 GEN_VEXT_MASK_VV(vmornot_mm, DO_ORNOT)
4266 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
4267 
4268 /* Vector count population in mask vcpop */
4269 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
4270                              uint32_t desc)
4271 {
4272     target_ulong cnt = 0;
4273     uint32_t vm = vext_vm(desc);
4274     uint32_t vl = env->vl;
4275     int i;
4276 
4277     for (i = env->vstart; i < vl; i++) {
4278         if (vm || vext_elem_mask(v0, i)) {
4279             if (vext_elem_mask(vs2, i)) {
4280                 cnt++;
4281             }
4282         }
4283     }
4284     env->vstart = 0;
4285     return cnt;
4286 }
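
/*
 * For example, with vm = 1, vstart = 0 and vl = 4, a source mask vs2 whose
 * bits for elements 0..3 are 1, 0, 1, 1 makes vcpop.m return 3.
 */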
4287 
4288 /* vfirst find-first-set mask bit */
4289 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4290                               uint32_t desc)
4291 {
4292     uint32_t vm = vext_vm(desc);
4293     uint32_t vl = env->vl;
4294     int i;
4295 
4296     for (i = env->vstart; i < vl; i++) {
4297         if (vm || vext_elem_mask(v0, i)) {
4298             if (vext_elem_mask(vs2, i)) {
4299                 return i;
4300             }
4301         }
4302     }
4303     env->vstart = 0;
4304     return -1LL;
4305 }
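
/*
 * For example, with vm = 1, vstart = 0 and vl = 4, a source mask vs2 of
 * 0, 0, 1, 0 (elements 0..3) makes vfirst.m return 2; an all-zero mask
 * returns -1.
 */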
4306 
4307 enum set_mask_type {
4308     ONLY_FIRST = 1,
4309     INCLUDE_FIRST,
4310     BEFORE_FIRST,
4311 };
4312 
4313 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
4314                    uint32_t desc, enum set_mask_type type)
4315 {
4316     uint32_t vm = vext_vm(desc);
4317     uint32_t vl = env->vl;
4318     int i;
4319     bool first_mask_bit = false;
4320 
4321     for (i = env->vstart; i < vl; i++) {
4322         if (!vm && !vext_elem_mask(v0, i)) {
4323             continue;
4324         }
4325         /* write a zero to all following active elements */
4326         if (first_mask_bit) {
4327             vext_set_elem_mask(vd, i, 0);
4328             continue;
4329         }
4330         if (vext_elem_mask(vs2, i)) {
4331             first_mask_bit = true;
4332             if (type == BEFORE_FIRST) {
4333                 vext_set_elem_mask(vd, i, 0);
4334             } else {
4335                 vext_set_elem_mask(vd, i, 1);
4336             }
4337         } else {
4338             if (type == ONLY_FIRST) {
4339                 vext_set_elem_mask(vd, i, 0);
4340             } else {
4341                 vext_set_elem_mask(vd, i, 1);
4342             }
4343         }
4344     }
4345     env->vstart = 0;
4346 }
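
/*
 * For example, with all elements active, vstart = 0, vl = 5 and a source
 * mask vs2 of 0, 0, 1, 0, 1 (first set bit at element 2):
 *
 *   vmsbf.m (BEFORE_FIRST)  -> 1, 1, 0, 0, 0
 *   vmsif.m (INCLUDE_FIRST) -> 1, 1, 1, 0, 0
 *   vmsof.m (ONLY_FIRST)    -> 0, 0, 1, 0, 0
 */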
4347 
4348 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4349                      uint32_t desc)
4350 {
4351     vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
4352 }
4353 
4354 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4355                      uint32_t desc)
4356 {
4357     vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
4358 }
4359 
4360 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4361                      uint32_t desc)
4362 {
4363     vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
4364 }
4365 
4366 /* Vector Iota Instruction */
4367 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H)                                  \
4368 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env,      \
4369                   uint32_t desc)                                          \
4370 {                                                                         \
4371     uint32_t vm = vext_vm(desc);                                          \
4372     uint32_t vl = env->vl;                                                \
4373     uint32_t sum = 0;                                                     \
4374     int i;                                                                \
4375                                                                           \
4376     for (i = env->vstart; i < vl; i++) {                                  \
4377         if (!vm && !vext_elem_mask(v0, i)) {                              \
4378             continue;                                                     \
4379         }                                                                 \
4380         *((ETYPE *)vd + H(i)) = sum;                                      \
4381         if (vext_elem_mask(vs2, i)) {                                     \
4382             sum++;                                                        \
4383         }                                                                 \
4384     }                                                                     \
4385     env->vstart = 0;                                                      \
4386 }
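
/*
 * For example, with all elements active, vstart = 0 and vl = 5, a source
 * mask vs2 of 1, 0, 1, 1, 0 yields vd = 0, 1, 1, 2, 3: each destination
 * element receives the number of set mask bits strictly before it.
 */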
4387 
4388 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t,  H1)
4389 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
4390 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
4391 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
4392 
4393 /* Vector Element Index Instruction */
4394 #define GEN_VEXT_VID_V(NAME, ETYPE, H)                                    \
4395 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc)  \
4396 {                                                                         \
4397     uint32_t vm = vext_vm(desc);                                          \
4398     uint32_t vl = env->vl;                                                \
4399     int i;                                                                \
4400                                                                           \
4401     for (i = env->vstart; i < vl; i++) {                                  \
4402         if (!vm && !vext_elem_mask(v0, i)) {                              \
4403             continue;                                                     \
4404         }                                                                 \
4405         *((ETYPE *)vd + H(i)) = i;                                        \
4406     }                                                                     \
4407     env->vstart = 0;                                                      \
4408 }
4409 
4410 GEN_VEXT_VID_V(vid_v_b, uint8_t,  H1)
4411 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
4412 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
4413 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
4414 
4415 /*
4416  *** Vector Permutation Instructions
4417  */
4418 
4419 /* Vector Slide Instructions */
4420 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                              \
4421 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4422                   CPURISCVState *env, uint32_t desc)                      \
4423 {                                                                         \
4424     uint32_t vm = vext_vm(desc);                                          \
4425     uint32_t vl = env->vl;                                                \
4426     target_ulong offset = s1, i_min, i;                                   \
4427                                                                           \
4428     i_min = MAX(env->vstart, offset);                                     \
4429     for (i = i_min; i < vl; i++) {                                        \
4430         if (!vm && !vext_elem_mask(v0, i)) {                              \
4431             continue;                                                     \
4432         }                                                                 \
4433         *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
4434     }                                                                     \
4435 }
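
/*
 * For example, with all elements active, vstart = 0, vl = 5 and rs1 = 2,
 * vslideup.vx writes vd[2] = vs2[0], vd[3] = vs2[1], vd[4] = vs2[2] and
 * leaves vd[0] and vd[1] unchanged.
 */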
4436 
4437 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
4438 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t,  H1)
4439 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
4440 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
4441 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
4442 
4443 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                            \
4444 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4445                   CPURISCVState *env, uint32_t desc)                      \
4446 {                                                                         \
4447     uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
4448     uint32_t vm = vext_vm(desc);                                          \
4449     uint32_t vl = env->vl;                                                \
4450     target_ulong i_max, i;                                                \
4451                                                                           \
4452     i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart);       \
4453     for (i = env->vstart; i < i_max; ++i) {                               \
4454         if (vm || vext_elem_mask(v0, i)) {                                \
4455             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));          \
4456         }                                                                 \
4457     }                                                                     \
4458                                                                           \
4459     for (i = i_max; i < vl; ++i) {                                        \
4460         if (vm || vext_elem_mask(v0, i)) {                                \
4461             *((ETYPE *)vd + H(i)) = 0;                                    \
4462         }                                                                 \
4463     }                                                                     \
4464                                                                           \
4465     env->vstart = 0;                                                      \
4466 }
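
/*
 * For example, with all elements active, vstart = 0, vl = 5, vlmax = 8 and
 * rs1 = 6, vslidedown.vx writes vd[0] = vs2[6], vd[1] = vs2[7] and zeroes
 * vd[2]..vd[4], since only vlmax - rs1 = 2 source elements remain.
 */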
4467 
4468 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
4469 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t,  H1)
4470 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
4471 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
4472 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
4473 
4474 #define GEN_VEXT_VSLIE1UP(ESZ, H)                                           \
4475 static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
4476                      CPURISCVState *env, uint32_t desc)                     \
4477 {                                                                           \
4478     typedef uint##ESZ##_t ETYPE;                                            \
4479     uint32_t vm = vext_vm(desc);                                            \
4480     uint32_t vl = env->vl;                                                  \
4481     uint32_t i;                                                             \
4482                                                                             \
4483     for (i = env->vstart; i < vl; i++) {                                    \
4484         if (!vm && !vext_elem_mask(v0, i)) {                                \
4485             continue;                                                       \
4486         }                                                                   \
4487         if (i == 0) {                                                       \
4488             *((ETYPE *)vd + H(i)) = s1;                                     \
4489         } else {                                                            \
4490             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));             \
4491         }                                                                   \
4492     }                                                                       \
4493     env->vstart = 0;                                                        \
4494 }
4495 
4496 GEN_VEXT_VSLIE1UP(8,  H1)
4497 GEN_VEXT_VSLIE1UP(16, H2)
4498 GEN_VEXT_VSLIE1UP(32, H4)
4499 GEN_VEXT_VSLIE1UP(64, H8)
4500 
4501 #define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ)                          \
4502 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4503                   CPURISCVState *env, uint32_t desc)              \
4504 {                                                                 \
4505     vslide1up_##ESZ(vd, v0, s1, vs2, env, desc);                  \
4506 }
4507 
4508 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
4509 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
4510 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
4511 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
4512 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
4513 
4514 #define GEN_VEXT_VSLIDE1DOWN(ESZ, H)                                          \
4515 static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
4516                        CPURISCVState *env, uint32_t desc)                     \
4517 {                                                                             \
4518     typedef uint##ESZ##_t ETYPE;                                              \
4519     uint32_t vm = vext_vm(desc);                                              \
4520     uint32_t vl = env->vl;                                                    \
4521     uint32_t i;                                                               \
4522                                                                               \
4523     for (i = env->vstart; i < vl; i++) {                                      \
4524         if (!vm && !vext_elem_mask(v0, i)) {                                  \
4525             continue;                                                         \
4526         }                                                                     \
4527         if (i == vl - 1) {                                                    \
4528             *((ETYPE *)vd + H(i)) = s1;                                       \
4529         } else {                                                              \
4530             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));               \
4531         }                                                                     \
4532     }                                                                         \
4533     env->vstart = 0;                                                          \
4534 }
4535 
4536 GEN_VEXT_VSLIDE1DOWN(8,  H1)
4537 GEN_VEXT_VSLIDE1DOWN(16, H2)
4538 GEN_VEXT_VSLIDE1DOWN(32, H4)
4539 GEN_VEXT_VSLIDE1DOWN(64, H8)
4540 
4541 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ)                        \
4542 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4543                   CPURISCVState *env, uint32_t desc)              \
4544 {                                                                 \
4545     vslide1down_##ESZ(vd, v0, s1, vs2, env, desc);                \
4546 }
4547 
4548 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
4549 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
4550 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
4551 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
4552 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
4553 
4554 /* Vector Floating-Point Slide Instructions */
4555 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ)                     \
4556 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4557                   CPURISCVState *env, uint32_t desc)          \
4558 {                                                             \
4559     vslide1up_##ESZ(vd, v0, s1, vs2, env, desc);              \
4560 }
4561 
4562 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
4563 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
4564 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
4565 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)
4566 
4567 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ)                   \
4568 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4569                   CPURISCVState *env, uint32_t desc)          \
4570 {                                                             \
4571     vslide1down_##ESZ(vd, v0, s1, vs2, env, desc);            \
4572 }
4573 
4574 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
4575 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
4576 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
4577 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
4578 
4579 /* Vector Register Gather Instruction */
4580 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2)                    \
4581 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
4582                   CPURISCVState *env, uint32_t desc)                      \
4583 {                                                                         \
4584     uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2)));             \
4585     uint32_t vm = vext_vm(desc);                                          \
4586     uint32_t vl = env->vl;                                                \
4587     uint64_t index;                                                       \
4588     uint32_t i;                                                           \
4589                                                                           \
4590     for (i = env->vstart; i < vl; i++) {                                  \
4591         if (!vm && !vext_elem_mask(v0, i)) {                              \
4592             continue;                                                     \
4593         }                                                                 \
4594         index = *((TS1 *)vs1 + HS1(i));                                   \
4595         if (index >= vlmax) {                                             \
4596             *((TS2 *)vd + HS2(i)) = 0;                                    \
4597         } else {                                                          \
4598             *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index));           \
4599         }                                                                 \
4600     }                                                                     \
4601     env->vstart = 0;                                                      \
4602 }
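
/*
 * For example, with all elements active, vstart = 0, vl = 4 and vlmax = 4,
 * vrgather.vv with an index vector vs1 of 3, 0, 9, 1 produces
 * vd = vs2[3], vs2[0], 0, vs2[1]: out-of-range indices read as zero.
 */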
4603 
4604 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
4605 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t,  uint8_t,  H1, H1)
4606 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
4607 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
4608 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)
4609 
4610 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t,  H2, H1)
4611 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
4612 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
4613 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
4614 
4615 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                              \
4616 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4617                   CPURISCVState *env, uint32_t desc)                      \
4618 {                                                                         \
4619     uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
4620     uint32_t vm = vext_vm(desc);                                          \
4621     uint32_t vl = env->vl;                                                \
4622     uint64_t index = s1;                                                  \
4623     uint32_t i;                                                           \
4624                                                                           \
4625     for (i = env->vstart; i < vl; i++) {                                  \
4626         if (!vm && !vext_elem_mask(v0, i)) {                              \
4627             continue;                                                     \
4628         }                                                                 \
4629         if (index >= vlmax) {                                             \
4630             *((ETYPE *)vd + H(i)) = 0;                                    \
4631         } else {                                                          \
4632             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
4633         }                                                                 \
4634     }                                                                     \
4635     env->vstart = 0;                                                      \
4636 }
4637 
4638 /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
4639 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t,  H1)
4640 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
4641 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
4642 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
4643 
4644 /* Vector Compress Instruction */
4645 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
4646 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
4647                   CPURISCVState *env, uint32_t desc)                      \
4648 {                                                                         \
4649     uint32_t vl = env->vl;                                                \
4650     uint32_t num = 0, i;                                                  \
4651                                                                           \
4652     for (i = env->vstart; i < vl; i++) {                                  \
4653         if (!vext_elem_mask(vs1, i)) {                                    \
4654             continue;                                                     \
4655         }                                                                 \
4656         *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
4657         num++;                                                            \
4658     }                                                                     \
4659     env->vstart = 0;                                                      \
4660 }
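
/*
 * For example, with vstart = 0, vl = 5 and a selector mask vs1 of
 * 0, 1, 1, 0, 1, vcompress.vm packs vd[0] = vs2[1], vd[1] = vs2[2],
 * vd[2] = vs2[4]; elements of vd past the packed count are not written.
 */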
4661 
4662 /* Compress into vd elements of vs2 where vs1 is enabled */
4663 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t,  H1)
4664 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
4665 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
4666 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
4667 
4668 /* Vector Whole Register Move */
4669 #define GEN_VEXT_VMV_WHOLE(NAME, LEN)                      \
4670 void HELPER(NAME)(void *vd, void *vs2, CPURISCVState *env, \
4671                   uint32_t desc)                           \
4672 {                                                          \
4673     /* EEW = 8 */                                          \
4674     uint32_t maxsz = simd_maxsz(desc);                     \
4675     uint32_t i = env->vstart;                              \
4676                                                            \
4677     memcpy((uint8_t *)vd + H1(i),                          \
4678            (uint8_t *)vs2 + H1(i),                         \
4679            maxsz - env->vstart);                           \
4680                                                            \
4681     env->vstart = 0;                                       \
4682 }
4683 
4684 GEN_VEXT_VMV_WHOLE(vmv1r_v, 1)
4685 GEN_VEXT_VMV_WHOLE(vmv2r_v, 2)
4686 GEN_VEXT_VMV_WHOLE(vmv4r_v, 4)
4687 GEN_VEXT_VMV_WHOLE(vmv8r_v, 8)
4688 
4689 /* Vector Integer Extension */
4690 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)            \
4691 void HELPER(NAME)(void *vd, void *v0, void *vs2,                 \
4692                   CPURISCVState *env, uint32_t desc)             \
4693 {                                                                \
4694     uint32_t vl = env->vl;                                       \
4695     uint32_t vm = vext_vm(desc);                                 \
4696     uint32_t i;                                                  \
4697                                                                  \
4698     for (i = env->vstart; i < vl; i++) {                         \
4699         if (!vm && !vext_elem_mask(v0, i)) {                     \
4700             continue;                                            \
4701         }                                                        \
4702         *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));       \
4703     }                                                            \
4704     env->vstart = 0;                                             \
4705 }
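
/*
 * For example, vzext_vf2_h zero-extends each byte of vs2 into a halfword
 * of vd, so a source element of 0xff becomes 0x00ff, while vsext_vf2_h
 * sign-extends it to 0xffff.
 */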
4706 
4707 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t,  H2, H1)
4708 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
4709 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
4710 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t,  H4, H1)
4711 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
4712 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t,  H8, H1)
4713 
4714 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t,  H2, H1)
4715 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
4716 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
4717 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t,  H4, H1)
4718 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
4719 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t,  H8, H1)
4720