xref: /openbmc/qemu/target/riscv/vector_helper.c (revision f714361e)
1 /*
2  * RISC-V Vector Extension Helpers for QEMU.
3  *
4  * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2 or later, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License along with
16  * this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include "qemu/osdep.h"
20 #include "qemu/host-utils.h"
21 #include "cpu.h"
22 #include "exec/memop.h"
23 #include "exec/exec-all.h"
24 #include "exec/helper-proto.h"
25 #include "fpu/softfloat.h"
26 #include "tcg/tcg-gvec-desc.h"
27 #include "internals.h"
28 #include <math.h>
29 
30 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
31                             target_ulong s2)
32 {
33     int vlmax, vl;
34     RISCVCPU *cpu = env_archcpu(env);
35     uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
36     uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
37     uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
38     bool vill = FIELD_EX64(s2, VTYPE, VILL);
39     target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED);
40 
41     if (lmul & 4) {
42         /* Fractional LMUL. */
43         if (lmul == 4 ||
44             cpu->cfg.elen >> (8 - lmul) < sew) {
45             vill = true;
46         }
47     }
48 
49     if ((sew > cpu->cfg.elen)
50         || vill
51         || (ediv != 0)
52         || (reserved != 0)) {
53         /* only set vill bit. */
54         env->vtype = FIELD_DP64(0, VTYPE, VILL, 1);
55         env->vl = 0;
56         env->vstart = 0;
57         return 0;
58     }
59 
60     vlmax = vext_get_vlmax(cpu, s2);
61     if (s1 <= vlmax) {
62         vl = s1;
63     } else {
64         vl = vlmax;
65     }
66     env->vl = vl;
67     env->vtype = s2;
68     env->vstart = 0;
69     return vl;
70 }
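
/*
 * Worked example (illustrative, assuming VLEN = 128 and ELEN = 64):
 *  - vtype with VSEW = 2 (SEW = 32) and VLMUL = 1 (LMUL = 2) gives
 *    VLMAX = LMUL * VLEN / SEW = 2 * 128 / 32 = 8, so a requested
 *    AVL of s1 = 10 is clamped to vl = 8.
 *  - vtype with VLMUL = 7 (LMUL = 1/2) and SEW = 64 fails the
 *    fractional-LMUL check above, since ELEN >> (8 - 7) = 32 < 64,
 *    so only vill is set and vl is forced to 0.
 */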
71 
72 /*
73  * Note that vector data is stored in host-endian 64-bit chunks,
74  * so addressing units smaller than that need a host-endian fixup.
75  */
76 #ifdef HOST_WORDS_BIGENDIAN
77 #define H1(x)   ((x) ^ 7)
78 #define H1_2(x) ((x) ^ 6)
79 #define H1_4(x) ((x) ^ 4)
80 #define H2(x)   ((x) ^ 3)
81 #define H4(x)   ((x) ^ 1)
82 #define H8(x)   ((x))
83 #else
84 #define H1(x)   (x)
85 #define H1_2(x) (x)
86 #define H1_4(x) (x)
87 #define H2(x)   (x)
88 #define H4(x)   (x)
89 #define H8(x)   (x)
90 #endif
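
/*
 * Illustrative example: on a big-endian host the bytes of each 64-bit
 * chunk are stored in reverse, so H1 maps byte-element indices
 * 0,1,...,7 onto host byte offsets 7,6,...,0 within the chunk, H2 maps
 * halfword indices 0..3 onto 3..0, and H4 swaps the two 32-bit words.
 * On a little-endian host all of these macros are the identity, so
 * *((uint16_t *)vd + H2(i)) addresses element i either way.
 */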
91 
92 static inline uint32_t vext_nf(uint32_t desc)
93 {
94     return FIELD_EX32(simd_data(desc), VDATA, NF);
95 }
96 
97 static inline uint32_t vext_vm(uint32_t desc)
98 {
99     return FIELD_EX32(simd_data(desc), VDATA, VM);
100 }
101 
102 /*
103  * Encode LMUL to lmul as follows:
104  *     LMUL    vlmul    lmul
105  *      1       000       0
106  *      2       001       1
107  *      4       010       2
108  *      8       011       3
109  *      -       100       -
110  *     1/8      101      -3
111  *     1/4      110      -2
112  *     1/2      111      -1
113  */
114 static inline int32_t vext_lmul(uint32_t desc)
115 {
116     return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
117 }
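
/*
 * For example, sextract32() sign-extends the 3-bit VLMUL field:
 * vlmul = 0b011 (3) stays 3 for LMUL = 8, while vlmul = 0b101 (5)
 * becomes -3 for LMUL = 1/8 and vlmul = 0b111 (7) becomes -1 for
 * LMUL = 1/2, matching the table above.
 */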
118 
119 /*
120  * Get the maximum number of elements that can be operated on.
121  *
122  * esz: log2 of element size in bytes.
123  */
124 static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
125 {
126     /*
127      * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
128      * so vlen in bytes (vlenb) is encoded as maxsz.
129      */
130     uint32_t vlenb = simd_maxsz(desc);
131 
132     /* Return VLMAX */
133     int scale = vext_lmul(desc) - esz;
134     return scale < 0 ? vlenb >> -scale : vlenb << scale;
135 }
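
/*
 * Worked example (illustrative, assuming VLEN = 128, i.e. vlenb = 16):
 * for SEW = 16 (esz = 1) and LMUL = 1/2 (lmul = -1), scale = -1 - 1 = -2,
 * so VLMAX = 16 >> 2 = 4 elements; for SEW = 8 (esz = 0) and LMUL = 8
 * (lmul = 3), VLMAX = 16 << 3 = 128 elements.
 */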
136 
137 /*
138  * This function checks watchpoints before the real load operation.
139  *
140  * In softmmu mode, the TLB API probe_access is enough for the watchpoint
141  * check. In user mode, there is no watchpoint support now.
142  *
143  * It will trigger an exception if there is no mapping in the TLB and the
144  * page table walk can't fill the TLB entry. The guest software can then
145  * return here after processing the exception, or never return.
146  */
147 static void probe_pages(CPURISCVState *env, target_ulong addr,
148                         target_ulong len, uintptr_t ra,
149                         MMUAccessType access_type)
150 {
151     target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
152     target_ulong curlen = MIN(pagelen, len);
153 
154     probe_access(env, addr, curlen, access_type,
155                  cpu_mmu_index(env, false), ra);
156     if (len > curlen) {
157         addr += curlen;
158         curlen = len - curlen;
159         probe_access(env, addr, curlen, access_type,
160                      cpu_mmu_index(env, false), ra);
161     }
162 }
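
/*
 * Illustrative example, assuming 4 KiB pages: for addr = 0x1ff8 and
 * len = 0x20, pagelen = -(addr | TARGET_PAGE_MASK) = 8, so the first
 * probe_access() covers the 8 bytes up to the page boundary at 0x2000
 * and the second covers the remaining 0x18 bytes on the next page.
 */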
163 
164 static inline void vext_set_elem_mask(void *v0, int index,
165                                       uint8_t value)
166 {
167     int idx = index / 64;
168     int pos = index % 64;
169     uint64_t old = ((uint64_t *)v0)[idx];
170     ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
171 }
172 
173 /*
174  * Earlier designs (pre-0.9) had a varying number of bits
175  * per mask value (MLEN). In the 0.9 design, MLEN=1.
176  * (Section 4.5)
177  */
178 static inline int vext_elem_mask(void *v0, int index)
179 {
180     int idx = index / 64;
181     int pos = index % 64;
182     return (((uint64_t *)v0)[idx] >> pos) & 1;
183 }
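
/*
 * For example, mask element 70 lives at bit 6 of the second 64-bit
 * word of v0: idx = 70 / 64 = 1, pos = 70 % 64 = 6.  The setter above
 * uses deposit64() so that only that single bit is modified.
 */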
184 
185 /* element operations for load and store */
186 typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
187                                uint32_t idx, void *vd, uintptr_t retaddr);
188 
189 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
190 static void NAME(CPURISCVState *env, abi_ptr addr,         \
191                  uint32_t idx, void *vd, uintptr_t retaddr)\
192 {                                                          \
193     ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
194     *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
195 }
196 
197 GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
198 GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
199 GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
200 GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)
201 
202 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
203 static void NAME(CPURISCVState *env, abi_ptr addr,         \
204                  uint32_t idx, void *vd, uintptr_t retaddr)\
205 {                                                          \
206     ETYPE data = *((ETYPE *)vd + H(idx));                  \
207     cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
208 }
209 
210 GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
211 GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
212 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
213 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
214 
215 /*
216  *** stride: access vector element from strided memory
217  */
218 static void
219 vext_ldst_stride(void *vd, void *v0, target_ulong base,
220                  target_ulong stride, CPURISCVState *env,
221                  uint32_t desc, uint32_t vm,
222                  vext_ldst_elem_fn *ldst_elem,
223                  uint32_t esz, uintptr_t ra, MMUAccessType access_type)
224 {
225     uint32_t i, k;
226     uint32_t nf = vext_nf(desc);
227     uint32_t max_elems = vext_max_elems(desc, esz);
228 
229     for (i = env->vstart; i < env->vl; i++, env->vstart++) {
230         if (!vm && !vext_elem_mask(v0, i)) {
231             continue;
232         }
233 
234         k = 0;
235         while (k < nf) {
236             target_ulong addr = base + stride * i + (k << esz);
237             ldst_elem(env, addr, i + k * max_elems, vd, ra);
238             k++;
239         }
240     }
241     env->vstart = 0;
242 }
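
/*
 * Illustrative example: a segment load with nf = 3, 32-bit elements
 * (esz = 2) and stride = 0x40 reads field k of element i from
 * base + 0x40 * i + (k << 2) and writes it to destination element
 * i + k * max_elems, i.e. each field goes to its own register group.
 */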
243 
244 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
245 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
246                   target_ulong stride, CPURISCVState *env,              \
247                   uint32_t desc)                                        \
248 {                                                                       \
249     uint32_t vm = vext_vm(desc);                                        \
250     vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
251                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);      \
252 }
253 
254 GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b)
255 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
256 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
257 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)
258 
259 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                       \
260 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
261                   target_ulong stride, CPURISCVState *env,              \
262                   uint32_t desc)                                        \
263 {                                                                       \
264     uint32_t vm = vext_vm(desc);                                        \
265     vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
266                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);     \
267 }
268 
269 GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b)
270 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
271 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
272 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
273 
274 /*
275  *** unit-stride: access elements stored contiguously in memory
276  */
277 
278 /* unmasked unit-stride load and store operation */
279 static void
280 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
281              vext_ldst_elem_fn *ldst_elem,
282              uint32_t esz, uintptr_t ra, MMUAccessType access_type)
283 {
284     uint32_t i, k;
285     uint32_t nf = vext_nf(desc);
286     uint32_t max_elems = vext_max_elems(desc, esz);
287 
288     /* load/store bytes from/to guest memory */
289     for (i = env->vstart; i < env->vl; i++, env->vstart++) {
290         k = 0;
291         while (k < nf) {
292             target_ulong addr = base + ((i * nf + k) << esz);
293             ldst_elem(env, addr, i + k * max_elems, vd, ra);
294             k++;
295         }
296     }
297     env->vstart = 0;
298 }
299 
300 /*
301  * A masked unit-stride load or store operation is a special case of a
302  * strided operation, with stride = NF * sizeof(ETYPE).
303  */
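
/*
 * For example, a masked two-field (NF = 2) access of 32-bit elements
 * is handled as a strided access with stride = 2 << 2 = 8 bytes,
 * i.e. NF * sizeof(ETYPE); vm is passed as false so the mask in v0
 * is consulted for every element.
 */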
304 
305 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN)                            \
306 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
307                          CPURISCVState *env, uint32_t desc)             \
308 {                                                                       \
309     uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
310     vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
311                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);      \
312 }                                                                       \
313                                                                         \
314 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
315                   CPURISCVState *env, uint32_t desc)                    \
316 {                                                                       \
317     vext_ldst_us(vd, base, env, desc, LOAD_FN,                          \
318                  ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);          \
319 }
320 
321 GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b)
322 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
323 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
324 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)
325 
326 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN)                           \
327 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
328                          CPURISCVState *env, uint32_t desc)             \
329 {                                                                       \
330     uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
331     vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,  \
332                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);     \
333 }                                                                       \
334                                                                         \
335 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
336                   CPURISCVState *env, uint32_t desc)                    \
337 {                                                                       \
338     vext_ldst_us(vd, base, env, desc, STORE_FN,                         \
339                  ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);         \
340 }
341 
342 GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b)
343 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
344 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
345 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)
346 
347 /*
348  *** index: access vector element from indexed memory
349  */
350 typedef target_ulong vext_get_index_addr(target_ulong base,
351         uint32_t idx, void *vs2);
352 
353 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
354 static target_ulong NAME(target_ulong base,            \
355                          uint32_t idx, void *vs2)      \
356 {                                                      \
357     return (base + *((ETYPE *)vs2 + H(idx)));          \
358 }
359 
360 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t,  H1)
361 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
362 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
363 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
364 
365 static inline void
366 vext_ldst_index(void *vd, void *v0, target_ulong base,
367                 void *vs2, CPURISCVState *env, uint32_t desc,
368                 vext_get_index_addr get_index_addr,
369                 vext_ldst_elem_fn *ldst_elem,
370                 uint32_t esz, uintptr_t ra, MMUAccessType access_type)
371 {
372     uint32_t i, k;
373     uint32_t nf = vext_nf(desc);
374     uint32_t vm = vext_vm(desc);
375     uint32_t max_elems = vext_max_elems(desc, esz);
376 
377     /* load/store bytes from/to guest memory */
378     for (i = env->vstart; i < env->vl; i++, env->vstart++) {
379         if (!vm && !vext_elem_mask(v0, i)) {
380             continue;
381         }
382 
383         k = 0;
384         while (k < nf) {
385             abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz);
386             ldst_elem(env, addr, i + k * max_elems, vd, ra);
387             k++;
388         }
389     }
390     env->vstart = 0;
391 }
392 
393 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)                  \
394 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                   \
395                   void *vs2, CPURISCVState *env, uint32_t desc)            \
396 {                                                                          \
397     vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,                \
398                     LOAD_FN, ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
399 }
400 
401 GEN_VEXT_LD_INDEX(vlxei8_8_v,   int8_t,  idx_b, lde_b)
402 GEN_VEXT_LD_INDEX(vlxei8_16_v,  int16_t, idx_b, lde_h)
403 GEN_VEXT_LD_INDEX(vlxei8_32_v,  int32_t, idx_b, lde_w)
404 GEN_VEXT_LD_INDEX(vlxei8_64_v,  int64_t, idx_b, lde_d)
405 GEN_VEXT_LD_INDEX(vlxei16_8_v,  int8_t,  idx_h, lde_b)
406 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
407 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
408 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
409 GEN_VEXT_LD_INDEX(vlxei32_8_v,  int8_t,  idx_w, lde_b)
410 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
411 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
412 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
413 GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b)
414 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
415 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
416 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)
417 
418 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)       \
419 void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
420                   void *vs2, CPURISCVState *env, uint32_t desc)  \
421 {                                                                \
422     vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
423                     STORE_FN, ctzl(sizeof(ETYPE)),               \
424                     GETPC(), MMU_DATA_STORE);                    \
425 }
426 
427 GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
428 GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
429 GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
430 GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
431 GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
432 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
433 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
434 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
435 GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
436 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
437 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
438 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
439 GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
440 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
441 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
442 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)
443 
444 /*
445  *** unit-stride fault-only-first load instructions
446  */
447 static inline void
448 vext_ldff(void *vd, void *v0, target_ulong base,
449           CPURISCVState *env, uint32_t desc,
450           vext_ldst_elem_fn *ldst_elem,
451           uint32_t esz, uintptr_t ra)
452 {
453     void *host;
454     uint32_t i, k, vl = 0;
455     uint32_t nf = vext_nf(desc);
456     uint32_t vm = vext_vm(desc);
457     uint32_t max_elems = vext_max_elems(desc, esz);
458     target_ulong addr, offset, remain;
459 
460     /* probe every access */
461     for (i = env->vstart; i < env->vl; i++) {
462         if (!vm && !vext_elem_mask(v0, i)) {
463             continue;
464         }
465         addr = base + i * (nf << esz);
466         if (i == 0) {
467             probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
468         } else {
469             /* if it triggers an exception, no need to check watchpoint */
470             remain = nf << esz;
471             while (remain > 0) {
472                 offset = -(addr | TARGET_PAGE_MASK);
473                 host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
474                                          cpu_mmu_index(env, false));
475                 if (host) {
476 #ifdef CONFIG_USER_ONLY
477                     if (page_check_range(addr, nf << esz, PAGE_READ) < 0) {
478                         vl = i;
479                         goto ProbeSuccess;
480                     }
481 #else
482                     probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
483 #endif
484                 } else {
485                     vl = i;
486                     goto ProbeSuccess;
487                 }
488                 if (remain <= offset) {
489                     break;
490                 }
491                 remain -= offset;
492                 addr += offset;
493             }
494         }
495     }
496 ProbeSuccess:
497     /* load bytes from guest memory */
498     if (vl != 0) {
499         env->vl = vl;
500     }
501     for (i = env->vstart; i < env->vl; i++) {
502         k = 0;
503         if (!vm && !vext_elem_mask(v0, i)) {
504             continue;
505         }
506         while (k < nf) {
507             target_ulong addr = base + ((i * nf + k) << esz);
508             ldst_elem(env, addr, i + k * max_elems, vd, ra);
509             k++;
510         }
511     }
512     env->vstart = 0;
513 }
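
/*
 * Illustrative example: with vl = 16, if element 0 probes successfully
 * but the page holding element 9 cannot be translated, the probe loop
 * above trims env->vl to 9 and the load completes elements 0..8
 * without raising an exception; only a fault on element 0 traps.
 */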
514 
515 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
516 void HELPER(NAME)(void *vd, void *v0, target_ulong base,  \
517                   CPURISCVState *env, uint32_t desc)      \
518 {                                                         \
519     vext_ldff(vd, v0, base, env, desc, LOAD_FN,           \
520               ctzl(sizeof(ETYPE)), GETPC());              \
521 }
522 
523 GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b)
524 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
525 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
526 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
527 
528 #define DO_SWAP(N, M) (M)
529 #define DO_AND(N, M)  (N & M)
530 #define DO_XOR(N, M)  (N ^ M)
531 #define DO_OR(N, M)   (N | M)
532 #define DO_ADD(N, M)  (N + M)
533 
534 /* Signed min/max */
535 #define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
536 #define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))
537 
538 /* Unsigned min/max */
539 #define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
540 #define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)
541 
542 /*
543  *** load and store whole register instructions
544  */
545 static void
546 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
547                 vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra,
548                 MMUAccessType access_type)
549 {
550     uint32_t i, k, off, pos;
551     uint32_t nf = vext_nf(desc);
552     uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
553     uint32_t max_elems = vlenb >> esz;
554 
555     k = env->vstart / max_elems;
556     off = env->vstart % max_elems;
557 
558     if (off) {
559         /* load/store rest of elements of the segment pointed to by vstart */
560         for (pos = off; pos < max_elems; pos++, env->vstart++) {
561             target_ulong addr = base + ((pos + k * max_elems) << esz);
562             ldst_elem(env, addr, pos + k * max_elems, vd, ra);
563         }
564         k++;
565     }
566 
567     /* load/store elements for rest of segments */
568     for (; k < nf; k++) {
569         for (i = 0; i < max_elems; i++, env->vstart++) {
570             target_ulong addr = base + ((i + k * max_elems) << esz);
571             ldst_elem(env, addr, i + k * max_elems, vd, ra);
572         }
573     }
574 
575     env->vstart = 0;
576 }
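
/*
 * Worked example (illustrative, assuming VLEN = 128, i.e. vlenb = 16):
 * vl8re32_v has nf = 8 and esz = 2, so max_elems = 4 and 32 elements
 * are transferred in total.  If the access was interrupted at
 * vstart = 6, it resumes in segment k = 6 / 4 = 1 at offset
 * off = 6 % 4 = 2, finishes that segment, then handles segments 2..7.
 */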
577 
578 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN)      \
579 void HELPER(NAME)(void *vd, target_ulong base,       \
580                   CPURISCVState *env, uint32_t desc) \
581 {                                                    \
582     vext_ldst_whole(vd, base, env, desc, LOAD_FN,    \
583                     ctzl(sizeof(ETYPE)), GETPC(),    \
584                     MMU_DATA_LOAD);                  \
585 }
586 
587 GEN_VEXT_LD_WHOLE(vl1re8_v,  int8_t,  lde_b)
588 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
589 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
590 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
591 GEN_VEXT_LD_WHOLE(vl2re8_v,  int8_t,  lde_b)
592 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
593 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
594 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
595 GEN_VEXT_LD_WHOLE(vl4re8_v,  int8_t,  lde_b)
596 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
597 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
598 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
599 GEN_VEXT_LD_WHOLE(vl8re8_v,  int8_t,  lde_b)
600 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
601 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
602 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)
603 
604 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN)     \
605 void HELPER(NAME)(void *vd, target_ulong base,       \
606                   CPURISCVState *env, uint32_t desc) \
607 {                                                    \
608     vext_ldst_whole(vd, base, env, desc, STORE_FN,   \
609                     ctzl(sizeof(ETYPE)), GETPC(),    \
610                     MMU_DATA_STORE);                 \
611 }
612 
613 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
614 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
615 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
616 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
617 
618 /*
619  *** Vector Integer Arithmetic Instructions
620  */
621 
622 /* expand macro args before macro */
623 #define RVVCALL(macro, ...)  macro(__VA_ARGS__)
624 
625 /* (TD, T1, T2, TX1, TX2) */
626 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
627 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
628 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
629 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
630 #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
631 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
632 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
633 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
634 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
635 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
636 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
637 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
638 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
639 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
640 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
641 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
642 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
643 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
644 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
645 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
646 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
647 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
648 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
649 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
650 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
651 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
652 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
653 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
654 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
655 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
656 
657 /* operation of two vector elements */
658 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
659 
660 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
661 static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
662 {                                                               \
663     TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
664     TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
665     *((TD *)vd + HD(i)) = OP(s2, s1);                           \
666 }
667 #define DO_SUB(N, M) (N - M)
668 #define DO_RSUB(N, M) (M - N)
669 
670 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
671 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
672 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
673 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
674 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
675 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
676 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
677 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
678 
679 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
680                        CPURISCVState *env, uint32_t desc,
681                        uint32_t esz, uint32_t dsz,
682                        opivv2_fn *fn)
683 {
684     uint32_t vm = vext_vm(desc);
685     uint32_t vl = env->vl;
686     uint32_t i;
687 
688     for (i = env->vstart; i < vl; i++) {
689         if (!vm && !vext_elem_mask(v0, i)) {
690             continue;
691         }
692         fn(vd, vs1, vs2, i);
693     }
694     env->vstart = 0;
695 }
696 
697 /* generate the helpers for OPIVV */
698 #define GEN_VEXT_VV(NAME, ESZ, DSZ)                       \
699 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
700                   void *vs2, CPURISCVState *env,          \
701                   uint32_t desc)                          \
702 {                                                         \
703     do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,     \
704                do_##NAME);                                \
705 }
706 
707 GEN_VEXT_VV(vadd_vv_b, 1, 1)
708 GEN_VEXT_VV(vadd_vv_h, 2, 2)
709 GEN_VEXT_VV(vadd_vv_w, 4, 4)
710 GEN_VEXT_VV(vadd_vv_d, 8, 8)
711 GEN_VEXT_VV(vsub_vv_b, 1, 1)
712 GEN_VEXT_VV(vsub_vv_h, 2, 2)
713 GEN_VEXT_VV(vsub_vv_w, 4, 4)
714 GEN_VEXT_VV(vsub_vv_d, 8, 8)
715 
716 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
717 
718 /*
719  * (T1)s1 gives the real operand type.
720  * (TX1)(T1)s1 expands the operand type for widening or narrowing operations.
721  */
722 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
723 static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
724 {                                                                   \
725     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
726     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
727 }
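
/*
 * For example, vwadd_vx_h uses WOP_SSS_H (TD = int32_t, T1 = int16_t,
 * TX1 = int32_t): the scalar in s1 is first truncated to int16_t and
 * then sign-extended to int32_t, so s1 = 0x18000 contributes -32768
 * to each 32-bit result element.
 */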
728 
729 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
730 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
731 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
732 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
733 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
734 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
735 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
736 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
737 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
738 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
739 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
740 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
741 
742 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
743                        CPURISCVState *env, uint32_t desc,
744                        uint32_t esz, uint32_t dsz,
745                        opivx2_fn fn)
746 {
747     uint32_t vm = vext_vm(desc);
748     uint32_t vl = env->vl;
749     uint32_t i;
750 
751     for (i = env->vstart; i < vl; i++) {
752         if (!vm && !vext_elem_mask(v0, i)) {
753             continue;
754         }
755         fn(vd, s1, vs2, i);
756     }
757     env->vstart = 0;
758 }
759 
760 /* generate the helpers for OPIVX */
761 #define GEN_VEXT_VX(NAME, ESZ, DSZ)                       \
762 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
763                   void *vs2, CPURISCVState *env,          \
764                   uint32_t desc)                          \
765 {                                                         \
766     do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ,      \
767                do_##NAME);                                \
768 }
769 
770 GEN_VEXT_VX(vadd_vx_b, 1, 1)
771 GEN_VEXT_VX(vadd_vx_h, 2, 2)
772 GEN_VEXT_VX(vadd_vx_w, 4, 4)
773 GEN_VEXT_VX(vadd_vx_d, 8, 8)
774 GEN_VEXT_VX(vsub_vx_b, 1, 1)
775 GEN_VEXT_VX(vsub_vx_h, 2, 2)
776 GEN_VEXT_VX(vsub_vx_w, 4, 4)
777 GEN_VEXT_VX(vsub_vx_d, 8, 8)
778 GEN_VEXT_VX(vrsub_vx_b, 1, 1)
779 GEN_VEXT_VX(vrsub_vx_h, 2, 2)
780 GEN_VEXT_VX(vrsub_vx_w, 4, 4)
781 GEN_VEXT_VX(vrsub_vx_d, 8, 8)
782 
783 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
784 {
785     intptr_t oprsz = simd_oprsz(desc);
786     intptr_t i;
787 
788     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
789         *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
790     }
791 }
792 
793 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
794 {
795     intptr_t oprsz = simd_oprsz(desc);
796     intptr_t i;
797 
798     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
799         *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
800     }
801 }
802 
803 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
804 {
805     intptr_t oprsz = simd_oprsz(desc);
806     intptr_t i;
807 
808     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
809         *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
810     }
811 }
812 
813 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
814 {
815     intptr_t oprsz = simd_oprsz(desc);
816     intptr_t i;
817 
818     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
819         *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
820     }
821 }
822 
823 /* Vector Widening Integer Add/Subtract */
824 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
825 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
826 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
827 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
828 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
829 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
830 #define WOP_WUUU_B  uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
831 #define WOP_WUUU_H  uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
832 #define WOP_WUUU_W  uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
833 #define WOP_WSSS_B  int16_t, int8_t, int16_t, int16_t, int16_t
834 #define WOP_WSSS_H  int32_t, int16_t, int32_t, int32_t, int32_t
835 #define WOP_WSSS_W  int64_t, int32_t, int64_t, int64_t, int64_t
836 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
837 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
838 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
839 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
840 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
841 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
842 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
843 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
844 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
845 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
846 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
847 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
848 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
849 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
850 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
851 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
852 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
853 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
854 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
855 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
856 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
857 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
858 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
859 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
860 GEN_VEXT_VV(vwaddu_vv_b, 1, 2)
861 GEN_VEXT_VV(vwaddu_vv_h, 2, 4)
862 GEN_VEXT_VV(vwaddu_vv_w, 4, 8)
863 GEN_VEXT_VV(vwsubu_vv_b, 1, 2)
864 GEN_VEXT_VV(vwsubu_vv_h, 2, 4)
865 GEN_VEXT_VV(vwsubu_vv_w, 4, 8)
866 GEN_VEXT_VV(vwadd_vv_b, 1, 2)
867 GEN_VEXT_VV(vwadd_vv_h, 2, 4)
868 GEN_VEXT_VV(vwadd_vv_w, 4, 8)
869 GEN_VEXT_VV(vwsub_vv_b, 1, 2)
870 GEN_VEXT_VV(vwsub_vv_h, 2, 4)
871 GEN_VEXT_VV(vwsub_vv_w, 4, 8)
872 GEN_VEXT_VV(vwaddu_wv_b, 1, 2)
873 GEN_VEXT_VV(vwaddu_wv_h, 2, 4)
874 GEN_VEXT_VV(vwaddu_wv_w, 4, 8)
875 GEN_VEXT_VV(vwsubu_wv_b, 1, 2)
876 GEN_VEXT_VV(vwsubu_wv_h, 2, 4)
877 GEN_VEXT_VV(vwsubu_wv_w, 4, 8)
878 GEN_VEXT_VV(vwadd_wv_b, 1, 2)
879 GEN_VEXT_VV(vwadd_wv_h, 2, 4)
880 GEN_VEXT_VV(vwadd_wv_w, 4, 8)
881 GEN_VEXT_VV(vwsub_wv_b, 1, 2)
882 GEN_VEXT_VV(vwsub_wv_h, 2, 4)
883 GEN_VEXT_VV(vwsub_wv_w, 4, 8)
884 
885 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
886 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
887 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
888 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
889 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
890 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
891 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
892 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
893 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
894 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
895 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
896 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
897 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
898 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
899 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
900 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
901 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
902 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
903 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
904 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
905 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
906 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
907 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
908 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
909 GEN_VEXT_VX(vwaddu_vx_b, 1, 2)
910 GEN_VEXT_VX(vwaddu_vx_h, 2, 4)
911 GEN_VEXT_VX(vwaddu_vx_w, 4, 8)
912 GEN_VEXT_VX(vwsubu_vx_b, 1, 2)
913 GEN_VEXT_VX(vwsubu_vx_h, 2, 4)
914 GEN_VEXT_VX(vwsubu_vx_w, 4, 8)
915 GEN_VEXT_VX(vwadd_vx_b, 1, 2)
916 GEN_VEXT_VX(vwadd_vx_h, 2, 4)
917 GEN_VEXT_VX(vwadd_vx_w, 4, 8)
918 GEN_VEXT_VX(vwsub_vx_b, 1, 2)
919 GEN_VEXT_VX(vwsub_vx_h, 2, 4)
920 GEN_VEXT_VX(vwsub_vx_w, 4, 8)
921 GEN_VEXT_VX(vwaddu_wx_b, 1, 2)
922 GEN_VEXT_VX(vwaddu_wx_h, 2, 4)
923 GEN_VEXT_VX(vwaddu_wx_w, 4, 8)
924 GEN_VEXT_VX(vwsubu_wx_b, 1, 2)
925 GEN_VEXT_VX(vwsubu_wx_h, 2, 4)
926 GEN_VEXT_VX(vwsubu_wx_w, 4, 8)
927 GEN_VEXT_VX(vwadd_wx_b, 1, 2)
928 GEN_VEXT_VX(vwadd_wx_h, 2, 4)
929 GEN_VEXT_VX(vwadd_wx_w, 4, 8)
930 GEN_VEXT_VX(vwsub_wx_b, 1, 2)
931 GEN_VEXT_VX(vwsub_wx_h, 2, 4)
932 GEN_VEXT_VX(vwsub_wx_w, 4, 8)
933 
934 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
935 #define DO_VADC(N, M, C) (N + M + C)
936 #define DO_VSBC(N, M, C) (N - M - C)
937 
938 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP)              \
939 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
940                   CPURISCVState *env, uint32_t desc)          \
941 {                                                             \
942     uint32_t vl = env->vl;                                    \
943     uint32_t i;                                               \
944                                                               \
945     for (i = env->vstart; i < vl; i++) {                      \
946         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
947         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
948         ETYPE carry = vext_elem_mask(v0, i);                  \
949                                                               \
950         *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry);         \
951     }                                                         \
952     env->vstart = 0;                                          \
953 }
954 
955 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t,  H1, DO_VADC)
956 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
957 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
958 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)
959 
960 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t,  H1, DO_VSBC)
961 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
962 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
963 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
964 
965 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP)                         \
966 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
967                   CPURISCVState *env, uint32_t desc)                     \
968 {                                                                        \
969     uint32_t vl = env->vl;                                               \
970     uint32_t i;                                                          \
971                                                                          \
972     for (i = env->vstart; i < vl; i++) {                                 \
973         ETYPE s2 = *((ETYPE *)vs2 + H(i));                               \
974         ETYPE carry = vext_elem_mask(v0, i);                             \
975                                                                          \
976         *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
977     }                                                                    \
978     env->vstart = 0;                                          \
979 }
980 
981 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t,  H1, DO_VADC)
982 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
983 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
984 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)
985 
986 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t,  H1, DO_VSBC)
987 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
988 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
989 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
990 
991 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N :           \
992                           (__typeof(N))(N + M) < N)
993 #define DO_MSBC(N, M, C) (C ? N <= M : N < M)
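
/*
 * Worked example for the carry/borrow-out predicates, with uint8_t
 * operands: N = 200, M = 100, C = 0 gives (uint8_t)(N + M) = 44 < 200,
 * so DO_MADC reports a carry-out of 1; N = 5, M = 10, C = 0 gives
 * DO_MSBC = (5 < 10) = 1, i.e. a borrow-out.
 */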
994 
995 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
996 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
997                   CPURISCVState *env, uint32_t desc)          \
998 {                                                             \
999     uint32_t vl = env->vl;                                    \
1000     uint32_t vm = vext_vm(desc);                              \
1001     uint32_t i;                                               \
1002                                                               \
1003     for (i = env->vstart; i < vl; i++) {                      \
1004         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
1005         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
1006         ETYPE carry = !vm && vext_elem_mask(v0, i);           \
1007         vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry));      \
1008     }                                                         \
1009     env->vstart = 0;                                          \
1010 }
1011 
1012 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t,  H1, DO_MADC)
1013 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
1014 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
1015 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)
1016 
1017 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t,  H1, DO_MSBC)
1018 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
1019 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
1020 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
1021 
1022 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)               \
1023 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
1024                   void *vs2, CPURISCVState *env, uint32_t desc) \
1025 {                                                               \
1026     uint32_t vl = env->vl;                                      \
1027     uint32_t vm = vext_vm(desc);                                \
1028     uint32_t i;                                                 \
1029                                                                 \
1030     for (i = env->vstart; i < vl; i++) {                        \
1031         ETYPE s2 = *((ETYPE *)vs2 + H(i));                      \
1032         ETYPE carry = !vm && vext_elem_mask(v0, i);             \
1033         vext_set_elem_mask(vd, i,                               \
1034                 DO_OP(s2, (ETYPE)(target_long)s1, carry));      \
1035     }                                                           \
1036     env->vstart = 0;                                            \
1037 }
1038 
1039 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t,  H1, DO_MADC)
1040 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
1041 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
1042 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)
1043 
1044 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t,  H1, DO_MSBC)
1045 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
1046 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
1047 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
1048 
1049 /* Vector Bitwise Logical Instructions */
1050 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
1051 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
1052 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
1053 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
1054 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
1055 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
1056 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
1057 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
1058 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
1059 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
1060 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
1061 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
1062 GEN_VEXT_VV(vand_vv_b, 1, 1)
1063 GEN_VEXT_VV(vand_vv_h, 2, 2)
1064 GEN_VEXT_VV(vand_vv_w, 4, 4)
1065 GEN_VEXT_VV(vand_vv_d, 8, 8)
1066 GEN_VEXT_VV(vor_vv_b, 1, 1)
1067 GEN_VEXT_VV(vor_vv_h, 2, 2)
1068 GEN_VEXT_VV(vor_vv_w, 4, 4)
1069 GEN_VEXT_VV(vor_vv_d, 8, 8)
1070 GEN_VEXT_VV(vxor_vv_b, 1, 1)
1071 GEN_VEXT_VV(vxor_vv_h, 2, 2)
1072 GEN_VEXT_VV(vxor_vv_w, 4, 4)
1073 GEN_VEXT_VV(vxor_vv_d, 8, 8)
1074 
1075 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
1076 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
1077 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
1078 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
1079 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
1080 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
1081 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
1082 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
1083 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
1084 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
1085 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
1086 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
1087 GEN_VEXT_VX(vand_vx_b, 1, 1)
1088 GEN_VEXT_VX(vand_vx_h, 2, 2)
1089 GEN_VEXT_VX(vand_vx_w, 4, 4)
1090 GEN_VEXT_VX(vand_vx_d, 8, 8)
1091 GEN_VEXT_VX(vor_vx_b, 1, 1)
1092 GEN_VEXT_VX(vor_vx_h, 2, 2)
1093 GEN_VEXT_VX(vor_vx_w, 4, 4)
1094 GEN_VEXT_VX(vor_vx_d, 8, 8)
1095 GEN_VEXT_VX(vxor_vx_b, 1, 1)
1096 GEN_VEXT_VX(vxor_vx_h, 2, 2)
1097 GEN_VEXT_VX(vxor_vx_w, 4, 4)
1098 GEN_VEXT_VX(vxor_vx_d, 8, 8)
1099 
1100 /* Vector Single-Width Bit Shift Instructions */
1101 #define DO_SLL(N, M)  (N << (M))
1102 #define DO_SRL(N, M)  (N >> (M))
1103 
1104 /* generate the helpers for shift instructions with two vector operands */
1105 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK)             \
1106 void HELPER(NAME)(void *vd, void *v0, void *vs1,                          \
1107                   void *vs2, CPURISCVState *env, uint32_t desc)           \
1108 {                                                                         \
1109     uint32_t vm = vext_vm(desc);                                          \
1110     uint32_t vl = env->vl;                                                \
1111     uint32_t i;                                                           \
1112                                                                           \
1113     for (i = env->vstart; i < vl; i++) {                                  \
1114         if (!vm && !vext_elem_mask(v0, i)) {                              \
1115             continue;                                                     \
1116         }                                                                 \
1117         TS1 s1 = *((TS1 *)vs1 + HS1(i));                                  \
1118         TS2 s2 = *((TS2 *)vs2 + HS2(i));                                  \
1119         *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);                        \
1120     }                                                                     \
1121     env->vstart = 0;                                                      \
1122 }
1123 
1124 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t,  uint8_t, H1, H1, DO_SLL, 0x7)
1125 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
1126 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
1127 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)
1128 
1129 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1130 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1131 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1132 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1133 
1134 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t,  int8_t, H1, H1, DO_SRL, 0x7)
1135 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
1136 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1137 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
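
/*
 * Note that vsra reuses DO_SRL but instantiates TS2 as a signed type,
 * so ">>" is an arithmetic shift on the host (as with GCC/Clang);
 * e.g. an int8_t element 0xf0 (-16) shifted right by 2 yields 0xfc
 * (-4), whereas the unsigned vsrl form yields 0x3c.
 */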
1138 
1139 /* generate the helpers for shift instructions with one vector and one scalar */
1140 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
1141 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,      \
1142         void *vs2, CPURISCVState *env, uint32_t desc)       \
1143 {                                                           \
1144     uint32_t vm = vext_vm(desc);                            \
1145     uint32_t vl = env->vl;                                  \
1146     uint32_t i;                                             \
1147                                                             \
1148     for (i = env->vstart; i < vl; i++) {                    \
1149         if (!vm && !vext_elem_mask(v0, i)) {                \
1150             continue;                                       \
1151         }                                                   \
1152         TS2 s2 = *((TS2 *)vs2 + HS2(i));                    \
1153         *((TD *)vd + HD(i)) = OP(s2, s1 & MASK);            \
1154     }                                                       \
1155     env->vstart = 0;                                        \
1156 }
1157 
1158 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
1159 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
1160 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
1161 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)
1162 
1163 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1164 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1165 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1166 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1167 
1168 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
1169 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
1170 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1171 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1172 
1173 /* Vector Narrowing Integer Right Shift Instructions */
1174 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t,  uint16_t, H1, H2, DO_SRL, 0xf)
1175 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1176 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1177 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t,  int16_t, H1, H2, DO_SRL, 0xf)
1178 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1179 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1180 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1181 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1182 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1183 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
1184 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1185 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1186 
1187 /* Vector Integer Comparison Instructions */
1188 #define DO_MSEQ(N, M) (N == M)
1189 #define DO_MSNE(N, M) (N != M)
1190 #define DO_MSLT(N, M) (N < M)
1191 #define DO_MSLE(N, M) (N <= M)
1192 #define DO_MSGT(N, M) (N > M)
1193 
1194 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                \
1195 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
1196                   CPURISCVState *env, uint32_t desc)          \
1197 {                                                             \
1198     uint32_t vm = vext_vm(desc);                              \
1199     uint32_t vl = env->vl;                                    \
1200     uint32_t i;                                               \
1201                                                               \
1202     for (i = env->vstart; i < vl; i++) {                      \
1203         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
1204         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
1205         if (!vm && !vext_elem_mask(v0, i)) {                  \
1206             continue;                                         \
1207         }                                                     \
1208         vext_set_elem_mask(vd, i, DO_OP(s2, s1));             \
1209     }                                                         \
1210     env->vstart = 0;                                          \
1211 }
1212 
1213 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
1214 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
1215 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
1216 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
1217 
1218 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t,  H1, DO_MSNE)
1219 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
1220 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
1221 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
1222 
1223 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t,  H1, DO_MSLT)
1224 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
1225 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
1226 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
1227 
1228 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t,  H1, DO_MSLT)
1229 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
1230 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
1231 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
1232 
1233 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t,  H1, DO_MSLE)
1234 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
1235 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
1236 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
1237 
1238 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t,  H1, DO_MSLE)
1239 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
1240 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
1241 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1242 
1243 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP)                      \
1244 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
1245                   CPURISCVState *env, uint32_t desc)                \
1246 {                                                                   \
1247     uint32_t vm = vext_vm(desc);                                    \
1248     uint32_t vl = env->vl;                                          \
1249     uint32_t i;                                                     \
1250                                                                     \
1251     for (i = env->vstart; i < vl; i++) {                            \
1252         ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
1253         if (!vm && !vext_elem_mask(v0, i)) {                        \
1254             continue;                                               \
1255         }                                                           \
1256         vext_set_elem_mask(vd, i,                                   \
1257                 DO_OP(s2, (ETYPE)(target_long)s1));                 \
1258     }                                                               \
1259     env->vstart = 0;                                                \
1260 }
1261 
1262 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t,  H1, DO_MSEQ)
1263 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
1264 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
1265 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
1266 
1267 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t,  H1, DO_MSNE)
1268 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
1269 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
1270 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
1271 
1272 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t,  H1, DO_MSLT)
1273 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
1274 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
1275 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
1276 
1277 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t,  H1, DO_MSLT)
1278 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
1279 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
1280 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
1281 
1282 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t,  H1, DO_MSLE)
1283 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
1284 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
1285 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
1286 
1287 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t,  H1, DO_MSLE)
1288 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
1289 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
1290 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
1291 
1292 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t,  H1, DO_MSGT)
1293 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
1294 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
1295 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
1296 
1297 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t,  H1, DO_MSGT)
1298 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
1299 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
1300 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
1301 
1302 /* Vector Integer Min/Max Instructions */
1303 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1304 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1305 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1306 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1307 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1308 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1309 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1310 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1311 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1312 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1313 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1314 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1315 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1316 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1317 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1318 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
1319 GEN_VEXT_VV(vminu_vv_b, 1, 1)
1320 GEN_VEXT_VV(vminu_vv_h, 2, 2)
1321 GEN_VEXT_VV(vminu_vv_w, 4, 4)
1322 GEN_VEXT_VV(vminu_vv_d, 8, 8)
1323 GEN_VEXT_VV(vmin_vv_b, 1, 1)
1324 GEN_VEXT_VV(vmin_vv_h, 2, 2)
1325 GEN_VEXT_VV(vmin_vv_w, 4, 4)
1326 GEN_VEXT_VV(vmin_vv_d, 8, 8)
1327 GEN_VEXT_VV(vmaxu_vv_b, 1, 1)
1328 GEN_VEXT_VV(vmaxu_vv_h, 2, 2)
1329 GEN_VEXT_VV(vmaxu_vv_w, 4, 4)
1330 GEN_VEXT_VV(vmaxu_vv_d, 8, 8)
1331 GEN_VEXT_VV(vmax_vv_b, 1, 1)
1332 GEN_VEXT_VV(vmax_vv_h, 2, 2)
1333 GEN_VEXT_VV(vmax_vv_w, 4, 4)
1334 GEN_VEXT_VV(vmax_vv_d, 8, 8)
1335 
1336 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1337 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1338 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1339 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1340 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1341 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1342 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1343 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1344 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1345 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1346 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1347 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1348 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1349 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1350 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1351 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
1352 GEN_VEXT_VX(vminu_vx_b, 1, 1)
1353 GEN_VEXT_VX(vminu_vx_h, 2, 2)
1354 GEN_VEXT_VX(vminu_vx_w, 4, 4)
1355 GEN_VEXT_VX(vminu_vx_d, 8, 8)
1356 GEN_VEXT_VX(vmin_vx_b, 1, 1)
1357 GEN_VEXT_VX(vmin_vx_h, 2, 2)
1358 GEN_VEXT_VX(vmin_vx_w, 4, 4)
1359 GEN_VEXT_VX(vmin_vx_d, 8, 8)
1360 GEN_VEXT_VX(vmaxu_vx_b, 1, 1)
1361 GEN_VEXT_VX(vmaxu_vx_h, 2, 2)
1362 GEN_VEXT_VX(vmaxu_vx_w, 4, 4)
1363 GEN_VEXT_VX(vmaxu_vx_d, 8, 8)
1364 GEN_VEXT_VX(vmax_vx_b, 1, 1)
1365 GEN_VEXT_VX(vmax_vx_h, 2, 2)
1366 GEN_VEXT_VX(vmax_vx_w, 4, 4)
1367 GEN_VEXT_VX(vmax_vx_d, 8, 8)
1368 
1369 /* Vector Single-Width Integer Multiply Instructions */
1370 #define DO_MUL(N, M) (N * M)
1371 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1372 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1373 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1374 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
1375 GEN_VEXT_VV(vmul_vv_b, 1, 1)
1376 GEN_VEXT_VV(vmul_vv_h, 2, 2)
1377 GEN_VEXT_VV(vmul_vv_w, 4, 4)
1378 GEN_VEXT_VV(vmul_vv_d, 8, 8)
1379 
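/*
 * do_mulh_*, do_mulhu_* and do_mulhsu_* return the high SEW bits of
 * the 2*SEW-bit product.  The narrow variants simply widen, multiply
 * and shift; the 64-bit variants use muls64()/mulu64() from
 * "qemu/host-utils.h" to obtain the upper half of the 128-bit product.
 */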
1380 static int8_t do_mulh_b(int8_t s2, int8_t s1)
1381 {
1382     return (int16_t)s2 * (int16_t)s1 >> 8;
1383 }
1384 
1385 static int16_t do_mulh_h(int16_t s2, int16_t s1)
1386 {
1387     return (int32_t)s2 * (int32_t)s1 >> 16;
1388 }
1389 
1390 static int32_t do_mulh_w(int32_t s2, int32_t s1)
1391 {
1392     return (int64_t)s2 * (int64_t)s1 >> 32;
1393 }
1394 
1395 static int64_t do_mulh_d(int64_t s2, int64_t s1)
1396 {
1397     uint64_t hi_64, lo_64;
1398 
1399     muls64(&lo_64, &hi_64, s1, s2);
1400     return hi_64;
1401 }
1402 
1403 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1404 {
1405     return (uint16_t)s2 * (uint16_t)s1 >> 8;
1406 }
1407 
1408 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1409 {
1410     return (uint32_t)s2 * (uint32_t)s1 >> 16;
1411 }
1412 
1413 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1414 {
1415     return (uint64_t)s2 * (uint64_t)s1 >> 32;
1416 }
1417 
1418 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1419 {
1420     uint64_t hi_64, lo_64;
1421 
1422     mulu64(&lo_64, &hi_64, s2, s1);
1423     return hi_64;
1424 }
1425 
1426 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1427 {
1428     return (int16_t)s2 * (uint16_t)s1 >> 8;
1429 }
1430 
1431 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1432 {
1433     return (int32_t)s2 * (uint32_t)s1 >> 16;
1434 }
1435 
1436 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1437 {
1438     return (int64_t)s2 * (uint64_t)s1 >> 32;
1439 }
1440 
1441 /*
1442  * Signed * unsigned high-half multiply, computed via mulu64().
1443  *
1444  * Let  A = signed operand (s2),
1445  *      B = unsigned operand (s1),
1446  *      P = mulu64(A, B), i.e. the unsigned product of A's raw
1447  *          64-bit pattern and B,
1448  *      SP = the desired signed product A * B.
1449  *
1450  * IF A >= 0
1451  *      the bit pattern of A equals A, so SP = P.
1452  * IF A < 0
1453  *      the bit pattern of A is A + 2 ** 64, hence
1454  *      P  = (A + 2 ** 64) * B
1455  *      SP = A * B = P - 2 ** 64 * B,
1456  * so only the upper 64 bits of P differ from SP, by exactly B:
1457  *      HI_P -= (A < 0 ? B : 0)
1458  */
1459 
1460 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1461 {
1462     uint64_t hi_64, lo_64;
1463 
1464     mulu64(&lo_64, &hi_64, s2, s1);
1465 
1466     hi_64 -= s2 < 0 ? s1 : 0;
1467     return hi_64;
1468 }
1469 
1470 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1471 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1472 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1473 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1474 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1475 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1476 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1477 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1478 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1479 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1480 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1481 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
1482 GEN_VEXT_VV(vmulh_vv_b, 1, 1)
1483 GEN_VEXT_VV(vmulh_vv_h, 2, 2)
1484 GEN_VEXT_VV(vmulh_vv_w, 4, 4)
1485 GEN_VEXT_VV(vmulh_vv_d, 8, 8)
1486 GEN_VEXT_VV(vmulhu_vv_b, 1, 1)
1487 GEN_VEXT_VV(vmulhu_vv_h, 2, 2)
1488 GEN_VEXT_VV(vmulhu_vv_w, 4, 4)
1489 GEN_VEXT_VV(vmulhu_vv_d, 8, 8)
1490 GEN_VEXT_VV(vmulhsu_vv_b, 1, 1)
1491 GEN_VEXT_VV(vmulhsu_vv_h, 2, 2)
1492 GEN_VEXT_VV(vmulhsu_vv_w, 4, 4)
1493 GEN_VEXT_VV(vmulhsu_vv_d, 8, 8)
1494 
1495 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1496 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1497 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1498 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1499 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1500 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1501 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1502 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1503 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1504 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1505 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1506 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1507 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1508 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1509 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1510 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
1511 GEN_VEXT_VX(vmul_vx_b, 1, 1)
1512 GEN_VEXT_VX(vmul_vx_h, 2, 2)
1513 GEN_VEXT_VX(vmul_vx_w, 4, 4)
1514 GEN_VEXT_VX(vmul_vx_d, 8, 8)
1515 GEN_VEXT_VX(vmulh_vx_b, 1, 1)
1516 GEN_VEXT_VX(vmulh_vx_h, 2, 2)
1517 GEN_VEXT_VX(vmulh_vx_w, 4, 4)
1518 GEN_VEXT_VX(vmulh_vx_d, 8, 8)
1519 GEN_VEXT_VX(vmulhu_vx_b, 1, 1)
1520 GEN_VEXT_VX(vmulhu_vx_h, 2, 2)
1521 GEN_VEXT_VX(vmulhu_vx_w, 4, 4)
1522 GEN_VEXT_VX(vmulhu_vx_d, 8, 8)
1523 GEN_VEXT_VX(vmulhsu_vx_b, 1, 1)
1524 GEN_VEXT_VX(vmulhsu_vx_h, 2, 2)
1525 GEN_VEXT_VX(vmulhsu_vx_w, 4, 4)
1526 GEN_VEXT_VX(vmulhsu_vx_d, 8, 8)
1527 
1528 /* Vector Integer Divide Instructions */
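/*
 * Division follows the RISC-V rules: dividing by zero yields all ones
 * for the quotient and the dividend for the remainder; signed overflow
 * (most negative value divided by -1) yields the dividend for the
 * quotient and 0 for the remainder.  The (N == -N) test detects the
 * most negative value without naming the type's limits (it also
 * matches zero, for which the early-out result is the same).
 */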
1529 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1530 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
1531 #define DO_DIV(N, M)  (unlikely(M == 0) ? (__typeof(N))(-1) :\
1532         unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
1533 #define DO_REM(N, M)  (unlikely(M == 0) ? N :\
1534         unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
1535 
1536 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1537 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1538 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1539 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1540 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1541 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1542 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1543 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1544 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1545 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1546 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1547 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1548 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1549 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1550 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1551 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
1552 GEN_VEXT_VV(vdivu_vv_b, 1, 1)
1553 GEN_VEXT_VV(vdivu_vv_h, 2, 2)
1554 GEN_VEXT_VV(vdivu_vv_w, 4, 4)
1555 GEN_VEXT_VV(vdivu_vv_d, 8, 8)
1556 GEN_VEXT_VV(vdiv_vv_b, 1, 1)
1557 GEN_VEXT_VV(vdiv_vv_h, 2, 2)
1558 GEN_VEXT_VV(vdiv_vv_w, 4, 4)
1559 GEN_VEXT_VV(vdiv_vv_d, 8, 8)
1560 GEN_VEXT_VV(vremu_vv_b, 1, 1)
1561 GEN_VEXT_VV(vremu_vv_h, 2, 2)
1562 GEN_VEXT_VV(vremu_vv_w, 4, 4)
1563 GEN_VEXT_VV(vremu_vv_d, 8, 8)
1564 GEN_VEXT_VV(vrem_vv_b, 1, 1)
1565 GEN_VEXT_VV(vrem_vv_h, 2, 2)
1566 GEN_VEXT_VV(vrem_vv_w, 4, 4)
1567 GEN_VEXT_VV(vrem_vv_d, 8, 8)
1568 
1569 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1570 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1571 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1572 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1573 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1574 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1575 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1576 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1577 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1578 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1579 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1580 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1581 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1582 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1583 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1584 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
1585 GEN_VEXT_VX(vdivu_vx_b, 1, 1)
1586 GEN_VEXT_VX(vdivu_vx_h, 2, 2)
1587 GEN_VEXT_VX(vdivu_vx_w, 4, 4)
1588 GEN_VEXT_VX(vdivu_vx_d, 8, 8)
1589 GEN_VEXT_VX(vdiv_vx_b, 1, 1)
1590 GEN_VEXT_VX(vdiv_vx_h, 2, 2)
1591 GEN_VEXT_VX(vdiv_vx_w, 4, 4)
1592 GEN_VEXT_VX(vdiv_vx_d, 8, 8)
1593 GEN_VEXT_VX(vremu_vx_b, 1, 1)
1594 GEN_VEXT_VX(vremu_vx_h, 2, 2)
1595 GEN_VEXT_VX(vremu_vx_w, 4, 4)
1596 GEN_VEXT_VX(vremu_vx_d, 8, 8)
1597 GEN_VEXT_VX(vrem_vx_b, 1, 1)
1598 GEN_VEXT_VX(vrem_vx_h, 2, 2)
1599 GEN_VEXT_VX(vrem_vx_w, 4, 4)
1600 GEN_VEXT_VX(vrem_vx_d, 8, 8)
1601 
1602 /* Vector Widening Integer Multiply Instructions */
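/*
 * The WOP_* type macros make the destination element twice the source
 * width, so the destination index uses the next-larger H macro and the
 * GEN_VEXT_* instantiations pass (ESZ, 2 * ESZ).
 */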
1603 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1604 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1605 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1606 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1607 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1608 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1609 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1610 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1611 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
1612 GEN_VEXT_VV(vwmul_vv_b, 1, 2)
1613 GEN_VEXT_VV(vwmul_vv_h, 2, 4)
1614 GEN_VEXT_VV(vwmul_vv_w, 4, 8)
1615 GEN_VEXT_VV(vwmulu_vv_b, 1, 2)
1616 GEN_VEXT_VV(vwmulu_vv_h, 2, 4)
1617 GEN_VEXT_VV(vwmulu_vv_w, 4, 8)
1618 GEN_VEXT_VV(vwmulsu_vv_b, 1, 2)
1619 GEN_VEXT_VV(vwmulsu_vv_h, 2, 4)
1620 GEN_VEXT_VV(vwmulsu_vv_w, 4, 8)
1621 
1622 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1623 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1624 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1625 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1626 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1627 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1628 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1629 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1630 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
1631 GEN_VEXT_VX(vwmul_vx_b, 1, 2)
1632 GEN_VEXT_VX(vwmul_vx_h, 2, 4)
1633 GEN_VEXT_VX(vwmul_vx_w, 4, 8)
1634 GEN_VEXT_VX(vwmulu_vx_b, 1, 2)
1635 GEN_VEXT_VX(vwmulu_vx_h, 2, 4)
1636 GEN_VEXT_VX(vwmulu_vx_w, 4, 8)
1637 GEN_VEXT_VX(vwmulsu_vx_b, 1, 2)
1638 GEN_VEXT_VX(vwmulsu_vx_h, 2, 4)
1639 GEN_VEXT_VX(vwmulsu_vx_w, 4, 8)
1640 
1641 /* Vector Single-Width Integer Multiply-Add Instructions */
1642 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
1643 static void do_##NAME(void *vd, void *vs1, void *vs2, int i)       \
1644 {                                                                  \
1645     TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
1646     TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
1647     TD d = *((TD *)vd + HD(i));                                    \
1648     *((TD *)vd + HD(i)) = OP(s2, s1, d);                           \
1649 }
1650 
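/*
 * The DO_* macros below receive (N, M, D) = (vs2 element, vs1 element
 * or scalar rs1, current vd element), so:
 *   vmacc:  vd = (vs1 * vs2) + vd     vnmsac: vd = -(vs1 * vs2) + vd
 *   vmadd:  vd = (vs1 * vd) + vs2     vnmsub: vd = -(vs1 * vd) + vs2
 */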
1651 #define DO_MACC(N, M, D) (M * N + D)
1652 #define DO_NMSAC(N, M, D) (-(M * N) + D)
1653 #define DO_MADD(N, M, D) (M * D + N)
1654 #define DO_NMSUB(N, M, D) (-(M * D) + N)
1655 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1656 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1657 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1658 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1659 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1660 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1661 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1662 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1663 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1664 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1665 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1666 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1667 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1668 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1669 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1670 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
1671 GEN_VEXT_VV(vmacc_vv_b, 1, 1)
1672 GEN_VEXT_VV(vmacc_vv_h, 2, 2)
1673 GEN_VEXT_VV(vmacc_vv_w, 4, 4)
1674 GEN_VEXT_VV(vmacc_vv_d, 8, 8)
1675 GEN_VEXT_VV(vnmsac_vv_b, 1, 1)
1676 GEN_VEXT_VV(vnmsac_vv_h, 2, 2)
1677 GEN_VEXT_VV(vnmsac_vv_w, 4, 4)
1678 GEN_VEXT_VV(vnmsac_vv_d, 8, 8)
1679 GEN_VEXT_VV(vmadd_vv_b, 1, 1)
1680 GEN_VEXT_VV(vmadd_vv_h, 2, 2)
1681 GEN_VEXT_VV(vmadd_vv_w, 4, 4)
1682 GEN_VEXT_VV(vmadd_vv_d, 8, 8)
1683 GEN_VEXT_VV(vnmsub_vv_b, 1, 1)
1684 GEN_VEXT_VV(vnmsub_vv_h, 2, 2)
1685 GEN_VEXT_VV(vnmsub_vv_w, 4, 4)
1686 GEN_VEXT_VV(vnmsub_vv_d, 8, 8)
1687 
1688 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
1689 static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
1690 {                                                                   \
1691     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
1692     TD d = *((TD *)vd + HD(i));                                     \
1693     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d);                   \
1694 }
1695 
1696 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1697 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1698 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1699 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1700 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1701 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1702 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1703 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1704 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1705 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1706 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1707 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1708 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1709 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1710 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1711 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
1712 GEN_VEXT_VX(vmacc_vx_b, 1, 1)
1713 GEN_VEXT_VX(vmacc_vx_h, 2, 2)
1714 GEN_VEXT_VX(vmacc_vx_w, 4, 4)
1715 GEN_VEXT_VX(vmacc_vx_d, 8, 8)
1716 GEN_VEXT_VX(vnmsac_vx_b, 1, 1)
1717 GEN_VEXT_VX(vnmsac_vx_h, 2, 2)
1718 GEN_VEXT_VX(vnmsac_vx_w, 4, 4)
1719 GEN_VEXT_VX(vnmsac_vx_d, 8, 8)
1720 GEN_VEXT_VX(vmadd_vx_b, 1, 1)
1721 GEN_VEXT_VX(vmadd_vx_h, 2, 2)
1722 GEN_VEXT_VX(vmadd_vx_w, 4, 4)
1723 GEN_VEXT_VX(vmadd_vx_d, 8, 8)
1724 GEN_VEXT_VX(vnmsub_vx_b, 1, 1)
1725 GEN_VEXT_VX(vnmsub_vx_h, 2, 2)
1726 GEN_VEXT_VX(vnmsub_vx_w, 4, 4)
1727 GEN_VEXT_VX(vnmsub_vx_d, 8, 8)
1728 
1729 /* Vector Widening Integer Multiply-Add Instructions */
1730 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1731 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1732 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1733 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1734 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1735 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1736 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1737 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1738 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
1739 GEN_VEXT_VV(vwmaccu_vv_b, 1, 2)
1740 GEN_VEXT_VV(vwmaccu_vv_h, 2, 4)
1741 GEN_VEXT_VV(vwmaccu_vv_w, 4, 8)
1742 GEN_VEXT_VV(vwmacc_vv_b, 1, 2)
1743 GEN_VEXT_VV(vwmacc_vv_h, 2, 4)
1744 GEN_VEXT_VV(vwmacc_vv_w, 4, 8)
1745 GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2)
1746 GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4)
1747 GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8)
1748 
1749 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
1750 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
1751 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
1752 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
1753 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
1754 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
1755 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
1756 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
1757 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
1758 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
1759 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
1760 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
1761 GEN_VEXT_VX(vwmaccu_vx_b, 1, 2)
1762 GEN_VEXT_VX(vwmaccu_vx_h, 2, 4)
1763 GEN_VEXT_VX(vwmaccu_vx_w, 4, 8)
1764 GEN_VEXT_VX(vwmacc_vx_b, 1, 2)
1765 GEN_VEXT_VX(vwmacc_vx_h, 2, 4)
1766 GEN_VEXT_VX(vwmacc_vx_w, 4, 8)
1767 GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2)
1768 GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4)
1769 GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8)
1770 GEN_VEXT_VX(vwmaccus_vx_b, 1, 2)
1771 GEN_VEXT_VX(vwmaccus_vx_h, 2, 4)
1772 GEN_VEXT_VX(vwmaccus_vx_w, 4, 8)
1773 
1774 /* Vector Integer Merge and Move Instructions */
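/*
 * vmv.v.* are always unmasked, and vmerge uses v0 purely as a data
 * operand that selects between vs2 and vs1/rs1, so none of these
 * helpers test the vm field.
 */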
1775 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H)                              \
1776 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env,           \
1777                   uint32_t desc)                                     \
1778 {                                                                    \
1779     uint32_t vl = env->vl;                                           \
1780     uint32_t i;                                                      \
1781                                                                      \
1782     for (i = env->vstart; i < vl; i++) {                             \
1783         ETYPE s1 = *((ETYPE *)vs1 + H(i));                           \
1784         *((ETYPE *)vd + H(i)) = s1;                                  \
1785     }                                                                \
1786     env->vstart = 0;                                                 \
1787 }
1788 
1789 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t,  H1)
1790 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
1791 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
1792 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
1793 
1794 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H)                              \
1795 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env,         \
1796                   uint32_t desc)                                     \
1797 {                                                                    \
1798     uint32_t vl = env->vl;                                           \
1799     uint32_t i;                                                      \
1800                                                                      \
1801     for (i = env->vstart; i < vl; i++) {                             \
1802         *((ETYPE *)vd + H(i)) = (ETYPE)s1;                           \
1803     }                                                                \
1804     env->vstart = 0;                                                 \
1805 }
1806 
1807 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t,  H1)
1808 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
1809 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
1810 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
1811 
1812 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H)                           \
1813 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,          \
1814                   CPURISCVState *env, uint32_t desc)                 \
1815 {                                                                    \
1816     uint32_t vl = env->vl;                                           \
1817     uint32_t i;                                                      \
1818                                                                      \
1819     for (i = env->vstart; i < vl; i++) {                             \
1820         ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1);            \
1821         *((ETYPE *)vd + H(i)) = *(vt + H(i));                        \
1822     }                                                                \
1823     env->vstart = 0;                                                 \
1824 }
1825 
1826 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t,  H1)
1827 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
1828 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
1829 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
1830 
1831 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H)                           \
1832 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,               \
1833                   void *vs2, CPURISCVState *env, uint32_t desc)      \
1834 {                                                                    \
1835     uint32_t vl = env->vl;                                           \
1836     uint32_t i;                                                      \
1837                                                                      \
1838     for (i = env->vstart; i < vl; i++) {                             \
1839         ETYPE s2 = *((ETYPE *)vs2 + H(i));                           \
1840         ETYPE d = (!vext_elem_mask(v0, i) ? s2 :                     \
1841                    (ETYPE)(target_long)s1);                          \
1842         *((ETYPE *)vd + H(i)) = d;                                   \
1843     }                                                                \
1844     env->vstart = 0;                                                 \
1845 }
1846 
1847 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t,  H1)
1848 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
1849 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
1850 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
1851 
1852 /*
1853  *** Vector Fixed-Point Arithmetic Instructions
1854  */
1855 
1856 /* Vector Single-Width Saturating Add and Subtract */
1857 
1858 /*
1859  * Fixed-point instructions take a rounding mode (vxrm) and may saturate
1860  * (vxsat), so define the common macros for them here.
1861  */
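/*
 * The fixed-point helpers are layered: OPIVV2_RM/OPIVX2_RM generate a
 * per-element function that takes the rounding mode, vext_v*_rm_1()
 * runs the masked element loop for one mode, and vext_v*_rm_2()
 * dispatches once on env->vxrm so that the mode reaches the inlined
 * loop as a constant (presumably to allow per-mode specialisation).
 */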
1862 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
1863                           CPURISCVState *env, int vxrm);
1864 
1865 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)     \
1866 static inline void                                                  \
1867 do_##NAME(void *vd, void *vs1, void *vs2, int i,                    \
1868           CPURISCVState *env, int vxrm)                             \
1869 {                                                                   \
1870     TX1 s1 = *((T1 *)vs1 + HS1(i));                                 \
1871     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
1872     *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1);                    \
1873 }
1874 
1875 static inline void
1876 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
1877              CPURISCVState *env,
1878              uint32_t vl, uint32_t vm, int vxrm,
1879              opivv2_rm_fn *fn)
1880 {
1881     for (uint32_t i = env->vstart; i < vl; i++) {
1882         if (!vm && !vext_elem_mask(v0, i)) {
1883             continue;
1884         }
1885         fn(vd, vs1, vs2, i, env, vxrm);
1886     }
1887     env->vstart = 0;
1888 }
1889 
1890 static inline void
1891 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
1892              CPURISCVState *env,
1893              uint32_t desc, uint32_t esz, uint32_t dsz,
1894              opivv2_rm_fn *fn)
1895 {
1896     uint32_t vm = vext_vm(desc);
1897     uint32_t vl = env->vl;
1898 
1899     switch (env->vxrm) {
1900     case 0: /* rnu */
1901         vext_vv_rm_1(vd, v0, vs1, vs2,
1902                      env, vl, vm, 0, fn);
1903         break;
1904     case 1: /* rne */
1905         vext_vv_rm_1(vd, v0, vs1, vs2,
1906                      env, vl, vm, 1, fn);
1907         break;
1908     case 2: /* rdn */
1909         vext_vv_rm_1(vd, v0, vs1, vs2,
1910                      env, vl, vm, 2, fn);
1911         break;
1912     default: /* rod */
1913         vext_vv_rm_1(vd, v0, vs1, vs2,
1914                      env, vl, vm, 3, fn);
1915         break;
1916     }
1917 }
1918 
1919 /* Generate helpers for fixed-point instructions with the OPIVV format. */
1920 #define GEN_VEXT_VV_RM(NAME, ESZ, DSZ)                          \
1921 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,     \
1922                   CPURISCVState *env, uint32_t desc)            \
1923 {                                                               \
1924     vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,         \
1925                  do_##NAME);                                    \
1926 }
1927 
1928 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
1929 {
1930     uint8_t res = a + b;
1931     if (res < a) {
1932         res = UINT8_MAX;
1933         env->vxsat = 0x1;
1934     }
1935     return res;
1936 }
1937 
1938 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
1939                                uint16_t b)
1940 {
1941     uint16_t res = a + b;
1942     if (res < a) {
1943         res = UINT16_MAX;
1944         env->vxsat = 0x1;
1945     }
1946     return res;
1947 }
1948 
1949 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
1950                                uint32_t b)
1951 {
1952     uint32_t res = a + b;
1953     if (res < a) {
1954         res = UINT32_MAX;
1955         env->vxsat = 0x1;
1956     }
1957     return res;
1958 }
1959 
1960 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
1961                                uint64_t b)
1962 {
1963     uint64_t res = a + b;
1964     if (res < a) {
1965         res = UINT64_MAX;
1966         env->vxsat = 0x1;
1967     }
1968     return res;
1969 }
1970 
1971 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
1972 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
1973 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
1974 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
1975 GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1)
1976 GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2)
1977 GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4)
1978 GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8)
1979 
1980 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
1981                           CPURISCVState *env, int vxrm);
1982 
1983 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)          \
1984 static inline void                                                  \
1985 do_##NAME(void *vd, target_long s1, void *vs2, int i,               \
1986           CPURISCVState *env, int vxrm)                             \
1987 {                                                                   \
1988     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
1989     *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1);           \
1990 }
1991 
1992 static inline void
1993 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
1994              CPURISCVState *env,
1995              uint32_t vl, uint32_t vm, int vxrm,
1996              opivx2_rm_fn *fn)
1997 {
1998     for (uint32_t i = env->vstart; i < vl; i++) {
1999         if (!vm && !vext_elem_mask(v0, i)) {
2000             continue;
2001         }
2002         fn(vd, s1, vs2, i, env, vxrm);
2003     }
2004     env->vstart = 0;
2005 }
2006 
2007 static inline void
2008 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2009              CPURISCVState *env,
2010              uint32_t desc, uint32_t esz, uint32_t dsz,
2011              opivx2_rm_fn *fn)
2012 {
2013     uint32_t vm = vext_vm(desc);
2014     uint32_t vl = env->vl;
2015 
2016     switch (env->vxrm) {
2017     case 0: /* rnu */
2018         vext_vx_rm_1(vd, v0, s1, vs2,
2019                      env, vl, vm, 0, fn);
2020         break;
2021     case 1: /* rne */
2022         vext_vx_rm_1(vd, v0, s1, vs2,
2023                      env, vl, vm, 1, fn);
2024         break;
2025     case 2: /* rdn */
2026         vext_vx_rm_1(vd, v0, s1, vs2,
2027                      env, vl, vm, 2, fn);
2028         break;
2029     default: /* rod */
2030         vext_vx_rm_1(vd, v0, s1, vs2,
2031                      env, vl, vm, 3, fn);
2032         break;
2033     }
2034 }
2035 
2036 /* Generate helpers for fixed-point instructions with the OPIVX format. */
2037 #define GEN_VEXT_VX_RM(NAME, ESZ, DSZ)                    \
2038 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
2039         void *vs2, CPURISCVState *env, uint32_t desc)     \
2040 {                                                         \
2041     vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ,    \
2042                  do_##NAME);                              \
2043 }
2044 
2045 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2046 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2047 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2048 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
2049 GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1)
2050 GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2)
2051 GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4)
2052 GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8)
2053 
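/*
 * Signed saturating add: overflow occurred iff both operands have the
 * same sign and the result's sign differs, i.e. (res ^ a) & (res ^ b)
 * has the sign bit set; the result then clamps towards that sign.
 */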
2054 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2055 {
2056     int8_t res = a + b;
2057     if ((res ^ a) & (res ^ b) & INT8_MIN) {
2058         res = a > 0 ? INT8_MAX : INT8_MIN;
2059         env->vxsat = 0x1;
2060     }
2061     return res;
2062 }
2063 
2064 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2065 {
2066     int16_t res = a + b;
2067     if ((res ^ a) & (res ^ b) & INT16_MIN) {
2068         res = a > 0 ? INT16_MAX : INT16_MIN;
2069         env->vxsat = 0x1;
2070     }
2071     return res;
2072 }
2073 
2074 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2075 {
2076     int32_t res = a + b;
2077     if ((res ^ a) & (res ^ b) & INT32_MIN) {
2078         res = a > 0 ? INT32_MAX : INT32_MIN;
2079         env->vxsat = 0x1;
2080     }
2081     return res;
2082 }
2083 
2084 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2085 {
2086     int64_t res = a + b;
2087     if ((res ^ a) & (res ^ b) & INT64_MIN) {
2088         res = a > 0 ? INT64_MAX : INT64_MIN;
2089         env->vxsat = 0x1;
2090     }
2091     return res;
2092 }
2093 
2094 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2095 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2096 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2097 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
2098 GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1)
2099 GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2)
2100 GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4)
2101 GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8)
2102 
2103 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2104 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2105 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2106 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
2107 GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1)
2108 GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2)
2109 GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4)
2110 GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8)
2111 
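/*
 * Unsigned saturating subtract: a borrow is detected by res > a; the
 * result then clamps to 0 and vxsat is set.
 */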
2112 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2113 {
2114     uint8_t res = a - b;
2115     if (res > a) {
2116         res = 0;
2117         env->vxsat = 0x1;
2118     }
2119     return res;
2120 }
2121 
2122 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2123                                uint16_t b)
2124 {
2125     uint16_t res = a - b;
2126     if (res > a) {
2127         res = 0;
2128         env->vxsat = 0x1;
2129     }
2130     return res;
2131 }
2132 
2133 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2134                                uint32_t b)
2135 {
2136     uint32_t res = a - b;
2137     if (res > a) {
2138         res = 0;
2139         env->vxsat = 0x1;
2140     }
2141     return res;
2142 }
2143 
2144 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2145                                uint64_t b)
2146 {
2147     uint64_t res = a - b;
2148     if (res > a) {
2149         res = 0;
2150         env->vxsat = 0x1;
2151     }
2152     return res;
2153 }
2154 
2155 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2156 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2157 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2158 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
2159 GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1)
2160 GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2)
2161 GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4)
2162 GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8)
2163 
2164 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2165 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2166 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2167 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
2168 GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1)
2169 GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2)
2170 GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4)
2171 GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8)
2172 
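/*
 * Signed saturating subtract: overflow occurred iff the operands have
 * different signs and the result's sign differs from a's, i.e.
 * (res ^ a) & (a ^ b) has the sign bit set.
 */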
2173 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2174 {
2175     int8_t res = a - b;
2176     if ((res ^ a) & (a ^ b) & INT8_MIN) {
2177         res = a >= 0 ? INT8_MAX : INT8_MIN;
2178         env->vxsat = 0x1;
2179     }
2180     return res;
2181 }
2182 
2183 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2184 {
2185     int16_t res = a - b;
2186     if ((res ^ a) & (a ^ b) & INT16_MIN) {
2187         res = a >= 0 ? INT16_MAX : INT16_MIN;
2188         env->vxsat = 0x1;
2189     }
2190     return res;
2191 }
2192 
2193 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2194 {
2195     int32_t res = a - b;
2196     if ((res ^ a) & (a ^ b) & INT32_MIN) {
2197         res = a >= 0 ? INT32_MAX : INT32_MIN;
2198         env->vxsat = 0x1;
2199     }
2200     return res;
2201 }
2202 
2203 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2204 {
2205     int64_t res = a - b;
2206     if ((res ^ a) & (a ^ b) & INT64_MIN) {
2207         res = a >= 0 ? INT64_MAX : INT64_MIN;
2208         env->vxsat = 0x1;
2209     }
2210     return res;
2211 }
2212 
2213 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2214 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2215 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2216 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
2217 GEN_VEXT_VV_RM(vssub_vv_b, 1, 1)
2218 GEN_VEXT_VV_RM(vssub_vv_h, 2, 2)
2219 GEN_VEXT_VV_RM(vssub_vv_w, 4, 4)
2220 GEN_VEXT_VV_RM(vssub_vv_d, 8, 8)
2221 
2222 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2223 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2224 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2225 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
2226 GEN_VEXT_VX_RM(vssub_vx_b, 1, 1)
2227 GEN_VEXT_VX_RM(vssub_vx_h, 2, 2)
2228 GEN_VEXT_VX_RM(vssub_vx_w, 4, 4)
2229 GEN_VEXT_VX_RM(vssub_vx_d, 8, 8)
2230 
2231 /* Vector Single-Width Averaging Add and Subtract */
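/*
 * get_round() returns the increment (0 or 1) to add to (v >> shift) so
 * that the discarded low `shift` bits are folded in according to the
 * rounding mode vxrm.  For example, with v = 0b101 and shift = 2, the
 * discarded bits are 0b01, so rnu (round-to-nearest-up) returns 0 and
 * 5/4 rounds down to 1.
 */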
2232 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2233 {
2234     uint8_t d = extract64(v, shift, 1);
2235     uint8_t d1;
2236     uint64_t D1, D2;
2237 
2238     if (shift == 0 || shift > 64) {
2239         return 0;
2240     }
2241 
2242     d1 = extract64(v, shift - 1, 1);
2243     D1 = extract64(v, 0, shift);
2244     if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2245         return d1;
2246     } else if (vxrm == 1) { /* round-to-nearest-even */
2247         if (shift > 1) {
2248             D2 = extract64(v, 0, shift - 1);
2249             return d1 & ((D2 != 0) | d);
2250         } else {
2251             return d1 & d;
2252         }
2253     } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2254         return !d & (D1 != 0);
2255     }
2256     return 0; /* round-down (truncate) */
2257 }
2258 
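/*
 * Averaging add: (a + b) >> 1, rounded per vxrm.  aadd32() serves the
 * 8-, 16- and 32-bit element sizes using a 64-bit intermediate sum, so
 * it cannot overflow; aadd64() corrects for 64-bit signed overflow by
 * flipping bit 63 of the shifted result.
 */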
2259 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2260 {
2261     int64_t res = (int64_t)a + b;
2262     uint8_t round = get_round(vxrm, res, 1);
2263 
2264     return (res >> 1) + round;
2265 }
2266 
2267 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2268 {
2269     int64_t res = a + b;
2270     uint8_t round = get_round(vxrm, res, 1);
2271     int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2272 
2273     /* With signed overflow, bit 64 is inverse of bit 63. */
2274     return ((res >> 1) ^ over) + round;
2275 }
2276 
2277 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2278 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2279 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2280 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
2281 GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1)
2282 GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2)
2283 GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4)
2284 GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8)
2285 
2286 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2287 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2288 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2289 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
2290 GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1)
2291 GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2)
2292 GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4)
2293 GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8)
2294 
2295 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2296                                uint32_t a, uint32_t b)
2297 {
2298     uint64_t res = (uint64_t)a + b;
2299     uint8_t round = get_round(vxrm, res, 1);
2300 
2301     return (res >> 1) + round;
2302 }
2303 
2304 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
2305                                uint64_t a, uint64_t b)
2306 {
2307     uint64_t res = a + b;
2308     uint8_t round = get_round(vxrm, res, 1);
2309     uint64_t over = (uint64_t)(res < a) << 63;
2310 
2311     return ((res >> 1) | over) + round;
2312 }
2313 
2314 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
2315 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
2316 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
2317 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
2318 GEN_VEXT_VV_RM(vaaddu_vv_b, 1, 1)
2319 GEN_VEXT_VV_RM(vaaddu_vv_h, 2, 2)
2320 GEN_VEXT_VV_RM(vaaddu_vv_w, 4, 4)
2321 GEN_VEXT_VV_RM(vaaddu_vv_d, 8, 8)
2322 
2323 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
2324 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
2325 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
2326 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
2327 GEN_VEXT_VX_RM(vaaddu_vx_b, 1, 1)
2328 GEN_VEXT_VX_RM(vaaddu_vx_h, 2, 2)
2329 GEN_VEXT_VX_RM(vaaddu_vx_w, 4, 4)
2330 GEN_VEXT_VX_RM(vaaddu_vx_d, 8, 8)
2331 
2332 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2333 {
2334     int64_t res = (int64_t)a - b;
2335     uint8_t round = get_round(vxrm, res, 1);
2336 
2337     return (res >> 1) + round;
2338 }
2339 
2340 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2341 {
2342     int64_t res = (int64_t)a - b;
2343     uint8_t round = get_round(vxrm, res, 1);
2344     int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2345 
2346     /* With signed overflow, bit 64 is inverse of bit 63. */
2347     return ((res >> 1) ^ over) + round;
2348 }
2349 
2350 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2351 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2352 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2353 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
2354 GEN_VEXT_VV_RM(vasub_vv_b, 1, 1)
2355 GEN_VEXT_VV_RM(vasub_vv_h, 2, 2)
2356 GEN_VEXT_VV_RM(vasub_vv_w, 4, 4)
2357 GEN_VEXT_VV_RM(vasub_vv_d, 8, 8)
2358 
2359 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2360 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2361 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2362 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
2363 GEN_VEXT_VX_RM(vasub_vx_b, 1, 1)
2364 GEN_VEXT_VX_RM(vasub_vx_h, 2, 2)
2365 GEN_VEXT_VX_RM(vasub_vx_w, 4, 4)
2366 GEN_VEXT_VX_RM(vasub_vx_d, 8, 8)
2367 
2368 static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2369                                uint32_t a, uint32_t b)
2370 {
2371     int64_t res = (int64_t)a - b;
2372     uint8_t round = get_round(vxrm, res, 1);
2373 
2374     return (res >> 1) + round;
2375 }
2376 
2377 static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
2378                                uint64_t a, uint64_t b)
2379 {
2380     uint64_t res = (uint64_t)a - b;
2381     uint8_t round = get_round(vxrm, res, 1);
2382     uint64_t over = (uint64_t)(res > a) << 63;
2383 
2384     return ((res >> 1) | over) + round;
2385 }
2386 
2387 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
2388 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
2389 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
2390 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
2391 GEN_VEXT_VV_RM(vasubu_vv_b, 1, 1)
2392 GEN_VEXT_VV_RM(vasubu_vv_h, 2, 2)
2393 GEN_VEXT_VV_RM(vasubu_vv_w, 4, 4)
2394 GEN_VEXT_VV_RM(vasubu_vv_d, 8, 8)
2395 
2396 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
2397 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
2398 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
2399 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
2400 GEN_VEXT_VX_RM(vasubu_vx_b, 1, 1)
2401 GEN_VEXT_VX_RM(vasubu_vx_h, 2, 2)
2402 GEN_VEXT_VX_RM(vasubu_vx_w, 4, 4)
2403 GEN_VEXT_VX_RM(vasubu_vx_d, 8, 8)
2404 
2405 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */
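/*
 * vsmul computes (a * b) >> (SEW - 1) with rounding per vxrm, i.e. a
 * signed fractional multiply.  Out-of-range results saturate and set
 * vxsat; vsmul64() special-cases INT64_MIN * INT64_MIN, the one input
 * for which (hi_64 << 1) itself would overflow.
 */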
2406 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2407 {
2408     uint8_t round;
2409     int16_t res;
2410 
2411     res = (int16_t)a * (int16_t)b;
2412     round = get_round(vxrm, res, 7);
2413     res   = (res >> 7) + round;
2414 
2415     if (res > INT8_MAX) {
2416         env->vxsat = 0x1;
2417         return INT8_MAX;
2418     } else if (res < INT8_MIN) {
2419         env->vxsat = 0x1;
2420         return INT8_MIN;
2421     } else {
2422         return res;
2423     }
2424 }
2425 
2426 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2427 {
2428     uint8_t round;
2429     int32_t res;
2430 
2431     res = (int32_t)a * (int32_t)b;
2432     round = get_round(vxrm, res, 15);
2433     res   = (res >> 15) + round;
2434 
2435     if (res > INT16_MAX) {
2436         env->vxsat = 0x1;
2437         return INT16_MAX;
2438     } else if (res < INT16_MIN) {
2439         env->vxsat = 0x1;
2440         return INT16_MIN;
2441     } else {
2442         return res;
2443     }
2444 }
2445 
2446 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2447 {
2448     uint8_t round;
2449     int64_t res;
2450 
2451     res = (int64_t)a * (int64_t)b;
2452     round = get_round(vxrm, res, 31);
2453     res   = (res >> 31) + round;
2454 
2455     if (res > INT32_MAX) {
2456         env->vxsat = 0x1;
2457         return INT32_MAX;
2458     } else if (res < INT32_MIN) {
2459         env->vxsat = 0x1;
2460         return INT32_MIN;
2461     } else {
2462         return res;
2463     }
2464 }
2465 
2466 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2467 {
2468     uint8_t round;
2469     uint64_t hi_64, lo_64;
2470     int64_t res;
2471 
2472     if (a == INT64_MIN && b == INT64_MIN) {
2473         env->vxsat = 1;
2474         return INT64_MAX;
2475     }
2476 
2477     muls64(&lo_64, &hi_64, a, b);
2478     round = get_round(vxrm, lo_64, 63);
2479     /*
2480      * Cannot overflow, as there are always
2481      * 2 sign bits after multiply.
2482      */
2483     res = (hi_64 << 1) | (lo_64 >> 63);
2484     if (round) {
2485         if (res == INT64_MAX) {
2486             env->vxsat = 1;
2487         } else {
2488             res += 1;
2489         }
2490     }
2491     return res;
2492 }
2493 
2494 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2495 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2496 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2497 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
2498 GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1)
2499 GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2)
2500 GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4)
2501 GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8)
2502 
2503 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2504 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2505 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2506 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
2507 GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1)
2508 GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2)
2509 GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4)
2510 GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8)
2511 
2512 /* Vector Single-Width Scaling Shift Instructions */
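/*
 * Scaling shifts: shift right by the low log2(SEW) bits of the shift
 * operand, adding the rounding increment from get_round().  vssrl is a
 * logical shift on unsigned elements, vssra an arithmetic shift on
 * signed elements; neither saturates.
 */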
2513 static inline uint8_t
2514 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2515 {
2516     uint8_t round, shift = b & 0x7;
2517     uint8_t res;
2518 
2519     round = get_round(vxrm, a, shift);
2520     res   = (a >> shift)  + round;
2521     return res;
2522 }
2523 static inline uint16_t
2524 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2525 {
2526     uint8_t round, shift = b & 0xf;
2527     uint16_t res;
2528 
2529     round = get_round(vxrm, a, shift);
2530     res   = (a >> shift)  + round;
2531     return res;
2532 }
2533 static inline uint32_t
2534 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2535 {
2536     uint8_t round, shift = b & 0x1f;
2537     uint32_t res;
2538 
2539     round = get_round(vxrm, a, shift);
2540     res   = (a >> shift)  + round;
2541     return res;
2542 }
2543 static inline uint64_t
2544 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2545 {
2546     uint8_t round, shift = b & 0x3f;
2547     uint64_t res;
2548 
2549     round = get_round(vxrm, a, shift);
2550     res   = (a >> shift)  + round;
2551     return res;
2552 }
2553 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2554 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2555 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2556 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
2557 GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1)
2558 GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2)
2559 GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4)
2560 GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8)
2561 
2562 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2563 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2564 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2565 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
2566 GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1)
2567 GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2)
2568 GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4)
2569 GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8)
2570 
2571 static inline int8_t
2572 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2573 {
2574     uint8_t round, shift = b & 0x7;
2575     int8_t res;
2576 
2577     round = get_round(vxrm, a, shift);
2578     res   = (a >> shift)  + round;
2579     return res;
2580 }
2581 static inline int16_t
2582 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2583 {
2584     uint8_t round, shift = b & 0xf;
2585     int16_t res;
2586 
2587     round = get_round(vxrm, a, shift);
2588     res   = (a >> shift)  + round;
2589     return res;
2590 }
2591 static inline int32_t
2592 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2593 {
2594     uint8_t round, shift = b & 0x1f;
2595     int32_t res;
2596 
2597     round = get_round(vxrm, a, shift);
2598     res   = (a >> shift)  + round;
2599     return res;
2600 }
2601 static inline int64_t
2602 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2603 {
2604     uint8_t round, shift = b & 0x3f;
2605     int64_t res;
2606 
2607     round = get_round(vxrm, a, shift);
2608     res   = (a >> shift)  + round;
2609     return res;
2610 }
2611 
2612 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2613 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2614 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2615 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
2616 GEN_VEXT_VV_RM(vssra_vv_b, 1, 1)
2617 GEN_VEXT_VV_RM(vssra_vv_h, 2, 2)
2618 GEN_VEXT_VV_RM(vssra_vv_w, 4, 4)
2619 GEN_VEXT_VV_RM(vssra_vv_d, 8, 8)
2620 
2621 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2622 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2623 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2624 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
2625 GEN_VEXT_VX_RM(vssra_vx_b, 1, 1)
2626 GEN_VEXT_VX_RM(vssra_vx_h, 2, 2)
2627 GEN_VEXT_VX_RM(vssra_vx_w, 4, 4)
2628 GEN_VEXT_VX_RM(vssra_vx_d, 8, 8)
2629 
2630 /* Vector Narrowing Fixed-Point Clip Instructions */
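/*
 * Narrowing clips take a 2*SEW-wide source, shift it right by the low
 * bits of the shift operand (modulo 2*SEW), round per vxrm, and then
 * saturate to the SEW-wide signed (vnclip) or unsigned (vnclipu)
 * range, setting vxsat when clipping occurs.
 */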
2631 static inline int8_t
2632 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2633 {
2634     uint8_t round, shift = b & 0xf;
2635     int16_t res;
2636 
2637     round = get_round(vxrm, a, shift);
2638     res   = (a >> shift)  + round;
2639     if (res > INT8_MAX) {
2640         env->vxsat = 0x1;
2641         return INT8_MAX;
2642     } else if (res < INT8_MIN) {
2643         env->vxsat = 0x1;
2644         return INT8_MIN;
2645     } else {
2646         return res;
2647     }
2648 }
2649 
2650 static inline int16_t
2651 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2652 {
2653     uint8_t round, shift = b & 0x1f;
2654     int32_t res;
2655 
2656     round = get_round(vxrm, a, shift);
2657     res   = (a >> shift)  + round;
2658     if (res > INT16_MAX) {
2659         env->vxsat = 0x1;
2660         return INT16_MAX;
2661     } else if (res < INT16_MIN) {
2662         env->vxsat = 0x1;
2663         return INT16_MIN;
2664     } else {
2665         return res;
2666     }
2667 }
2668 
2669 static inline int32_t
2670 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2671 {
2672     uint8_t round, shift = b & 0x3f;
2673     int64_t res;
2674 
2675     round = get_round(vxrm, a, shift);
2676     res   = (a >> shift)  + round;
2677     if (res > INT32_MAX) {
2678         env->vxsat = 0x1;
2679         return INT32_MAX;
2680     } else if (res < INT32_MIN) {
2681         env->vxsat = 0x1;
2682         return INT32_MIN;
2683     } else {
2684         return res;
2685     }
2686 }
2687 
2688 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
2689 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
2690 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
2691 GEN_VEXT_VV_RM(vnclip_wv_b, 1, 1)
2692 GEN_VEXT_VV_RM(vnclip_wv_h, 2, 2)
2693 GEN_VEXT_VV_RM(vnclip_wv_w, 4, 4)
2694 
2695 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
2696 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
2697 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
2698 GEN_VEXT_VX_RM(vnclip_wx_b, 1, 1)
2699 GEN_VEXT_VX_RM(vnclip_wx_h, 2, 2)
2700 GEN_VEXT_VX_RM(vnclip_wx_w, 4, 4)
2701 
2702 static inline uint8_t
2703 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2704 {
2705     uint8_t round, shift = b & 0xf;
2706     uint16_t res;
2707 
2708     round = get_round(vxrm, a, shift);
2709     res   = (a >> shift)  + round;
2710     if (res > UINT8_MAX) {
2711         env->vxsat = 0x1;
2712         return UINT8_MAX;
2713     } else {
2714         return res;
2715     }
2716 }
2717 
2718 static inline uint16_t
2719 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
2720 {
2721     uint8_t round, shift = b & 0x1f;
2722     uint32_t res;
2723 
2724     round = get_round(vxrm, a, shift);
2725     res   = (a >> shift)  + round;
2726     if (res > UINT16_MAX) {
2727         env->vxsat = 0x1;
2728         return UINT16_MAX;
2729     } else {
2730         return res;
2731     }
2732 }
2733 
2734 static inline uint32_t
2735 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
2736 {
2737     uint8_t round, shift = b & 0x3f;
2738     uint64_t res;
2739 
2740     round = get_round(vxrm, a, shift);
2741     res   = (a >> shift)  + round;
2742     if (res > UINT32_MAX) {
2743         env->vxsat = 0x1;
2744         return UINT32_MAX;
2745     } else {
2746         return res;
2747     }
2748 }
2749 
2750 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
2751 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
2752 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
2753 GEN_VEXT_VV_RM(vnclipu_wv_b, 1, 1)
2754 GEN_VEXT_VV_RM(vnclipu_wv_h, 2, 2)
2755 GEN_VEXT_VV_RM(vnclipu_wv_w, 4, 4)
2756 
2757 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
2758 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
2759 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
2760 GEN_VEXT_VX_RM(vnclipu_wx_b, 1, 1)
2761 GEN_VEXT_VX_RM(vnclipu_wx_h, 2, 2)
2762 GEN_VEXT_VX_RM(vnclipu_wx_w, 4, 4)
2763 
2764 /*
2765  *** Vector Floating-Point Arithmetic Instructions
2766  */
2767 /* Vector Single-Width Floating-Point Add/Subtract Instructions */
2768 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
2769 static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
2770                       CPURISCVState *env)                      \
2771 {                                                              \
2772     TX1 s1 = *((T1 *)vs1 + HS1(i));                            \
2773     TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
2774     *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status);         \
2775 }
2776 
2777 #define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ)                   \
2778 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
2779                   void *vs2, CPURISCVState *env,          \
2780                   uint32_t desc)                          \
2781 {                                                         \
2782     uint32_t vm = vext_vm(desc);                          \
2783     uint32_t vl = env->vl;                                \
2784     uint32_t i;                                           \
2785                                                           \
2786     for (i = env->vstart; i < vl; i++) {                  \
2787         if (!vm && !vext_elem_mask(v0, i)) {              \
2788             continue;                                     \
2789         }                                                 \
2790         do_##NAME(vd, vs1, vs2, i, env);                  \
2791     }                                                     \
2792     env->vstart = 0;                                      \
2793 }
2794 
2795 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
2796 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
2797 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
2798 GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2)
2799 GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4)
2800 GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8)
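/*
 * As a hand-written illustration (not part of the build), the pair
 *   RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
 * above produces, assuming OP_UUU_H supplies uint16_t for every
 * operand type as defined earlier in this file:
 *
 *   static void do_vfadd_vv_h(void *vd, void *vs1, void *vs2, int i,
 *                             CPURISCVState *env)
 *   {
 *       uint16_t s1 = *((uint16_t *)vs1 + H2(i));
 *       uint16_t s2 = *((uint16_t *)vs2 + H2(i));
 *       *((uint16_t *)vd + H2(i)) = float16_add(s2, s1, &env->fp_status);
 *   }
 *
 * GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2) then emits the HELPER that walks
 * elements vstart..vl-1, skips inactive elements when vm == 0, and
 * calls do_vfadd_vv_h() on each one.
 */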
2801 
2802 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)        \
2803 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
2804                       CPURISCVState *env)                      \
2805 {                                                              \
2806     TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
2807     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
2808 }
2809 
2810 #define GEN_VEXT_VF(NAME, ESZ, DSZ)                       \
2811 void HELPER(NAME)(void *vd, void *v0, uint64_t s1,        \
2812                   void *vs2, CPURISCVState *env,          \
2813                   uint32_t desc)                          \
2814 {                                                         \
2815     uint32_t vm = vext_vm(desc);                          \
2816     uint32_t vl = env->vl;                                \
2817     uint32_t i;                                           \
2818                                                           \
2819     for (i = env->vstart; i < vl; i++) {                  \
2820         if (!vm && !vext_elem_mask(v0, i)) {              \
2821             continue;                                     \
2822         }                                                 \
2823         do_##NAME(vd, s1, vs2, i, env);                   \
2824     }                                                     \
2825     env->vstart = 0;                                      \
2826 }
2827 
2828 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
2829 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
2830 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
2831 GEN_VEXT_VF(vfadd_vf_h, 2, 2)
2832 GEN_VEXT_VF(vfadd_vf_w, 4, 4)
2833 GEN_VEXT_VF(vfadd_vf_d, 8, 8)
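/*
 * For the _vf forms the scalar operand arrives as a uint64_t; the
 * (TX1)(T1)s1 cast in OPFVF2 narrows it to the element width before
 * it is combined with each active element of vs2.
 */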
2834 
2835 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
2836 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
2837 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
2838 GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2)
2839 GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4)
2840 GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8)
2841 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
2842 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
2843 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
2844 GEN_VEXT_VF(vfsub_vf_h, 2, 2)
2845 GEN_VEXT_VF(vfsub_vf_w, 4, 4)
2846 GEN_VEXT_VF(vfsub_vf_d, 8, 8)
2847 
2848 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
2849 {
2850     return float16_sub(b, a, s);
2851 }
2852 
2853 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
2854 {
2855     return float32_sub(b, a, s);
2856 }
2857 
2858 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
2859 {
2860     return float64_sub(b, a, s);
2861 }
2862 
2863 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
2864 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
2865 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
2866 GEN_VEXT_VF(vfrsub_vf_h, 2, 2)
2867 GEN_VEXT_VF(vfrsub_vf_w, 4, 4)
2868 GEN_VEXT_VF(vfrsub_vf_d, 8, 8)
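/*
 * The float*_rsub wrappers swap their arguments, so with OPFVF2's
 * OP(s2, s1) calling convention vfrsub.vf computes f[rs1] - vs2[i]
 * rather than vs2[i] - f[rs1].
 */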
2869 
2870 /* Vector Widening Floating-Point Add/Subtract Instructions */
2871 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
2872 {
2873     return float32_add(float16_to_float32(a, true, s),
2874             float16_to_float32(b, true, s), s);
2875 }
2876 
2877 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
2878 {
2879     return float64_add(float32_to_float64(a, s),
2880             float32_to_float64(b, s), s);
2881 
2882 }
2883 
2884 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
2885 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
2886 GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4)
2887 GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8)
2888 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
2889 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
2890 GEN_VEXT_VF(vfwadd_vf_h, 2, 4)
2891 GEN_VEXT_VF(vfwadd_vf_w, 4, 8)
2892 
2893 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
2894 {
2895     return float32_sub(float16_to_float32(a, true, s),
2896             float16_to_float32(b, true, s), s);
2897 }
2898 
2899 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
2900 {
2901     return float64_sub(float32_to_float64(a, s),
2902             float32_to_float64(b, s), s);
2903 
2904 }
2905 
2906 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
2907 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
2908 GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4)
2909 GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8)
2910 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
2911 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
2912 GEN_VEXT_VF(vfwsub_vf_h, 2, 4)
2913 GEN_VEXT_VF(vfwsub_vf_w, 4, 8)
2914 
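/*
 * The .wv/.wf helpers below differ from the .vv/.vf ones above: the
 * first source is already 2*SEW wide, so only the second operand is
 * promoted before the double-width add or subtract.
 */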
2915 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
2916 {
2917     return float32_add(a, float16_to_float32(b, true, s), s);
2918 }
2919 
2920 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
2921 {
2922     return float64_add(a, float32_to_float64(b, s), s);
2923 }
2924 
2925 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
2926 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
2927 GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4)
2928 GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8)
2929 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
2930 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
2931 GEN_VEXT_VF(vfwadd_wf_h, 2, 4)
2932 GEN_VEXT_VF(vfwadd_wf_w, 4, 8)
2933 
2934 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
2935 {
2936     return float32_sub(a, float16_to_float32(b, true, s), s);
2937 }
2938 
2939 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
2940 {
2941     return float64_sub(a, float32_to_float64(b, s), s);
2942 }
2943 
2944 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
2945 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
2946 GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4)
2947 GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8)
2948 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
2949 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
2950 GEN_VEXT_VF(vfwsub_wf_h, 2, 4)
2951 GEN_VEXT_VF(vfwsub_wf_w, 4, 8)
2952 
2953 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */
2954 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
2955 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
2956 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
2957 GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2)
2958 GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4)
2959 GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8)
2960 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
2961 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
2962 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
2963 GEN_VEXT_VF(vfmul_vf_h, 2, 2)
2964 GEN_VEXT_VF(vfmul_vf_w, 4, 4)
2965 GEN_VEXT_VF(vfmul_vf_d, 8, 8)
2966 
2967 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
2968 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
2969 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
2970 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2)
2971 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4)
2972 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8)
2973 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
2974 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
2975 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
2976 GEN_VEXT_VF(vfdiv_vf_h, 2, 2)
2977 GEN_VEXT_VF(vfdiv_vf_w, 4, 4)
2978 GEN_VEXT_VF(vfdiv_vf_d, 8, 8)
2979 
2980 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
2981 {
2982     return float16_div(b, a, s);
2983 }
2984 
2985 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
2986 {
2987     return float32_div(b, a, s);
2988 }
2989 
2990 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
2991 {
2992     return float64_div(b, a, s);
2993 }
2994 
2995 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
2996 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
2997 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
2998 GEN_VEXT_VF(vfrdiv_vf_h, 2, 2)
2999 GEN_VEXT_VF(vfrdiv_vf_w, 4, 4)
3000 GEN_VEXT_VF(vfrdiv_vf_d, 8, 8)
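/*
 * As with vfrsub, the float*_rdiv wrappers swap operands so that
 * vfrdiv.vf computes f[rs1] / vs2[i].
 */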
3001 
3002 /* Vector Widening Floating-Point Multiply */
3003 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3004 {
3005     return float32_mul(float16_to_float32(a, true, s),
3006             float16_to_float32(b, true, s), s);
3007 }
3008 
3009 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3010 {
3011     return float64_mul(float32_to_float64(a, s),
3012             float32_to_float64(b, s), s);
3013 
3014 }
3015 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3016 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
3017 GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4)
3018 GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8)
3019 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3020 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
3021 GEN_VEXT_VF(vfwmul_vf_h, 2, 4)
3022 GEN_VEXT_VF(vfwmul_vf_w, 4, 8)
3023 
3024 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
3025 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)       \
3026 static void do_##NAME(void *vd, void *vs1, void *vs2, int i,       \
3027         CPURISCVState *env)                                        \
3028 {                                                                  \
3029     TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
3030     TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
3031     TD d = *((TD *)vd + HD(i));                                    \
3032     *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status);          \
3033 }
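/*
 * OPFVV3 (and OPFVF3 below) invoke OP(s2, s1, d) with d being the
 * current destination element.  The per-type helpers then arrange the
 * muladd operands so that, for example, vfmacc computes
 * vs1 * vs2 + vd while vfmadd computes vs1 * vd + vs2; the
 * msac/nmacc/msub/nmadd variants negate the product and/or the addend
 * via the float_muladd_negate_* flags.
 */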
3034 
3035 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3036 {
3037     return float16_muladd(a, b, d, 0, s);
3038 }
3039 
3040 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3041 {
3042     return float32_muladd(a, b, d, 0, s);
3043 }
3044 
3045 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3046 {
3047     return float64_muladd(a, b, d, 0, s);
3048 }
3049 
3050 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3051 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3052 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
3053 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2)
3054 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4)
3055 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8)
3056 
3057 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)           \
3058 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i,    \
3059         CPURISCVState *env)                                       \
3060 {                                                                 \
3061     TX2 s2 = *((T2 *)vs2 + HS2(i));                               \
3062     TD d = *((TD *)vd + HD(i));                                   \
3063     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
3064 }
3065 
3066 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3067 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3068 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
3069 GEN_VEXT_VF(vfmacc_vf_h, 2, 2)
3070 GEN_VEXT_VF(vfmacc_vf_w, 4, 4)
3071 GEN_VEXT_VF(vfmacc_vf_d, 8, 8)
3072 
3073 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3074 {
3075     return float16_muladd(a, b, d,
3076             float_muladd_negate_c | float_muladd_negate_product, s);
3077 }
3078 
3079 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3080 {
3081     return float32_muladd(a, b, d,
3082             float_muladd_negate_c | float_muladd_negate_product, s);
3083 }
3084 
3085 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3086 {
3087     return float64_muladd(a, b, d,
3088             float_muladd_negate_c | float_muladd_negate_product, s);
3089 }
3090 
3091 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3092 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3093 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
3094 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2)
3095 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4)
3096 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8)
3097 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3098 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3099 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
3100 GEN_VEXT_VF(vfnmacc_vf_h, 2, 2)
3101 GEN_VEXT_VF(vfnmacc_vf_w, 4, 4)
3102 GEN_VEXT_VF(vfnmacc_vf_d, 8, 8)
3103 
3104 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3105 {
3106     return float16_muladd(a, b, d, float_muladd_negate_c, s);
3107 }
3108 
3109 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3110 {
3111     return float32_muladd(a, b, d, float_muladd_negate_c, s);
3112 }
3113 
3114 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3115 {
3116     return float64_muladd(a, b, d, float_muladd_negate_c, s);
3117 }
3118 
3119 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3120 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3121 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
3122 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2)
3123 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4)
3124 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8)
3125 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3126 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3127 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
3128 GEN_VEXT_VF(vfmsac_vf_h, 2, 2)
3129 GEN_VEXT_VF(vfmsac_vf_w, 4, 4)
3130 GEN_VEXT_VF(vfmsac_vf_d, 8, 8)
3131 
3132 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3133 {
3134     return float16_muladd(a, b, d, float_muladd_negate_product, s);
3135 }
3136 
3137 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3138 {
3139     return float32_muladd(a, b, d, float_muladd_negate_product, s);
3140 }
3141 
3142 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3143 {
3144     return float64_muladd(a, b, d, float_muladd_negate_product, s);
3145 }
3146 
3147 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3148 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3149 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
3150 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2)
3151 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4)
3152 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8)
3153 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3154 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3155 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
3156 GEN_VEXT_VF(vfnmsac_vf_h, 2, 2)
3157 GEN_VEXT_VF(vfnmsac_vf_w, 4, 4)
3158 GEN_VEXT_VF(vfnmsac_vf_d, 8, 8)
3159 
3160 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3161 {
3162     return float16_muladd(d, b, a, 0, s);
3163 }
3164 
3165 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3166 {
3167     return float32_muladd(d, b, a, 0, s);
3168 }
3169 
3170 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3171 {
3172     return float64_muladd(d, b, a, 0, s);
3173 }
3174 
3175 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3176 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3177 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
3178 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2)
3179 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4)
3180 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8)
3181 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3182 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3183 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
3184 GEN_VEXT_VF(vfmadd_vf_h, 2, 2)
3185 GEN_VEXT_VF(vfmadd_vf_w, 4, 4)
3186 GEN_VEXT_VF(vfmadd_vf_d, 8, 8)
3187 
3188 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3189 {
3190     return float16_muladd(d, b, a,
3191             float_muladd_negate_c | float_muladd_negate_product, s);
3192 }
3193 
3194 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3195 {
3196     return float32_muladd(d, b, a,
3197             float_muladd_negate_c | float_muladd_negate_product, s);
3198 }
3199 
3200 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3201 {
3202     return float64_muladd(d, b, a,
3203             float_muladd_negate_c | float_muladd_negate_product, s);
3204 }
3205 
3206 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3207 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3208 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
3209 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2)
3210 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4)
3211 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8)
3212 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3213 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3214 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
3215 GEN_VEXT_VF(vfnmadd_vf_h, 2, 2)
3216 GEN_VEXT_VF(vfnmadd_vf_w, 4, 4)
3217 GEN_VEXT_VF(vfnmadd_vf_d, 8, 8)
3218 
3219 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3220 {
3221     return float16_muladd(d, b, a, float_muladd_negate_c, s);
3222 }
3223 
3224 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3225 {
3226     return float32_muladd(d, b, a, float_muladd_negate_c, s);
3227 }
3228 
3229 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3230 {
3231     return float64_muladd(d, b, a, float_muladd_negate_c, s);
3232 }
3233 
3234 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3235 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3236 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
3237 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2)
3238 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4)
3239 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8)
3240 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3241 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3242 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
3243 GEN_VEXT_VF(vfmsub_vf_h, 2, 2)
3244 GEN_VEXT_VF(vfmsub_vf_w, 4, 4)
3245 GEN_VEXT_VF(vfmsub_vf_d, 8, 8)
3246 
3247 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3248 {
3249     return float16_muladd(d, b, a, float_muladd_negate_product, s);
3250 }
3251 
3252 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3253 {
3254     return float32_muladd(d, b, a, float_muladd_negate_product, s);
3255 }
3256 
3257 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3258 {
3259     return float64_muladd(d, b, a, float_muladd_negate_product, s);
3260 }
3261 
3262 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3263 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3264 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
3265 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2)
3266 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4)
3267 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8)
3268 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3269 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3270 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
3271 GEN_VEXT_VF(vfnmsub_vf_h, 2, 2)
3272 GEN_VEXT_VF(vfnmsub_vf_w, 4, 4)
3273 GEN_VEXT_VF(vfnmsub_vf_d, 8, 8)
3274 
3275 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */
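/*
 * Both narrow sources are converted to the double-width format
 * (float16_to_float32 / float32_to_float64) before the fused
 * multiply-add; the accumulator d is read from the already
 * double-width destination register group.
 */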
3276 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3277 {
3278     return float32_muladd(float16_to_float32(a, true, s),
3279                         float16_to_float32(b, true, s), d, 0, s);
3280 }
3281 
3282 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3283 {
3284     return float64_muladd(float32_to_float64(a, s),
3285                         float32_to_float64(b, s), d, 0, s);
3286 }
3287 
3288 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3289 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
3290 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4)
3291 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8)
3292 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3293 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
3294 GEN_VEXT_VF(vfwmacc_vf_h, 2, 4)
3295 GEN_VEXT_VF(vfwmacc_vf_w, 4, 8)
3296 
3297 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3298 {
3299     return float32_muladd(float16_to_float32(a, true, s),
3300                         float16_to_float32(b, true, s), d,
3301                         float_muladd_negate_c | float_muladd_negate_product, s);
3302 }
3303 
3304 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3305 {
3306     return float64_muladd(float32_to_float64(a, s),
3307                         float32_to_float64(b, s), d,
3308                         float_muladd_negate_c | float_muladd_negate_product, s);
3309 }
3310 
3311 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3312 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
3313 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4)
3314 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8)
3315 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3316 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
3317 GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4)
3318 GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8)
3319 
3320 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3321 {
3322     return float32_muladd(float16_to_float32(a, true, s),
3323                         float16_to_float32(b, true, s), d,
3324                         float_muladd_negate_c, s);
3325 }
3326 
3327 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3328 {
3329     return float64_muladd(float32_to_float64(a, s),
3330                         float32_to_float64(b, s), d,
3331                         float_muladd_negate_c, s);
3332 }
3333 
3334 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3335 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
3336 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4)
3337 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8)
3338 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3339 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
3340 GEN_VEXT_VF(vfwmsac_vf_h, 2, 4)
3341 GEN_VEXT_VF(vfwmsac_vf_w, 4, 8)
3342 
3343 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3344 {
3345     return float32_muladd(float16_to_float32(a, true, s),
3346                         float16_to_float32(b, true, s), d,
3347                         float_muladd_negate_product, s);
3348 }
3349 
3350 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3351 {
3352     return float64_muladd(float32_to_float64(a, s),
3353                         float32_to_float64(b, s), d,
3354                         float_muladd_negate_product, s);
3355 }
3356 
3357 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3358 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
3359 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4)
3360 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8)
3361 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3362 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
3363 GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4)
3364 GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8)
3365 
3366 /* Vector Floating-Point Square-Root Instruction */
3367 /* (TD, T2, TX2) */
3368 #define OP_UU_H uint16_t, uint16_t, uint16_t
3369 #define OP_UU_W uint32_t, uint32_t, uint32_t
3370 #define OP_UU_D uint64_t, uint64_t, uint64_t
3371 
3372 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP)        \
3373 static void do_##NAME(void *vd, void *vs2, int i,      \
3374         CPURISCVState *env)                            \
3375 {                                                      \
3376     TX2 s2 = *((T2 *)vs2 + HS2(i));                    \
3377     *((TD *)vd + HD(i)) = OP(s2, &env->fp_status);     \
3378 }
3379 
3380 #define GEN_VEXT_V_ENV(NAME, ESZ, DSZ)                 \
3381 void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
3382         CPURISCVState *env, uint32_t desc)             \
3383 {                                                      \
3384     uint32_t vm = vext_vm(desc);                       \
3385     uint32_t vl = env->vl;                             \
3386     uint32_t i;                                        \
3387                                                        \
3388     if (vl == 0) {                                     \
3389         return;                                        \
3390     }                                                  \
3391     for (i = env->vstart; i < vl; i++) {               \
3392         if (!vm && !vext_elem_mask(v0, i)) {           \
3393             continue;                                  \
3394         }                                              \
3395         do_##NAME(vd, vs2, i, env);                    \
3396     }                                                  \
3397     env->vstart = 0;                                   \
3398 }
3399 
3400 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3401 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3402 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
3403 GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2)
3404 GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4)
3405 GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8)
3406 
3407 /* Vector Floating-Point MIN/MAX Instructions */
3408 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
3409 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
3410 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
3411 GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2)
3412 GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4)
3413 GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8)
3414 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
3415 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
3416 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
3417 GEN_VEXT_VF(vfmin_vf_h, 2, 2)
3418 GEN_VEXT_VF(vfmin_vf_w, 4, 4)
3419 GEN_VEXT_VF(vfmin_vf_d, 8, 8)
3420 
3421 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
3422 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
3423 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
3424 GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2)
3425 GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4)
3426 GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8)
3427 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
3428 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
3429 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
3430 GEN_VEXT_VF(vfmax_vf_h, 2, 2)
3431 GEN_VEXT_VF(vfmax_vf_w, 4, 4)
3432 GEN_VEXT_VF(vfmax_vf_d, 8, 8)
3433 
3434 /* Vector Floating-Point Sign-Injection Instructions */
3435 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
3436 {
3437     return deposit64(b, 0, 15, a);
3438 }
3439 
3440 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
3441 {
3442     return deposit64(b, 0, 31, a);
3443 }
3444 
3445 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
3446 {
3447     return deposit64(b, 0, 63, a);
3448 }
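/*
 * deposit64(b, 0, SEW - 1, a) keeps the exponent and mantissa bits of
 * a (the vs2 element) and takes the sign bit from b (the vs1 element
 * or the scalar), which is exactly vfsgnj; the fsgnjn/fsgnjx variants
 * below inject ~b and a ^ b respectively.
 */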
3449 
3450 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
3451 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
3452 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
3453 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2)
3454 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4)
3455 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8)
3456 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
3457 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
3458 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
3459 GEN_VEXT_VF(vfsgnj_vf_h, 2, 2)
3460 GEN_VEXT_VF(vfsgnj_vf_w, 4, 4)
3461 GEN_VEXT_VF(vfsgnj_vf_d, 8, 8)
3462 
3463 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
3464 {
3465     return deposit64(~b, 0, 15, a);
3466 }
3467 
3468 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
3469 {
3470     return deposit64(~b, 0, 31, a);
3471 }
3472 
3473 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
3474 {
3475     return deposit64(~b, 0, 63, a);
3476 }
3477 
3478 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
3479 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
3480 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
3481 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2)
3482 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4)
3483 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8)
3484 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
3485 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
3486 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
3487 GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2)
3488 GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4)
3489 GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8)
3490 
3491 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
3492 {
3493     return deposit64(b ^ a, 0, 15, a);
3494 }
3495 
3496 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
3497 {
3498     return deposit64(b ^ a, 0, 31, a);
3499 }
3500 
3501 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
3502 {
3503     return deposit64(b ^ a, 0, 63, a);
3504 }
3505 
3506 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
3507 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
3508 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
3509 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2)
3510 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4)
3511 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8)
3512 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
3513 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
3514 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
3515 GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2)
3516 GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4)
3517 GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8)
3518 
3519 /* Vector Floating-Point Compare Instructions */
3520 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP)            \
3521 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
3522                   CPURISCVState *env, uint32_t desc)          \
3523 {                                                             \
3524     uint32_t vm = vext_vm(desc);                              \
3525     uint32_t vl = env->vl;                                    \
3526     uint32_t i;                                               \
3527                                                               \
3528     for (i = env->vstart; i < vl; i++) {                      \
3529         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
3530         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
3531         if (!vm && !vext_elem_mask(v0, i)) {                  \
3532             continue;                                         \
3533         }                                                     \
3534         vext_set_elem_mask(vd, i,                             \
3535                            DO_OP(s2, s1, &env->fp_status));   \
3536     }                                                         \
3537     env->vstart = 0;                                          \
3538 }
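/*
 * Compare results are written one mask bit per element via
 * vext_set_elem_mask().  vmfeq/vmfne below use the quiet softfloat
 * comparisons (as scalar FEQ does), while the ordered compares
 * (lt/le/gt/ge) use the signalling variants and so raise the invalid
 * flag even for quiet NaN operands (as scalar FLT/FLE do).
 */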
3539 
3540 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
3541 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
3542 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
3543 
3544 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP)                      \
3545 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,       \
3546                   CPURISCVState *env, uint32_t desc)                \
3547 {                                                                   \
3548     uint32_t vm = vext_vm(desc);                                    \
3549     uint32_t vl = env->vl;                                          \
3550     uint32_t i;                                                     \
3551                                                                     \
3552     for (i = env->vstart; i < vl; i++) {                            \
3553         ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
3554         if (!vm && !vext_elem_mask(v0, i)) {                        \
3555             continue;                                               \
3556         }                                                           \
3557         vext_set_elem_mask(vd, i,                                   \
3558                            DO_OP(s2, (ETYPE)s1, &env->fp_status));  \
3559     }                                                               \
3560     env->vstart = 0;                                                \
3561 }
3562 
3563 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
3564 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
3565 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
3566 
3567 static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
3568 {
3569     FloatRelation compare = float16_compare_quiet(a, b, s);
3570     return compare != float_relation_equal;
3571 }
3572 
3573 static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
3574 {
3575     FloatRelation compare = float32_compare_quiet(a, b, s);
3576     return compare != float_relation_equal;
3577 }
3578 
3579 static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
3580 {
3581     FloatRelation compare = float64_compare_quiet(a, b, s);
3582     return compare != float_relation_equal;
3583 }
3584 
3585 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
3586 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
3587 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
3588 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
3589 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
3590 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
3591 
3592 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
3593 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
3594 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
3595 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
3596 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
3597 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
3598 
3599 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
3600 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
3601 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
3602 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
3603 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
3604 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
3605 
3606 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
3607 {
3608     FloatRelation compare = float16_compare(a, b, s);
3609     return compare == float_relation_greater;
3610 }
3611 
3612 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
3613 {
3614     FloatRelation compare = float32_compare(a, b, s);
3615     return compare == float_relation_greater;
3616 }
3617 
3618 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
3619 {
3620     FloatRelation compare = float64_compare(a, b, s);
3621     return compare == float_relation_greater;
3622 }
3623 
3624 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
3625 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
3626 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
3627 
3628 static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
3629 {
3630     FloatRelation compare = float16_compare(a, b, s);
3631     return compare == float_relation_greater ||
3632            compare == float_relation_equal;
3633 }
3634 
3635 static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
3636 {
3637     FloatRelation compare = float32_compare(a, b, s);
3638     return compare == float_relation_greater ||
3639            compare == float_relation_equal;
3640 }
3641 
3642 static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
3643 {
3644     FloatRelation compare = float64_compare(a, b, s);
3645     return compare == float_relation_greater ||
3646            compare == float_relation_equal;
3647 }
3648 
3649 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
3650 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
3651 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
3652 
3653 /* Vector Floating-Point Classify Instruction */
3654 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP)         \
3655 static void do_##NAME(void *vd, void *vs2, int i)      \
3656 {                                                      \
3657     TX2 s2 = *((T2 *)vs2 + HS2(i));                    \
3658     *((TD *)vd + HD(i)) = OP(s2);                      \
3659 }
3660 
3661 #define GEN_VEXT_V(NAME, ESZ, DSZ)                     \
3662 void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
3663                   CPURISCVState *env, uint32_t desc)   \
3664 {                                                      \
3665     uint32_t vm = vext_vm(desc);                       \
3666     uint32_t vl = env->vl;                             \
3667     uint32_t i;                                        \
3668                                                        \
3669     for (i = env->vstart; i < vl; i++) {               \
3670         if (!vm && !vext_elem_mask(v0, i)) {           \
3671             continue;                                  \
3672         }                                              \
3673         do_##NAME(vd, vs2, i);                         \
3674     }                                                  \
3675     env->vstart = 0;                                   \
3676 }
3677 
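/*
 * fclass_h/s/d return the RISC-V 10-bit classification mask: bit 0
 * -inf, 1 negative normal, 2 negative subnormal, 3 -0, 4 +0,
 * 5 positive subnormal, 6 positive normal, 7 +inf, 8 signalling NaN,
 * 9 quiet NaN.  Exactly one bit is set for any input.
 */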
3678 target_ulong fclass_h(uint64_t frs1)
3679 {
3680     float16 f = frs1;
3681     bool sign = float16_is_neg(f);
3682 
3683     if (float16_is_infinity(f)) {
3684         return sign ? 1 << 0 : 1 << 7;
3685     } else if (float16_is_zero(f)) {
3686         return sign ? 1 << 3 : 1 << 4;
3687     } else if (float16_is_zero_or_denormal(f)) {
3688         return sign ? 1 << 2 : 1 << 5;
3689     } else if (float16_is_any_nan(f)) {
3690         float_status s = { }; /* for snan_bit_is_one */
3691         return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
3692     } else {
3693         return sign ? 1 << 1 : 1 << 6;
3694     }
3695 }
3696 
3697 target_ulong fclass_s(uint64_t frs1)
3698 {
3699     float32 f = frs1;
3700     bool sign = float32_is_neg(f);
3701 
3702     if (float32_is_infinity(f)) {
3703         return sign ? 1 << 0 : 1 << 7;
3704     } else if (float32_is_zero(f)) {
3705         return sign ? 1 << 3 : 1 << 4;
3706     } else if (float32_is_zero_or_denormal(f)) {
3707         return sign ? 1 << 2 : 1 << 5;
3708     } else if (float32_is_any_nan(f)) {
3709         float_status s = { }; /* for snan_bit_is_one */
3710         return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
3711     } else {
3712         return sign ? 1 << 1 : 1 << 6;
3713     }
3714 }
3715 
3716 target_ulong fclass_d(uint64_t frs1)
3717 {
3718     float64 f = frs1;
3719     bool sign = float64_is_neg(f);
3720 
3721     if (float64_is_infinity(f)) {
3722         return sign ? 1 << 0 : 1 << 7;
3723     } else if (float64_is_zero(f)) {
3724         return sign ? 1 << 3 : 1 << 4;
3725     } else if (float64_is_zero_or_denormal(f)) {
3726         return sign ? 1 << 2 : 1 << 5;
3727     } else if (float64_is_any_nan(f)) {
3728         float_status s = { }; /* for snan_bit_is_one */
3729         return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
3730     } else {
3731         return sign ? 1 << 1 : 1 << 6;
3732     }
3733 }
3734 
3735 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
3736 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
3737 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
3738 GEN_VEXT_V(vfclass_v_h, 2, 2)
3739 GEN_VEXT_V(vfclass_v_w, 4, 4)
3740 GEN_VEXT_V(vfclass_v_d, 8, 8)
3741 
3742 /* Vector Floating-Point Merge Instruction */
3743 #define GEN_VFMERGE_VF(NAME, ETYPE, H)                        \
3744 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
3745                   CPURISCVState *env, uint32_t desc)          \
3746 {                                                             \
3747     uint32_t vm = vext_vm(desc);                              \
3748     uint32_t vl = env->vl;                                    \
3749     uint32_t i;                                               \
3750                                                               \
3751     for (i = env->vstart; i < vl; i++) {                      \
3752         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
3753         *((ETYPE *)vd + H(i))                                 \
3754           = (!vm && !vext_elem_mask(v0, i) ? s2 : s1);        \
3755     }                                                         \
3756     env->vstart = 0;                                          \
3757 }
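/*
 * vfmerge.vfm writes the scalar s1 to active elements and the vs2
 * element to inactive ones; with vm set (the vfmv.v.f encoding) every
 * element receives the scalar.
 */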
3758 
3759 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
3760 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
3761 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
3762 
3763 /* Single-Width Floating-Point/Integer Type-Convert Instructions */
3764 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
3765 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
3766 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
3767 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
3768 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2)
3769 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4)
3770 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8)
3771 
3772 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
3773 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
3774 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
3775 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
3776 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2)
3777 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4)
3778 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8)
3779 
3780 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
3781 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
3782 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
3783 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
3784 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2)
3785 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4)
3786 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8)
3787 
3788 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
3789 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
3790 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
3791 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
3792 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2)
3793 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4)
3794 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8)
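/*
 * The conversions reuse the unary OPFVV1/GEN_VEXT_V_ENV plumbing from
 * vfsqrt above; rounding behaviour and exception flags come from
 * env->fp_status, which holds the currently selected FP rounding
 * mode.
 */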
3795 
3796 /* Widening Floating-Point/Integer Type-Convert Instructions */
3797 /* (TD, T2, TX2) */
3798 #define WOP_UU_B uint16_t, uint8_t,  uint8_t
3799 #define WOP_UU_H uint32_t, uint16_t, uint16_t
3800 #define WOP_UU_W uint64_t, uint32_t, uint32_t
3801 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. */
3802 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
3803 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
3804 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4)
3805 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8)
3806 
3807 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
3808 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
3809 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
3810 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4)
3811 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8)
3812 
3813 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */
3814 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
3815 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
3816 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
3817 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 1, 2)
3818 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4)
3819 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8)
3820 
3821 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
3822 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
3823 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
3824 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
3825 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 1, 2)
3826 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4)
3827 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8)
3828 
3829 /*
3830  * vfwcvt.f.f.v vd, vs2, vm
3831  * Convert single-width float to double-width float.
3832  */
3833 static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
3834 {
3835     return float16_to_float32(a, true, s);
3836 }
3837 
3838 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
3839 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
3840 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4)
3841 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8)
3842 
3843 /* Narrowing Floating-Point/Integer Type-Convert Instructions */
3844 /* (TD, T2, TX2) */
3845 #define NOP_UU_B uint8_t,  uint16_t, uint32_t
3846 #define NOP_UU_H uint16_t, uint32_t, uint32_t
3847 #define NOP_UU_W uint32_t, uint64_t, uint64_t
3848 /* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
3849 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
3850 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
3851 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
3852 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1, 1)
3853 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2, 2)
3854 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4, 4)
3855 
3856 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
3857 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
3858 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
3859 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
3860 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1, 1)
3861 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2, 2)
3862 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4, 4)
3863 
3864 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */
3865 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
3866 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
3867 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2, 2)
3868 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4, 4)
3869 
3870 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
3871 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
3872 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
3873 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2, 2)
3874 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4, 4)
3875 
3876 /* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float. */
3877 static uint16_t vfncvtffv16(uint32_t a, float_status *s)
3878 {
3879     return float32_to_float16(a, true, s);
3880 }
3881 
3882 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
3883 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
3884 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2, 2)
3885 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4, 4)
3886 
3887 /*
3888  *** Vector Reduction Operations
3889  */
3890 /* Vector Single-Width Integer Reduction Instructions */
3891 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP)          \
3892 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
3893         void *vs2, CPURISCVState *env, uint32_t desc)     \
3894 {                                                         \
3895     uint32_t vm = vext_vm(desc);                          \
3896     uint32_t vl = env->vl;                                \
3897     uint32_t i;                                           \
3898     TD s1 =  *((TD *)vs1 + HD(0));                        \
3899                                                           \
3900     for (i = env->vstart; i < vl; i++) {                  \
3901         TS2 s2 = *((TS2 *)vs2 + HS2(i));                  \
3902         if (!vm && !vext_elem_mask(v0, i)) {              \
3903             continue;                                     \
3904         }                                                 \
3905         s1 = OP(s1, (TD)s2);                              \
3906     }                                                     \
3907     *((TD *)vd + HD(0)) = s1;                             \
3908     env->vstart = 0;                                      \
3909 }
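/*
 * Each reduction seeds its accumulator with element 0 of vs1, folds
 * in every active element of vs2 with OP, and writes the scalar
 * result back to element 0 of vd; masked-off elements simply do not
 * contribute.
 */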
3910 
3911 /* vd[0] = sum(vs1[0], vs2[*]) */
3912 GEN_VEXT_RED(vredsum_vs_b, int8_t,  int8_t,  H1, H1, DO_ADD)
3913 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
3914 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
3915 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)
3916 
3917 /* vd[0] = maxu(vs1[0], vs2[*]) */
3918 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t,  uint8_t,  H1, H1, DO_MAX)
3919 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
3920 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
3921 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)
3922 
3923 /* vd[0] = max(vs1[0], vs2[*]) */
3924 GEN_VEXT_RED(vredmax_vs_b, int8_t,  int8_t,  H1, H1, DO_MAX)
3925 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
3926 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
3927 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)
3928 
3929 /* vd[0] = minu(vs1[0], vs2[*]) */
3930 GEN_VEXT_RED(vredminu_vs_b, uint8_t,  uint8_t,  H1, H1, DO_MIN)
3931 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
3932 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
3933 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)
3934 
3935 /* vd[0] = min(vs1[0], vs2[*]) */
3936 GEN_VEXT_RED(vredmin_vs_b, int8_t,  int8_t,  H1, H1, DO_MIN)
3937 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
3938 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
3939 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)
3940 
3941 /* vd[0] = and(vs1[0], vs2[*]) */
3942 GEN_VEXT_RED(vredand_vs_b, int8_t,  int8_t,  H1, H1, DO_AND)
3943 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
3944 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
3945 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)
3946 
3947 /* vd[0] = or(vs1[0], vs2[*]) */
3948 GEN_VEXT_RED(vredor_vs_b, int8_t,  int8_t,  H1, H1, DO_OR)
3949 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
3950 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
3951 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)
3952 
3953 /* vd[0] = xor(vs1[0], vs2[*]) */
3954 GEN_VEXT_RED(vredxor_vs_b, int8_t,  int8_t,  H1, H1, DO_XOR)
3955 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
3956 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
3957 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)
3958 
3959 /* Vector Widening Integer Reduction Instructions */
3960 /* signed sum reduction into double-width accumulator */
3961 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t,  H2, H1, DO_ADD)
3962 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
3963 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)
3964 
3965 /* Unsigned sum reduction into double-width accumulator */
3966 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t,  H2, H1, DO_ADD)
3967 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
3968 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
3969 
3970 /* Vector Single-Width Floating-Point Reduction Instructions */
3971 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP)          \
3972 void HELPER(NAME)(void *vd, void *v0, void *vs1,           \
3973                   void *vs2, CPURISCVState *env,           \
3974                   uint32_t desc)                           \
3975 {                                                          \
3976     uint32_t vm = vext_vm(desc);                           \
3977     uint32_t vl = env->vl;                                 \
3978     uint32_t i;                                            \
3979     TD s1 =  *((TD *)vs1 + HD(0));                         \
3980                                                            \
3981     for (i = env->vstart; i < vl; i++) {                   \
3982         TS2 s2 = *((TS2 *)vs2 + HS2(i));                   \
3983         if (!vm && !vext_elem_mask(v0, i)) {               \
3984             continue;                                      \
3985         }                                                  \
3986         s1 = OP(s1, (TD)s2, &env->fp_status);              \
3987     }                                                      \
3988     *((TD *)vd + HD(0)) = s1;                              \
3989     env->vstart = 0;                                       \
3990 }
3991 
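/*
 * The RVV spec allows an unordered reduction to combine elements in any
 * association; this helper simply folds the active elements of vs2 into an
 * accumulator seeded from vs1[0] in element order, skipping inactive
 * elements, and writes the scalar result to vd[0].
 */
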
3992 /* Unordered sum */
3993 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
3994 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
3995 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
3996 
3997 /* Maximum value */
3998 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number)
3999 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number)
4000 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number)
4001 
4002 /* Minimum value */
4003 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number)
4004 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number)
4005 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number)
4006 
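/*
 * Note: float16/32/64_maximum_number and _minimum_number implement the
 * IEEE 754-2019 maximumNumber/minimumNumber operations, so when exactly
 * one operand is a quiet NaN the numeric operand is returned rather than
 * the NaN.
 */
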
4007 /* Vector Widening Floating-Point Reduction Instructions */
4008 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
4009 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
4010                             void *vs2, CPURISCVState *env, uint32_t desc)
4011 {
4012     uint32_t vm = vext_vm(desc);
4013     uint32_t vl = env->vl;
4014     uint32_t i;
4015     uint32_t s1 =  *((uint32_t *)vs1 + H4(0));
4016 
4017     for (i = env->vstart; i < vl; i++) {
4018         uint16_t s2 = *((uint16_t *)vs2 + H2(i));
4019         if (!vm && !vext_elem_mask(v0, i)) {
4020             continue;
4021         }
4022         s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status),
4023                          &env->fp_status);
4024     }
4025     *((uint32_t *)vd + H4(0)) = s1;
4026     env->vstart = 0;
4027 }
4028 
4029 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
4030                             void *vs2, CPURISCVState *env, uint32_t desc)
4031 {
4032     uint32_t vm = vext_vm(desc);
4033     uint32_t vl = env->vl;
4034     uint32_t i;
4035     uint64_t s1 =  *((uint64_t *)vs1);
4036 
4037     for (i = env->vstart; i < vl; i++) {
4038         uint32_t s2 = *((uint32_t *)vs2 + H4(i));
4039         if (!vm && !vext_elem_mask(v0, i)) {
4040             continue;
4041         }
4042         s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status),
4043                          &env->fp_status);
4044     }
4045     *((uint64_t *)vd) = s1;
4046     env->vstart = 0;
4047 }
4048 
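/*
 * Each source element is first promoted to the double-width format
 * (float16_to_float32 / float32_to_float64) and then added to the wide
 * accumulator taken from vs1[0], so e.g. a sum of many large fp16 values
 * that would overflow fp16 still produces a finite fp32 result in vd[0].
 */
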
4049 /*
4050  *** Vector Mask Operations
4051  */
4052 /* Vector Mask-Register Logical Instructions */
4053 #define GEN_VEXT_MASK_VV(NAME, OP)                        \
4054 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
4055                   void *vs2, CPURISCVState *env,          \
4056                   uint32_t desc)                          \
4057 {                                                         \
4058     uint32_t vl = env->vl;                                \
4059     uint32_t i;                                           \
4060     int a, b;                                             \
4061                                                           \
4062     for (i = env->vstart; i < vl; i++) {                  \
4063         a = vext_elem_mask(vs1, i);                       \
4064         b = vext_elem_mask(vs2, i);                       \
4065         vext_set_elem_mask(vd, i, OP(b, a));              \
4066     }                                                     \
4067     env->vstart = 0;                                      \
4068 }
4069 
4070 #define DO_NAND(N, M)  (!(N & M))
4071 #define DO_ANDNOT(N, M)  (N & !M)
4072 #define DO_NOR(N, M)  (!(N | M))
4073 #define DO_ORNOT(N, M)  (N | !M)
4074 #define DO_XNOR(N, M)  (!(N ^ M))
4075 
4076 GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
4077 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
4078 GEN_VEXT_MASK_VV(vmandnot_mm, DO_ANDNOT)
4079 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
4080 GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
4081 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
4082 GEN_VEXT_MASK_VV(vmornot_mm, DO_ORNOT)
4083 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
4084 
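/*
 * The DO_* helpers above only ever see the 0/1 values produced by
 * vext_elem_mask(), so logical '!' acts as a one-bit complement, e.g.
 * DO_ANDNOT(1, 0) = (1 & !0) = 1 and DO_XNOR(1, 1) = !(1 ^ 1) = 1.
 */
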
4085 /* Vector count population in mask (vcpop.m) */
4086 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
4087                              uint32_t desc)
4088 {
4089     target_ulong cnt = 0;
4090     uint32_t vm = vext_vm(desc);
4091     uint32_t vl = env->vl;
4092     int i;
4093 
4094     for (i = env->vstart; i < vl; i++) {
4095         if (vm || vext_elem_mask(v0, i)) {
4096             if (vext_elem_mask(vs2, i)) {
4097                 cnt++;
4098             }
4099         }
4100     }
4101     env->vstart = 0;
4102     return cnt;
4103 }
4104 
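/*
 * Example (vstart = 0): with vm = 1, vl = 8 and vs2 mask bits
 * {1, 0, 1, 1, 0, 0, 1, 0}, vcpop.m returns 4.
 */
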
4105 /* vfirst find-first-set mask bit */
4106 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4107                               uint32_t desc)
4108 {
4109     uint32_t vm = vext_vm(desc);
4110     uint32_t vl = env->vl;
4111     int i;
4112 
4113     for (i = env->vstart; i < vl; i++) {
4114         if (vm || vext_elem_mask(v0, i)) {
4115             if (vext_elem_mask(vs2, i)) {
4116                 return i;
4117             }
4118         }
4119     }
4120     env->vstart = 0;
4121     return -1LL;
4122 }
4123 
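/*
 * vfirst.m returns the index of the first active element whose vs2 bit is
 * set, e.g. vs2 = {0, 0, 1, 0, ...} with vm = 1 gives 2; -1 (all ones in
 * rd) means no such element was found.
 */
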
4124 enum set_mask_type {
4125     ONLY_FIRST = 1,
4126     INCLUDE_FIRST,
4127     BEFORE_FIRST,
4128 };
4129 
4130 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
4131                    uint32_t desc, enum set_mask_type type)
4132 {
4133     uint32_t vm = vext_vm(desc);
4134     uint32_t vl = env->vl;
4135     int i;
4136     bool first_mask_bit = false;
4137 
4138     for (i = env->vstart; i < vl; i++) {
4139         if (!vm && !vext_elem_mask(v0, i)) {
4140             continue;
4141         }
4142         /* write a zero to all following active elements */
4143         if (first_mask_bit) {
4144             vext_set_elem_mask(vd, i, 0);
4145             continue;
4146         }
4147         if (vext_elem_mask(vs2, i)) {
4148             first_mask_bit = true;
4149             if (type == BEFORE_FIRST) {
4150                 vext_set_elem_mask(vd, i, 0);
4151             } else {
4152                 vext_set_elem_mask(vd, i, 1);
4153             }
4154         } else {
4155             if (type == ONLY_FIRST) {
4156                 vext_set_elem_mask(vd, i, 0);
4157             } else {
4158                 vext_set_elem_mask(vd, i, 1);
4159             }
4160         }
4161     }
4162     env->vstart = 0;
4163 }
4164 
4165 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4166                      uint32_t desc)
4167 {
4168     vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
4169 }
4170 
4171 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4172                      uint32_t desc)
4173 {
4174     vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
4175 }
4176 
4177 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4178                      uint32_t desc)
4179 {
4180     vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
4181 }
4182 
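/*
 * Worked example (vstart = 0, all elements active), vs2 = {0, 0, 1, 0, 1}:
 *   vmsbf.m (set-before-first)    -> {1, 1, 0, 0, 0}
 *   vmsif.m (set-including-first) -> {1, 1, 1, 0, 0}
 *   vmsof.m (set-only-first)      -> {0, 0, 1, 0, 0}
 */
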
4183 /* Vector Iota Instruction */
4184 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H)                                  \
4185 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env,      \
4186                   uint32_t desc)                                          \
4187 {                                                                         \
4188     uint32_t vm = vext_vm(desc);                                          \
4189     uint32_t vl = env->vl;                                                \
4190     uint32_t sum = 0;                                                     \
4191     int i;                                                                \
4192                                                                           \
4193     for (i = env->vstart; i < vl; i++) {                                  \
4194         if (!vm && !vext_elem_mask(v0, i)) {                              \
4195             continue;                                                     \
4196         }                                                                 \
4197         *((ETYPE *)vd + H(i)) = sum;                                      \
4198         if (vext_elem_mask(vs2, i)) {                                     \
4199             sum++;                                                        \
4200         }                                                                 \
4201     }                                                                     \
4202     env->vstart = 0;                                                      \
4203 }
4204 
4205 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t,  H1)
4206 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
4207 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
4208 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
4209 
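/*
 * viota.m writes a running count of the mask bits seen so far (excluding
 * the current one), e.g. vs2 = {1, 0, 1, 1, 0} with all elements active
 * produces vd = {0, 1, 1, 2, 3}.
 */
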
4210 /* Vector Element Index Instruction */
4211 #define GEN_VEXT_VID_V(NAME, ETYPE, H)                                    \
4212 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc)  \
4213 {                                                                         \
4214     uint32_t vm = vext_vm(desc);                                          \
4215     uint32_t vl = env->vl;                                                \
4216     int i;                                                                \
4217                                                                           \
4218     for (i = env->vstart; i < vl; i++) {                                  \
4219         if (!vm && !vext_elem_mask(v0, i)) {                              \
4220             continue;                                                     \
4221         }                                                                 \
4222         *((ETYPE *)vd + H(i)) = i;                                        \
4223     }                                                                     \
4224     env->vstart = 0;                                                      \
4225 }
4226 
4227 GEN_VEXT_VID_V(vid_v_b, uint8_t,  H1)
4228 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
4229 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
4230 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
4231 
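/*
 * vid.v simply writes the element index, e.g. vl = 4 with no masking
 * gives vd = {0, 1, 2, 3}.
 */
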
4232 /*
4233  *** Vector Permutation Instructions
4234  */
4235 
4236 /* Vector Slide Instructions */
4237 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                              \
4238 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4239                   CPURISCVState *env, uint32_t desc)                      \
4240 {                                                                         \
4241     uint32_t vm = vext_vm(desc);                                          \
4242     uint32_t vl = env->vl;                                                \
4243     target_ulong offset = s1, i_min, i;                                   \
4244                                                                           \
4245     i_min = MAX(env->vstart, offset);                                     \
4246     for (i = i_min; i < vl; i++) {                                        \
4247         if (!vm && !vext_elem_mask(v0, i)) {                              \
4248             continue;                                                     \
4249         }                                                                 \
4250         *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
4251     }                                                                     \
4252 }
4253 
4254 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
4255 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t,  H1)
4256 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
4257 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
4258 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
4259 
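/*
 * Example (vstart = 0, no masking): with OFFSET = x[rs1] = 2 and vl = 5,
 * vd[2..4] = vs2[0..2] while vd[0] and vd[1] keep their previous values.
 */
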
4260 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                            \
4261 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4262                   CPURISCVState *env, uint32_t desc)                      \
4263 {                                                                         \
4264     uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
4265     uint32_t vm = vext_vm(desc);                                          \
4266     uint32_t vl = env->vl;                                                \
4267     target_ulong i_max, i;                                                \
4268                                                                           \
4269     i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart);       \
4270     for (i = env->vstart; i < i_max; ++i) {                               \
4271         if (vm || vext_elem_mask(v0, i)) {                                \
4272             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));          \
4273         }                                                                 \
4274     }                                                                     \
4275                                                                           \
4276     for (i = i_max; i < vl; ++i) {                                        \
4277         if (vm || vext_elem_mask(v0, i)) {                                \
4278             *((ETYPE *)vd + H(i)) = 0;                                    \
4279         }                                                                 \
4280     }                                                                     \
4281                                                                           \
4282     env->vstart = 0;                                                      \
4283 }
4284 
4285 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
4286 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t,  H1)
4287 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
4288 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
4289 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
4290 
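/*
 * Example (vstart = 0, no masking): with x[rs1] = 3, vl = 8 and vlmax = 8,
 * i_max = 5, so vd[0..4] = vs2[3..7] and vd[5..7] are written with zero
 * because their source index would fall beyond vlmax.
 */
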
4291 #define GEN_VEXT_VSLIDE1UP(ESZ, H)                                          \
4292 static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
4293                      CPURISCVState *env, uint32_t desc)                     \
4294 {                                                                           \
4295     typedef uint##ESZ##_t ETYPE;                                            \
4296     uint32_t vm = vext_vm(desc);                                            \
4297     uint32_t vl = env->vl;                                                  \
4298     uint32_t i;                                                             \
4299                                                                             \
4300     for (i = env->vstart; i < vl; i++) {                                    \
4301         if (!vm && !vext_elem_mask(v0, i)) {                                \
4302             continue;                                                       \
4303         }                                                                   \
4304         if (i == 0) {                                                       \
4305             *((ETYPE *)vd + H(i)) = s1;                                     \
4306         } else {                                                            \
4307             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));             \
4308         }                                                                   \
4309     }                                                                       \
4310     env->vstart = 0;                                                        \
4311 }
4312 
4313 GEN_VEXT_VSLIDE1UP(8,  H1)
4314 GEN_VEXT_VSLIDE1UP(16, H2)
4315 GEN_VEXT_VSLIDE1UP(32, H4)
4316 GEN_VEXT_VSLIDE1UP(64, H8)
4317 
4318 #define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ)                          \
4319 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4320                   CPURISCVState *env, uint32_t desc)              \
4321 {                                                                 \
4322     vslide1up_##ESZ(vd, v0, s1, vs2, env, desc);                  \
4323 }
4324 
4325 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
4326 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
4327 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
4328 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
4329 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
4330 
4331 #define GEN_VEXT_VSLIDE1DOWN(ESZ, H)                                          \
4332 static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
4333                        CPURISCVState *env, uint32_t desc)                     \
4334 {                                                                             \
4335     typedef uint##ESZ##_t ETYPE;                                              \
4336     uint32_t vm = vext_vm(desc);                                              \
4337     uint32_t vl = env->vl;                                                    \
4338     uint32_t i;                                                               \
4339                                                                               \
4340     for (i = env->vstart; i < vl; i++) {                                      \
4341         if (!vm && !vext_elem_mask(v0, i)) {                                  \
4342             continue;                                                         \
4343         }                                                                     \
4344         if (i == vl - 1) {                                                    \
4345             *((ETYPE *)vd + H(i)) = s1;                                       \
4346         } else {                                                              \
4347             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));               \
4348         }                                                                     \
4349     }                                                                         \
4350     env->vstart = 0;                                                          \
4351 }
4352 
4353 GEN_VEXT_VSLIDE1DOWN(8,  H1)
4354 GEN_VEXT_VSLIDE1DOWN(16, H2)
4355 GEN_VEXT_VSLIDE1DOWN(32, H4)
4356 GEN_VEXT_VSLIDE1DOWN(64, H8)
4357 
4358 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ)                        \
4359 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4360                   CPURISCVState *env, uint32_t desc)              \
4361 {                                                                 \
4362     vslide1down_##ESZ(vd, v0, s1, vs2, env, desc);                \
4363 }
4364 
4365 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
4366 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
4367 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
4368 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
4369 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
4370 
4371 /* Vector Floating-Point Slide Instructions */
4372 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ)                     \
4373 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4374                   CPURISCVState *env, uint32_t desc)          \
4375 {                                                             \
4376     vslide1up_##ESZ(vd, v0, s1, vs2, env, desc);              \
4377 }
4378 
4379 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
4380 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
4381 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
4382 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)
4383 
4384 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ)                   \
4385 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4386                   CPURISCVState *env, uint32_t desc)          \
4387 {                                                             \
4388     vslide1down_##ESZ(vd, v0, s1, vs2, env, desc);            \
4389 }
4390 
4391 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
4392 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
4393 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
4394 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
4395 
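/*
 * The floating-point slide1 variants above reuse the integer
 * vslide1up_/vslide1down_ helpers unchanged; only the scalar source
 * (f[rs1] instead of x[rs1]) differs.
 */
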
4396 /* Vector Register Gather Instruction */
4397 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2)                    \
4398 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
4399                   CPURISCVState *env, uint32_t desc)                      \
4400 {                                                                         \
4401     uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2)));             \
4402     uint32_t vm = vext_vm(desc);                                          \
4403     uint32_t vl = env->vl;                                                \
4404     uint64_t index;                                                       \
4405     uint32_t i;                                                           \
4406                                                                           \
4407     for (i = env->vstart; i < vl; i++) {                                  \
4408         if (!vm && !vext_elem_mask(v0, i)) {                              \
4409             continue;                                                     \
4410         }                                                                 \
4411         index = *((TS1 *)vs1 + HS1(i));                                   \
4412         if (index >= vlmax) {                                             \
4413             *((TS2 *)vd + HS2(i)) = 0;                                    \
4414         } else {                                                          \
4415             *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index));           \
4416         }                                                                 \
4417     }                                                                     \
4418     env->vstart = 0;                                                      \
4419 }
4420 
4421 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
4422 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t,  uint8_t,  H1, H1)
4423 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
4424 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
4425 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)
4426 
4427 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t,  H2, H1)
4428 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
4429 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
4430 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
4431 
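/*
 * The vrgatherei16 forms read their indices as 16-bit elements
 * (TS1 = uint16_t) whatever the data element width, which e.g. lets an
 * SEW=8 gather address source indices above 255; any index >= vlmax
 * yields 0.
 */
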
4432 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                              \
4433 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4434                   CPURISCVState *env, uint32_t desc)                      \
4435 {                                                                         \
4436     uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
4437     uint32_t vm = vext_vm(desc);                                          \
4438     uint32_t vl = env->vl;                                                \
4439     uint64_t index = s1;                                                  \
4440     uint32_t i;                                                           \
4441                                                                           \
4442     for (i = env->vstart; i < vl; i++) {                                  \
4443         if (!vm && !vext_elem_mask(v0, i)) {                              \
4444             continue;                                                     \
4445         }                                                                 \
4446         if (index >= vlmax) {                                             \
4447             *((ETYPE *)vd + H(i)) = 0;                                    \
4448         } else {                                                          \
4449             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
4450         }                                                                 \
4451     }                                                                     \
4452     env->vstart = 0;                                                      \
4453 }
4454 
4455 /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
4456 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t,  H1)
4457 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
4458 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
4459 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
4460 
4461 /* Vector Compress Instruction */
4462 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
4463 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
4464                   CPURISCVState *env, uint32_t desc)                      \
4465 {                                                                         \
4466     uint32_t vl = env->vl;                                                \
4467     uint32_t num = 0, i;                                                  \
4468                                                                           \
4469     for (i = env->vstart; i < vl; i++) {                                  \
4470         if (!vext_elem_mask(vs1, i)) {                                    \
4471             continue;                                                     \
4472         }                                                                 \
4473         *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
4474         num++;                                                            \
4475     }                                                                     \
4476     env->vstart = 0;                                                      \
4477 }
4478 
4479 /* Pack the active elements of vs2 (those whose vs1 mask bit is set) into vd */
4480 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t,  H1)
4481 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
4482 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
4483 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
4484 
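/*
 * Example (vstart = 0): with vs1 mask {1, 0, 1, 1} and vs2 = {a, b, c, d},
 * the packed result is vd = {a, c, d, ...}; destination elements beyond
 * the number of set mask bits are left unchanged by this helper.
 */
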
4485 /* Vector Whole Register Move */
4486 #define GEN_VEXT_VMV_WHOLE(NAME, LEN)                      \
4487 void HELPER(NAME)(void *vd, void *vs2, CPURISCVState *env, \
4488                   uint32_t desc)                           \
4489 {                                                          \
4490     /* EEW = 8 */                                          \
4491     uint32_t maxsz = simd_maxsz(desc);                     \
4492     uint32_t i = env->vstart;                              \
4493                                                            \
4494     memcpy((uint8_t *)vd + H1(i),                          \
4495            (uint8_t *)vs2 + H1(i),                         \
4496            maxsz - env->vstart);                           \
4497                                                            \
4498     env->vstart = 0;                                       \
4499 }
4500 
4501 GEN_VEXT_VMV_WHOLE(vmv1r_v, 1)
4502 GEN_VEXT_VMV_WHOLE(vmv2r_v, 2)
4503 GEN_VEXT_VMV_WHOLE(vmv4r_v, 4)
4504 GEN_VEXT_VMV_WHOLE(vmv8r_v, 8)
4505 
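/*
 * maxsz here is the byte size of the whole register group being moved
 * (simd_maxsz(desc)), so the copy is a plain byte memcpy that skips the
 * first env->vstart bytes (EEW = 8) when resuming an interrupted move.
 */
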
4506 /* Vector Integer Extension */
4507 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)            \
4508 void HELPER(NAME)(void *vd, void *v0, void *vs2,                 \
4509                   CPURISCVState *env, uint32_t desc)             \
4510 {                                                                \
4511     uint32_t vl = env->vl;                                       \
4512     uint32_t vm = vext_vm(desc);                                 \
4513     uint32_t i;                                                  \
4514                                                                  \
4515     for (i = env->vstart; i < vl; i++) {                         \
4516         if (!vm && !vext_elem_mask(v0, i)) {                     \
4517             continue;                                            \
4518         }                                                        \
4519         *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));       \
4520     }                                                            \
4521     env->vstart = 0;                                             \
4522 }
4523 
4524 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t,  H2, H1)
4525 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
4526 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
4527 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t,  H4, H1)
4528 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
4529 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t,  H8, H1)
4530 
4531 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t,  H2, H1)
4532 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
4533 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
4534 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t,  H4, H1)
4535 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
4536 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t,  H8, H1)
4537
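/*
 * Example: vzext_vf2_h widens each uint8_t source element to 16 bits with
 * zero extension (0x80 -> 0x0080), while vsext_vf2_h sign-extends the same
 * pattern to 0xff80; the vf4/vf8 forms widen by a factor of 4 and 8.
 */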