xref: /openbmc/qemu/target/riscv/vector_helper.c (revision 30206bd8)
1 /*
2  * RISC-V Vector Extension Helpers for QEMU.
3  *
4  * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2 or later, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License along with
16  * this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include "qemu/osdep.h"
20 #include "cpu.h"
21 #include "exec/memop.h"
22 #include "exec/exec-all.h"
23 #include "exec/helper-proto.h"
24 #include "fpu/softfloat.h"
25 #include "tcg/tcg-gvec-desc.h"
26 #include "internals.h"
27 #include <math.h>
28 
29 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
30                             target_ulong s2)
31 {
32     int vlmax, vl;
33     RISCVCPU *cpu = env_archcpu(env);
34     uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
35     uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
36     uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
37     bool vill = FIELD_EX64(s2, VTYPE, VILL);
38     target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED);
39 
40     if (lmul & 4) {
41         /* Fractional LMUL. */
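        /*
         * vlmul encoding 100 is reserved, and for LMUL = 1/2, 1/4 and 1/8
         * the check below rejects any SEW larger than ELEN >> 1, ELEN >> 2
         * and ELEN >> 3 respectively (8 - lmul is 1, 2 or 3).
         */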
42         if (lmul == 4 ||
43             cpu->cfg.elen >> (8 - lmul) < sew) {
44             vill = true;
45         }
46     }
47 
48     if ((sew > cpu->cfg.elen)
49         || vill
50         || (ediv != 0)
51         || (reserved != 0)) {
52         /* only set vill bit. */
53         env->vtype = FIELD_DP64(0, VTYPE, VILL, 1);
54         env->vl = 0;
55         env->vstart = 0;
56         return 0;
57     }
58 
59     vlmax = vext_get_vlmax(cpu, s2);
60     if (s1 <= vlmax) {
61         vl = s1;
62     } else {
63         vl = vlmax;
64     }
65     env->vl = vl;
66     env->vtype = s2;
67     env->vstart = 0;
68     return vl;
69 }
70 
71 /*
72  * Note that vector data is stored in host-endian 64-bit chunks,
73  * so addressing units smaller than that needs a host-endian fixup.
74  */
75 #ifdef HOST_WORDS_BIGENDIAN
76 #define H1(x)   ((x) ^ 7)
77 #define H1_2(x) ((x) ^ 6)
78 #define H1_4(x) ((x) ^ 4)
79 #define H2(x)   ((x) ^ 3)
80 #define H4(x)   ((x) ^ 1)
81 #define H8(x)   ((x))
82 #else
83 #define H1(x)   (x)
84 #define H1_2(x) (x)
85 #define H1_4(x) (x)
86 #define H2(x)   (x)
87 #define H4(x)   (x)
88 #define H8(x)   (x)
89 #endif
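/*
 * For example, on a big-endian host H1(0) == 7: byte element 0 of a vector
 * lives at byte offset 7 of the first 64-bit chunk, element 1 at offset 6,
 * and so on.  On little-endian hosts the H macros are the identity and no
 * fixup is needed.
 */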
90 
91 static inline uint32_t vext_nf(uint32_t desc)
92 {
93     return FIELD_EX32(simd_data(desc), VDATA, NF);
94 }
95 
96 static inline uint32_t vext_vm(uint32_t desc)
97 {
98     return FIELD_EX32(simd_data(desc), VDATA, VM);
99 }
100 
101 /*
102  * Encode LMUL to lmul as follows:
103  *     LMUL    vlmul    lmul
104  *      1       000       0
105  *      2       001       1
106  *      4       010       2
107  *      8       011       3
108  *      -       100       -
109  *     1/8      101      -3
110  *     1/4      110      -2
111  *     1/2      111      -1
112  */
113 static inline int32_t vext_lmul(uint32_t desc)
114 {
115     return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
116 }
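/*
 * E.g. vlmul == 3 (0b011) yields lmul == 3 (LMUL = 8), while vlmul == 6
 * (0b110) is sign-extended by sextract32() to lmul == -2 (LMUL = 1/4),
 * matching the table above.
 */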
117 
118 /*
119  * Get vector group length in bytes. Its range is [64, 2048].
120  *
121  * As simd_desc supports at most 256, the max vlen is 512 bits.
122  * So vlen in bytes is encoded as maxsz.
123  */
124 static inline uint32_t vext_maxsz(uint32_t desc)
125 {
126     return simd_maxsz(desc) << vext_lmul(desc);
127 }
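/* E.g. maxsz == 64 bytes with lmul == 3 (LMUL = 8) gives 64 << 3 == 512 bytes. */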
128 
129 /*
130  * This function checks watchpoints before the real load operation.
131  *
132  * In softmmu mode, the TLB API probe_access is enough for the watchpoint check.
133  * In user mode, there is no watchpoint support now.
134  *
135  * It will trigger an exception if there is no mapping in the TLB
136  * and the page table walk can't fill the TLB entry. Then the guest
137  * software can return here after processing the exception, or never return.
138  */
139 static void probe_pages(CPURISCVState *env, target_ulong addr,
140                         target_ulong len, uintptr_t ra,
141                         MMUAccessType access_type)
142 {
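    /*
     * -(addr | TARGET_PAGE_MASK) is the number of bytes from addr to the
     * end of the page it lies in, so curlen is the part of the access that
     * fits on the first page; any remainder is probed on the next page
     * with a second call.
     */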
143     target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
144     target_ulong curlen = MIN(pagelen, len);
145 
146     probe_access(env, addr, curlen, access_type,
147                  cpu_mmu_index(env, false), ra);
148     if (len > curlen) {
149         addr += curlen;
150         curlen = len - curlen;
151         probe_access(env, addr, curlen, access_type,
152                      cpu_mmu_index(env, false), ra);
153     }
154 }
155 
156 static inline void vext_set_elem_mask(void *v0, int index,
157                                       uint8_t value)
158 {
159     int idx = index / 64;
160     int pos = index % 64;
161     uint64_t old = ((uint64_t *)v0)[idx];
162     ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
163 }
164 
165 /*
166  * Earlier designs (pre-0.9) had a varying number of bits
167  * per mask value (MLEN). In the 0.9 design, MLEN=1.
168  * (Section 4.5)
169  */
170 static inline int vext_elem_mask(void *v0, int index)
171 {
172     int idx = index / 64;
173     int pos = index % 64;
174     return (((uint64_t *)v0)[idx] >> pos) & 1;
175 }
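/*
 * Mask registers are thus viewed as arrays of host 64-bit words with one
 * bit per element: element i maps to bit (i % 64) of word (i / 64), e.g.
 * element 70 is bit 6 of word 1.
 */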
176 
177 /* element operations for load and store */
178 typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
179                                uint32_t idx, void *vd, uintptr_t retaddr);
180 
181 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
182 static void NAME(CPURISCVState *env, abi_ptr addr,         \
183                  uint32_t idx, void *vd, uintptr_t retaddr)\
184 {                                                          \
185     ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
186     *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
187 }                                                          \
188 
189 GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
190 GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
191 GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
192 GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)
193 
194 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
195 static void NAME(CPURISCVState *env, abi_ptr addr,         \
196                  uint32_t idx, void *vd, uintptr_t retaddr)\
197 {                                                          \
198     ETYPE data = *((ETYPE *)vd + H(idx));                  \
199     cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
200 }
201 
202 GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
203 GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
204 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
205 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
206 
207 /*
208  *** stride: access vector element from strided memory
209  */
210 static void
211 vext_ldst_stride(void *vd, void *v0, target_ulong base,
212                  target_ulong stride, CPURISCVState *env,
213                  uint32_t desc, uint32_t vm,
214                  vext_ldst_elem_fn *ldst_elem,
215                  uint32_t esz, uintptr_t ra, MMUAccessType access_type)
216 {
217     uint32_t i, k;
218     uint32_t nf = vext_nf(desc);
219     uint32_t vlmax = vext_maxsz(desc) / esz;
220 
221     /* probe every access */
222     for (i = 0; i < env->vl; i++) {
223         if (!vm && !vext_elem_mask(v0, i)) {
224             continue;
225         }
226         probe_pages(env, base + stride * i, nf * esz, ra, access_type);
227     }
228     /* do real access */
229     for (i = 0; i < env->vl; i++) {
230         k = 0;
231         if (!vm && !vext_elem_mask(v0, i)) {
232             continue;
233         }
234         while (k < nf) {
235             target_ulong addr = base + stride * i + k * esz;
236             ldst_elem(env, addr, i + k * vlmax, vd, ra);
237             k++;
238         }
239     }
240 }
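/*
 * Note on the element index i + k * vlmax used above: for segment (NF > 1)
 * accesses, field k of segment i is placed at element i of the k-th
 * destination register group, so consecutive fields of one segment are
 * spread across register groups rather than packed together.
 */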
241 
242 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
243 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
244                   target_ulong stride, CPURISCVState *env,              \
245                   uint32_t desc)                                        \
246 {                                                                       \
247     uint32_t vm = vext_vm(desc);                                        \
248     vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
249                      sizeof(ETYPE), GETPC(), MMU_DATA_LOAD);            \
250 }
251 
252 GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b)
253 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
254 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
255 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)
256 
257 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                       \
258 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
259                   target_ulong stride, CPURISCVState *env,              \
260                   uint32_t desc)                                        \
261 {                                                                       \
262     uint32_t vm = vext_vm(desc);                                        \
263     vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
264                      sizeof(ETYPE), GETPC(), MMU_DATA_STORE);           \
265 }
266 
267 GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b)
268 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
269 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
270 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
271 
272 /*
273  *** unit-stride: access elements stored contiguously in memory
274  */
275 
276 /* unmasked unit-stride load and store operation */
277 static void
278 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
279              vext_ldst_elem_fn *ldst_elem,
280              uint32_t esz, uintptr_t ra, MMUAccessType access_type)
281 {
282     uint32_t i, k;
283     uint32_t nf = vext_nf(desc);
284     uint32_t vlmax = vext_maxsz(desc) / esz;
285 
286     /* probe every access */
287     probe_pages(env, base, env->vl * nf * esz, ra, access_type);
288     /* load or store bytes from guest memory */
289     for (i = 0; i < env->vl; i++) {
290         k = 0;
291         while (k < nf) {
292             target_ulong addr = base + (i * nf + k) * esz;
293             ldst_elem(env, addr, i + k * vlmax, vd, ra);
294             k++;
295         }
296     }
297 }
298 
299 /*
300  * A masked unit-stride load or store operation is handled as a special case
301  * of a strided operation with stride = NF * sizeof(ETYPE).
302  */
303 
304 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN)                            \
305 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
306                          CPURISCVState *env, uint32_t desc)             \
307 {                                                                       \
308     uint32_t stride = vext_nf(desc) * sizeof(ETYPE);                    \
309     vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
310                      sizeof(ETYPE), GETPC(), MMU_DATA_LOAD);            \
311 }                                                                       \
312                                                                         \
313 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
314                   CPURISCVState *env, uint32_t desc)                    \
315 {                                                                       \
316     vext_ldst_us(vd, base, env, desc, LOAD_FN,                          \
317                  sizeof(ETYPE), GETPC(), MMU_DATA_LOAD);                \
318 }
319 
320 GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b)
321 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
322 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
323 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)
324 
325 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN)                           \
326 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
327                          CPURISCVState *env, uint32_t desc)             \
328 {                                                                       \
329     uint32_t stride = vext_nf(desc) * sizeof(ETYPE);                    \
330     vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,  \
331                      sizeof(ETYPE), GETPC(), MMU_DATA_STORE);           \
332 }                                                                       \
333                                                                         \
334 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
335                   CPURISCVState *env, uint32_t desc)                    \
336 {                                                                       \
337     vext_ldst_us(vd, base, env, desc, STORE_FN,                         \
338                  sizeof(ETYPE), GETPC(), MMU_DATA_STORE);               \
339 }
340 
341 GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b)
342 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
343 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
344 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)
345 
346 /*
347  *** index: access vector element from indexed memory
348  */
349 typedef target_ulong vext_get_index_addr(target_ulong base,
350         uint32_t idx, void *vs2);
351 
352 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
353 static target_ulong NAME(target_ulong base,            \
354                          uint32_t idx, void *vs2)      \
355 {                                                      \
356     return (base + *((ETYPE *)vs2 + H(idx)));          \
357 }
358 
359 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t,  H1)
360 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
361 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
362 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
363 
364 static inline void
365 vext_ldst_index(void *vd, void *v0, target_ulong base,
366                 void *vs2, CPURISCVState *env, uint32_t desc,
367                 vext_get_index_addr get_index_addr,
368                 vext_ldst_elem_fn *ldst_elem,
369                 uint32_t esz, uintptr_t ra, MMUAccessType access_type)
370 {
371     uint32_t i, k;
372     uint32_t nf = vext_nf(desc);
373     uint32_t vm = vext_vm(desc);
374     uint32_t vlmax = vext_maxsz(desc) / esz;
375 
376     /* probe every access */
377     for (i = 0; i < env->vl; i++) {
378         if (!vm && !vext_elem_mask(v0, i)) {
379             continue;
380         }
381         probe_pages(env, get_index_addr(base, i, vs2), nf * esz, ra,
382                     access_type);
383     }
384     /* load or store bytes from guest memory */
385     for (i = 0; i < env->vl; i++) {
386         k = 0;
387         if (!vm && !vext_elem_mask(v0, i)) {
388             continue;
389         }
390         while (k < nf) {
391             abi_ptr addr = get_index_addr(base, i, vs2) + k * esz;
392             ldst_elem(env, addr, i + k * vlmax, vd, ra);
393             k++;
394         }
395     }
396 }
397 
398 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)                  \
399 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                   \
400                   void *vs2, CPURISCVState *env, uint32_t desc)            \
401 {                                                                          \
402     vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,                \
403                     LOAD_FN, sizeof(ETYPE), GETPC(), MMU_DATA_LOAD);       \
404 }
405 
406 GEN_VEXT_LD_INDEX(vlxei8_8_v,   int8_t,  idx_b, lde_b)
407 GEN_VEXT_LD_INDEX(vlxei8_16_v,  int16_t, idx_b, lde_h)
408 GEN_VEXT_LD_INDEX(vlxei8_32_v,  int32_t, idx_b, lde_w)
409 GEN_VEXT_LD_INDEX(vlxei8_64_v,  int64_t, idx_b, lde_d)
410 GEN_VEXT_LD_INDEX(vlxei16_8_v,  int8_t,  idx_h, lde_b)
411 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
412 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
413 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
414 GEN_VEXT_LD_INDEX(vlxei32_8_v,  int8_t,  idx_w, lde_b)
415 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
416 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
417 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
418 GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b)
419 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
420 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
421 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)
422 
423 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)       \
424 void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
425                   void *vs2, CPURISCVState *env, uint32_t desc)  \
426 {                                                                \
427     vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
428                     STORE_FN, sizeof(ETYPE),                     \
429                     GETPC(), MMU_DATA_STORE);                    \
430 }
431 
432 GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
433 GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
434 GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
435 GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
436 GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
437 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
438 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
439 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
440 GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
441 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
442 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
443 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
444 GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
445 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
446 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
447 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)
448 
449 /*
450  *** unit-stride fault-only-first load instructions
451  */
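/*
 * Only the first element is probed in a way that may fault; if a later
 * element's page turns out not to be accessible, vl is truncated to that
 * element's index and no trap is taken.
 */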
452 static inline void
453 vext_ldff(void *vd, void *v0, target_ulong base,
454           CPURISCVState *env, uint32_t desc,
455           vext_ldst_elem_fn *ldst_elem,
456           uint32_t esz, uintptr_t ra)
457 {
458     void *host;
459     uint32_t i, k, vl = 0;
460     uint32_t nf = vext_nf(desc);
461     uint32_t vm = vext_vm(desc);
462     uint32_t vlmax = vext_maxsz(desc) / esz;
463     target_ulong addr, offset, remain;
464 
465     /* probe every access */
466     for (i = 0; i < env->vl; i++) {
467         if (!vm && !vext_elem_mask(v0, i)) {
468             continue;
469         }
470         addr = base + nf * i * esz;
471         if (i == 0) {
472             probe_pages(env, addr, nf * esz, ra, MMU_DATA_LOAD);
473         } else {
474             /* if it triggers an exception, no need to check watchpoint */
475             remain = nf * esz;
476             while (remain > 0) {
477                 offset = -(addr | TARGET_PAGE_MASK);
478                 host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
479                                          cpu_mmu_index(env, false));
480                 if (host) {
481 #ifdef CONFIG_USER_ONLY
482                     if (page_check_range(addr, nf * esz, PAGE_READ) < 0) {
483                         vl = i;
484                         goto ProbeSuccess;
485                     }
486 #else
487                     probe_pages(env, addr, nf * esz, ra, MMU_DATA_LOAD);
488 #endif
489                 } else {
490                     vl = i;
491                     goto ProbeSuccess;
492                 }
493                 if (remain <= offset) {
494                     break;
495                 }
496                 remain -= offset;
497                 addr += offset;
498             }
499         }
500     }
501 ProbeSuccess:
502     /* load bytes from guest memory */
503     if (vl != 0) {
504         env->vl = vl;
505     }
506     for (i = 0; i < env->vl; i++) {
507         k = 0;
508         if (!vm && !vext_elem_mask(v0, i)) {
509             continue;
510         }
511         while (k < nf) {
512             target_ulong addr = base + (i * nf + k) * esz;
513             ldst_elem(env, addr, i + k * vlmax, vd, ra);
514             k++;
515         }
516     }
517 }
518 
519 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
520 void HELPER(NAME)(void *vd, void *v0, target_ulong base,  \
521                   CPURISCVState *env, uint32_t desc)      \
522 {                                                         \
523     vext_ldff(vd, v0, base, env, desc, LOAD_FN,           \
524               sizeof(ETYPE), GETPC());                    \
525 }
526 
527 GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b)
528 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
529 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
530 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
531 
532 #define DO_SWAP(N, M) (M)
533 #define DO_AND(N, M)  (N & M)
534 #define DO_XOR(N, M)  (N ^ M)
535 #define DO_OR(N, M)   (N | M)
536 #define DO_ADD(N, M)  (N + M)
537 
538 /* Signed min/max */
539 #define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
540 #define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))
541 
542 /* Unsigned min/max */
543 #define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
544 #define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)
545 
546 /*
547  *** load and store whole register instructions
548  */
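/*
 * Note that unlike the helpers above, esz here is log2 of the element size
 * (ctzl(sizeof(ETYPE))), hence the shifts when computing max_elems and the
 * per-element addresses.
 */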
549 static void
550 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
551                 vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra,
552                 MMUAccessType access_type)
553 {
554     uint32_t i, k;
555     uint32_t nf = vext_nf(desc);
556     uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
557     uint32_t max_elems = vlenb >> esz;
558 
559     /* probe every access */
560     probe_pages(env, base, vlenb * nf, ra, access_type);
561 
562     /* load or store bytes from guest memory */
563     for (k = 0; k < nf; k++) {
564         for (i = 0; i < max_elems; i++) {
565             target_ulong addr = base + ((i + k * max_elems) << esz);
566             ldst_elem(env, addr, i + k * max_elems, vd, ra);
567         }
568     }
569 }
570 
571 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN)      \
572 void HELPER(NAME)(void *vd, target_ulong base,       \
573                   CPURISCVState *env, uint32_t desc) \
574 {                                                    \
575     vext_ldst_whole(vd, base, env, desc, LOAD_FN,    \
576                     ctzl(sizeof(ETYPE)), GETPC(),    \
577                     MMU_DATA_LOAD);                  \
578 }
579 
580 GEN_VEXT_LD_WHOLE(vl1re8_v,  int8_t,  lde_b)
581 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
582 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
583 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
584 GEN_VEXT_LD_WHOLE(vl2re8_v,  int8_t,  lde_b)
585 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
586 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
587 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
588 GEN_VEXT_LD_WHOLE(vl4re8_v,  int8_t,  lde_b)
589 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
590 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
591 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
592 GEN_VEXT_LD_WHOLE(vl8re8_v,  int8_t,  lde_b)
593 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
594 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
595 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)
596 
597 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN)     \
598 void HELPER(NAME)(void *vd, target_ulong base,       \
599                   CPURISCVState *env, uint32_t desc) \
600 {                                                    \
601     vext_ldst_whole(vd, base, env, desc, STORE_FN,   \
602                     ctzl(sizeof(ETYPE)), GETPC(),    \
603                     MMU_DATA_STORE);                 \
604 }
605 
606 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
607 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
608 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
609 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
610 
611 /*
612  *** Vector Integer Arithmetic Instructions
613  */
614 
615 /* expand macro args before macro */
616 #define RVVCALL(macro, ...)  macro(__VA_ARGS__)
617 
618 /* (TD, T1, T2, TX1, TX2) */
619 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
620 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
621 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
622 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
623 #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
624 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
625 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
626 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
627 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
628 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
629 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
630 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
631 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
632 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
633 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
634 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
635 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
636 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
637 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
638 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
639 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
640 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
641 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
642 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
643 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
644 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
645 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
646 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
647 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
648 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
649 
650 /* operation of two vector elements */
651 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
652 
653 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
654 static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
655 {                                                               \
656     TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
657     TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
658     *((TD *)vd + HD(i)) = OP(s2, s1);                           \
659 }
660 #define DO_SUB(N, M) (N - M)
661 #define DO_RSUB(N, M) (M - N)
662 
663 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
664 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
665 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
666 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
667 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
668 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
669 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
670 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
671 
672 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
673                        CPURISCVState *env, uint32_t desc,
674                        uint32_t esz, uint32_t dsz,
675                        opivv2_fn *fn)
676 {
677     uint32_t vm = vext_vm(desc);
678     uint32_t vl = env->vl;
679     uint32_t i;
680 
681     for (i = 0; i < vl; i++) {
682         if (!vm && !vext_elem_mask(v0, i)) {
683             continue;
684         }
685         fn(vd, vs1, vs2, i);
686     }
687 }
688 
689 /* generate the helpers for OPIVV */
690 #define GEN_VEXT_VV(NAME, ESZ, DSZ)                       \
691 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
692                   void *vs2, CPURISCVState *env,          \
693                   uint32_t desc)                          \
694 {                                                         \
695     do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,     \
696                do_##NAME);                                \
697 }
698 
699 GEN_VEXT_VV(vadd_vv_b, 1, 1)
700 GEN_VEXT_VV(vadd_vv_h, 2, 2)
701 GEN_VEXT_VV(vadd_vv_w, 4, 4)
702 GEN_VEXT_VV(vadd_vv_d, 8, 8)
703 GEN_VEXT_VV(vsub_vv_b, 1, 1)
704 GEN_VEXT_VV(vsub_vv_h, 2, 2)
705 GEN_VEXT_VV(vsub_vv_w, 4, 4)
706 GEN_VEXT_VV(vsub_vv_d, 8, 8)
707 
708 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
709 
710 /*
711  * (T1)s1 gives the real operand type.
712  * (TX1)(T1)s1 expands it to the operand type of widening or narrowing operations.
713  */
714 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
715 static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
716 {                                                                   \
717     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
718     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
719 }
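/*
 * E.g. RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, ...) has T1 = int8_t and
 * TX1 = int16_t, so (TX1)(T1)s1 first truncates the scalar to int8_t and
 * then sign-extends it to the widened int16_t operand.
 */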
720 
721 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
722 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
723 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
724 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
725 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
726 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
727 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
728 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
729 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
730 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
731 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
732 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
733 
734 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
735                        CPURISCVState *env, uint32_t desc,
736                        uint32_t esz, uint32_t dsz,
737                        opivx2_fn fn)
738 {
739     uint32_t vm = vext_vm(desc);
740     uint32_t vl = env->vl;
741     uint32_t i;
742 
743     for (i = 0; i < vl; i++) {
744         if (!vm && !vext_elem_mask(v0, i)) {
745             continue;
746         }
747         fn(vd, s1, vs2, i);
748     }
749 }
750 
751 /* generate the helpers for OPIVX */
752 #define GEN_VEXT_VX(NAME, ESZ, DSZ)                       \
753 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
754                   void *vs2, CPURISCVState *env,          \
755                   uint32_t desc)                          \
756 {                                                         \
757     do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ,      \
758                do_##NAME);                                \
759 }
760 
761 GEN_VEXT_VX(vadd_vx_b, 1, 1)
762 GEN_VEXT_VX(vadd_vx_h, 2, 2)
763 GEN_VEXT_VX(vadd_vx_w, 4, 4)
764 GEN_VEXT_VX(vadd_vx_d, 8, 8)
765 GEN_VEXT_VX(vsub_vx_b, 1, 1)
766 GEN_VEXT_VX(vsub_vx_h, 2, 2)
767 GEN_VEXT_VX(vsub_vx_w, 4, 4)
768 GEN_VEXT_VX(vsub_vx_d, 8, 8)
769 GEN_VEXT_VX(vrsub_vx_b, 1, 1)
770 GEN_VEXT_VX(vrsub_vx_h, 2, 2)
771 GEN_VEXT_VX(vrsub_vx_w, 4, 4)
772 GEN_VEXT_VX(vrsub_vx_d, 8, 8)
773 
774 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
775 {
776     intptr_t oprsz = simd_oprsz(desc);
777     intptr_t i;
778 
779     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
780         *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
781     }
782 }
783 
784 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
785 {
786     intptr_t oprsz = simd_oprsz(desc);
787     intptr_t i;
788 
789     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
790         *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
791     }
792 }
793 
794 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
795 {
796     intptr_t oprsz = simd_oprsz(desc);
797     intptr_t i;
798 
799     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
800         *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
801     }
802 }
803 
804 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
805 {
806     intptr_t oprsz = simd_oprsz(desc);
807     intptr_t i;
808 
809     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
810         *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
811     }
812 }
813 
814 /* Vector Widening Integer Add/Subtract */
815 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
816 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
817 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
818 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
819 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
820 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
821 #define WOP_WUUU_B  uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
822 #define WOP_WUUU_H  uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
823 #define WOP_WUUU_W  uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
824 #define WOP_WSSS_B  int16_t, int8_t, int16_t, int16_t, int16_t
825 #define WOP_WSSS_H  int32_t, int16_t, int32_t, int32_t, int32_t
826 #define WOP_WSSS_W  int64_t, int32_t, int64_t, int64_t, int64_t
827 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
828 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
829 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
830 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
831 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
832 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
833 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
834 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
835 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
836 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
837 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
838 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
839 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
840 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
841 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
842 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
843 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
844 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
845 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
846 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
847 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
848 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
849 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
850 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
851 GEN_VEXT_VV(vwaddu_vv_b, 1, 2)
852 GEN_VEXT_VV(vwaddu_vv_h, 2, 4)
853 GEN_VEXT_VV(vwaddu_vv_w, 4, 8)
854 GEN_VEXT_VV(vwsubu_vv_b, 1, 2)
855 GEN_VEXT_VV(vwsubu_vv_h, 2, 4)
856 GEN_VEXT_VV(vwsubu_vv_w, 4, 8)
857 GEN_VEXT_VV(vwadd_vv_b, 1, 2)
858 GEN_VEXT_VV(vwadd_vv_h, 2, 4)
859 GEN_VEXT_VV(vwadd_vv_w, 4, 8)
860 GEN_VEXT_VV(vwsub_vv_b, 1, 2)
861 GEN_VEXT_VV(vwsub_vv_h, 2, 4)
862 GEN_VEXT_VV(vwsub_vv_w, 4, 8)
863 GEN_VEXT_VV(vwaddu_wv_b, 1, 2)
864 GEN_VEXT_VV(vwaddu_wv_h, 2, 4)
865 GEN_VEXT_VV(vwaddu_wv_w, 4, 8)
866 GEN_VEXT_VV(vwsubu_wv_b, 1, 2)
867 GEN_VEXT_VV(vwsubu_wv_h, 2, 4)
868 GEN_VEXT_VV(vwsubu_wv_w, 4, 8)
869 GEN_VEXT_VV(vwadd_wv_b, 1, 2)
870 GEN_VEXT_VV(vwadd_wv_h, 2, 4)
871 GEN_VEXT_VV(vwadd_wv_w, 4, 8)
872 GEN_VEXT_VV(vwsub_wv_b, 1, 2)
873 GEN_VEXT_VV(vwsub_wv_h, 2, 4)
874 GEN_VEXT_VV(vwsub_wv_w, 4, 8)
875 
876 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
877 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
878 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
879 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
880 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
881 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
882 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
883 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
884 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
885 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
886 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
887 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
888 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
889 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
890 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
891 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
892 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
893 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
894 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
895 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
896 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
897 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
898 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
899 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
900 GEN_VEXT_VX(vwaddu_vx_b, 1, 2)
901 GEN_VEXT_VX(vwaddu_vx_h, 2, 4)
902 GEN_VEXT_VX(vwaddu_vx_w, 4, 8)
903 GEN_VEXT_VX(vwsubu_vx_b, 1, 2)
904 GEN_VEXT_VX(vwsubu_vx_h, 2, 4)
905 GEN_VEXT_VX(vwsubu_vx_w, 4, 8)
906 GEN_VEXT_VX(vwadd_vx_b, 1, 2)
907 GEN_VEXT_VX(vwadd_vx_h, 2, 4)
908 GEN_VEXT_VX(vwadd_vx_w, 4, 8)
909 GEN_VEXT_VX(vwsub_vx_b, 1, 2)
910 GEN_VEXT_VX(vwsub_vx_h, 2, 4)
911 GEN_VEXT_VX(vwsub_vx_w, 4, 8)
912 GEN_VEXT_VX(vwaddu_wx_b, 1, 2)
913 GEN_VEXT_VX(vwaddu_wx_h, 2, 4)
914 GEN_VEXT_VX(vwaddu_wx_w, 4, 8)
915 GEN_VEXT_VX(vwsubu_wx_b, 1, 2)
916 GEN_VEXT_VX(vwsubu_wx_h, 2, 4)
917 GEN_VEXT_VX(vwsubu_wx_w, 4, 8)
918 GEN_VEXT_VX(vwadd_wx_b, 1, 2)
919 GEN_VEXT_VX(vwadd_wx_h, 2, 4)
920 GEN_VEXT_VX(vwadd_wx_w, 4, 8)
921 GEN_VEXT_VX(vwsub_wx_b, 1, 2)
922 GEN_VEXT_VX(vwsub_wx_h, 2, 4)
923 GEN_VEXT_VX(vwsub_wx_w, 4, 8)
924 
925 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
926 #define DO_VADC(N, M, C) (N + M + C)
927 #define DO_VSBC(N, M, C) (N - M - C)
928 
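/*
 * For vadc/vsbc the mask register v0 is read as per-element carry/borrow-in
 * data rather than as a write-enable, so the loop below has no vm check.
 */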
929 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP)              \
930 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
931                   CPURISCVState *env, uint32_t desc)          \
932 {                                                             \
933     uint32_t vl = env->vl;                                    \
934     uint32_t i;                                               \
935                                                               \
936     for (i = 0; i < vl; i++) {                                \
937         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
938         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
939         uint8_t carry = vext_elem_mask(v0, i);                \
940                                                               \
941         *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry);         \
942     }                                                         \
943 }
944 
945 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t,  H1, DO_VADC)
946 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
947 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
948 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)
949 
950 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t,  H1, DO_VSBC)
951 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
952 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
953 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
954 
955 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP)                         \
956 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
957                   CPURISCVState *env, uint32_t desc)                     \
958 {                                                                        \
959     uint32_t vl = env->vl;                                               \
960     uint32_t i;                                                          \
961                                                                          \
962     for (i = 0; i < vl; i++) {                                           \
963         ETYPE s2 = *((ETYPE *)vs2 + H(i));                               \
964         uint8_t carry = vext_elem_mask(v0, i);                           \
965                                                                          \
966         *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
967     }                                                                    \
968 }
969 
970 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t,  H1, DO_VADC)
971 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
972 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
973 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)
974 
975 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t,  H1, DO_VSBC)
976 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
977 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
978 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
979 
980 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N :           \
981                           (__typeof(N))(N + M) < N)
982 #define DO_MSBC(N, M, C) (C ? N <= M : N < M)
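/*
 * DO_MADC computes the carry-out of an unsigned add: with carry-in, the
 * truncated sum N + M + 1 wraps iff it is <= N; without carry-in, N + M
 * wraps iff it is < N.  DO_MSBC likewise gives the borrow-out of N - M.
 */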
983 
984 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
985 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
986                   CPURISCVState *env, uint32_t desc)          \
987 {                                                             \
988     uint32_t vl = env->vl;                                    \
989     uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);        \
990     uint32_t i;                                               \
991                                                               \
992     for (i = 0; i < vl; i++) {                                \
993         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
994         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
995         uint8_t carry = vext_elem_mask(v0, i);                \
996                                                               \
997         vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry));      \
998     }                                                         \
999     for (; i < vlmax; i++) {                                  \
1000         vext_set_elem_mask(vd, i, 0);                         \
1001     }                                                         \
1002 }
1003 
1004 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t,  H1, DO_MADC)
1005 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
1006 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
1007 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)
1008 
1009 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t,  H1, DO_MSBC)
1010 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
1011 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
1012 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
1013 
1014 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)               \
1015 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
1016                   void *vs2, CPURISCVState *env, uint32_t desc) \
1017 {                                                               \
1018     uint32_t vl = env->vl;                                      \
1019     uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);          \
1020     uint32_t i;                                                 \
1021                                                                 \
1022     for (i = 0; i < vl; i++) {                                  \
1023         ETYPE s2 = *((ETYPE *)vs2 + H(i));                      \
1024         uint8_t carry = vext_elem_mask(v0, i);                  \
1025                                                                 \
1026         vext_set_elem_mask(vd, i,                               \
1027                 DO_OP(s2, (ETYPE)(target_long)s1, carry));      \
1028     }                                                           \
1029     for (; i < vlmax; i++) {                                    \
1030         vext_set_elem_mask(vd, i, 0);                           \
1031     }                                                           \
1032 }
1033 
1034 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t,  H1, DO_MADC)
1035 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
1036 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
1037 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)
1038 
1039 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t,  H1, DO_MSBC)
1040 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
1041 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
1042 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
1043 
1044 /* Vector Bitwise Logical Instructions */
1045 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
1046 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
1047 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
1048 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
1049 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
1050 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
1051 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
1052 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
1053 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
1054 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
1055 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
1056 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
1057 GEN_VEXT_VV(vand_vv_b, 1, 1)
1058 GEN_VEXT_VV(vand_vv_h, 2, 2)
1059 GEN_VEXT_VV(vand_vv_w, 4, 4)
1060 GEN_VEXT_VV(vand_vv_d, 8, 8)
1061 GEN_VEXT_VV(vor_vv_b, 1, 1)
1062 GEN_VEXT_VV(vor_vv_h, 2, 2)
1063 GEN_VEXT_VV(vor_vv_w, 4, 4)
1064 GEN_VEXT_VV(vor_vv_d, 8, 8)
1065 GEN_VEXT_VV(vxor_vv_b, 1, 1)
1066 GEN_VEXT_VV(vxor_vv_h, 2, 2)
1067 GEN_VEXT_VV(vxor_vv_w, 4, 4)
1068 GEN_VEXT_VV(vxor_vv_d, 8, 8)
1069 
1070 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
1071 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
1072 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
1073 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
1074 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
1075 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
1076 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
1077 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
1078 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
1079 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
1080 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
1081 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
1082 GEN_VEXT_VX(vand_vx_b, 1, 1)
1083 GEN_VEXT_VX(vand_vx_h, 2, 2)
1084 GEN_VEXT_VX(vand_vx_w, 4, 4)
1085 GEN_VEXT_VX(vand_vx_d, 8, 8)
1086 GEN_VEXT_VX(vor_vx_b, 1, 1)
1087 GEN_VEXT_VX(vor_vx_h, 2, 2)
1088 GEN_VEXT_VX(vor_vx_w, 4, 4)
1089 GEN_VEXT_VX(vor_vx_d, 8, 8)
1090 GEN_VEXT_VX(vxor_vx_b, 1, 1)
1091 GEN_VEXT_VX(vxor_vx_h, 2, 2)
1092 GEN_VEXT_VX(vxor_vx_w, 4, 4)
1093 GEN_VEXT_VX(vxor_vx_d, 8, 8)
1094 
1095 /* Vector Single-Width Bit Shift Instructions */
1096 #define DO_SLL(N, M)  (N << (M))
1097 #define DO_SRL(N, M)  (N >> (M))
1098 
1099 /* generate the helpers for shift instructions with two vector operands */
1100 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK)             \
1101 void HELPER(NAME)(void *vd, void *v0, void *vs1,                          \
1102                   void *vs2, CPURISCVState *env, uint32_t desc)           \
1103 {                                                                         \
1104     uint32_t vm = vext_vm(desc);                                          \
1105     uint32_t vl = env->vl;                                                \
1106     uint32_t i;                                                           \
1107                                                                           \
1108     for (i = 0; i < vl; i++) {                                            \
1109         if (!vm && !vext_elem_mask(v0, i)) {                              \
1110             continue;                                                     \
1111         }                                                                 \
1112         TS1 s1 = *((TS1 *)vs1 + HS1(i));                                  \
1113         TS2 s2 = *((TS2 *)vs2 + HS2(i));                                  \
1114         *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);                        \
1115     }                                                                     \
1116 }
1117 
1118 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t,  uint8_t, H1, H1, DO_SLL, 0x7)
1119 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
1120 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
1121 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)
1122 
1123 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1124 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1125 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1126 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1127 
1128 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t,  int8_t, H1, H1, DO_SRL, 0x7)
1129 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
1130 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1131 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1132 
1133 /* generate the helpers for shift instructions with one vector and one scalar */
1134 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
1135 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,      \
1136         void *vs2, CPURISCVState *env, uint32_t desc)       \
1137 {                                                           \
1138     uint32_t vm = vext_vm(desc);                            \
1139     uint32_t vl = env->vl;                                  \
1140     uint32_t i;                                             \
1141                                                             \
1142     for (i = 0; i < vl; i++) {                              \
1143         if (!vm && !vext_elem_mask(v0, i)) {                \
1144             continue;                                       \
1145         }                                                   \
1146         TS2 s2 = *((TS2 *)vs2 + HS2(i));                    \
1147         *((TD *)vd + HD(i)) = OP(s2, s1 & MASK);            \
1148     }                                                       \
1149 }
1150 
1151 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
1152 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
1153 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
1154 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)
1155 
1156 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1157 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1158 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1159 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1160 
1161 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
1162 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
1163 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1164 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1165 
1166 /* Vector Narrowing Integer Right Shift Instructions */
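/*
 * These reuse the shift helpers with a source type twice as wide as the
 * destination; the shift-amount mask therefore reflects the wider source
 * width (0xf, 0x1f, 0x3f).
 */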
1167 GEN_VEXT_SHIFT_VV(vnsrl_vv_b, uint8_t,  uint16_t, H1, H2, DO_SRL, 0xf)
1168 GEN_VEXT_SHIFT_VV(vnsrl_vv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1169 GEN_VEXT_SHIFT_VV(vnsrl_vv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1170 GEN_VEXT_SHIFT_VV(vnsra_vv_b, uint8_t,  int16_t, H1, H2, DO_SRL, 0xf)
1171 GEN_VEXT_SHIFT_VV(vnsra_vv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1172 GEN_VEXT_SHIFT_VV(vnsra_vv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1173 GEN_VEXT_SHIFT_VX(vnsrl_vx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1174 GEN_VEXT_SHIFT_VX(vnsrl_vx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1175 GEN_VEXT_SHIFT_VX(vnsrl_vx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1176 GEN_VEXT_SHIFT_VX(vnsra_vx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
1177 GEN_VEXT_SHIFT_VX(vnsra_vx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1178 GEN_VEXT_SHIFT_VX(vnsra_vx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1179 
1180 /* Vector Integer Comparison Instructions */
1181 #define DO_MSEQ(N, M) (N == M)
1182 #define DO_MSNE(N, M) (N != M)
1183 #define DO_MSLT(N, M) (N < M)
1184 #define DO_MSLE(N, M) (N <= M)
1185 #define DO_MSGT(N, M) (N > M)
1186 
1187 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                \
1188 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
1189                   CPURISCVState *env, uint32_t desc)          \
1190 {                                                             \
1191     uint32_t vm = vext_vm(desc);                              \
1192     uint32_t vl = env->vl;                                    \
1193     uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);        \
1194     uint32_t i;                                               \
1195                                                               \
1196     for (i = 0; i < vl; i++) {                                \
1197         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
1198         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
1199         if (!vm && !vext_elem_mask(v0, i)) {                  \
1200             continue;                                         \
1201         }                                                     \
1202         vext_set_elem_mask(vd, i, DO_OP(s2, s1));             \
1203     }                                                         \
1204     for (; i < vlmax; i++) {                                  \
1205         vext_set_elem_mask(vd, i, 0);                         \
1206     }                                                         \
1207 }
1208 
1209 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
1210 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
1211 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
1212 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
1213 
1214 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t,  H1, DO_MSNE)
1215 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
1216 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
1217 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
1218 
1219 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t,  H1, DO_MSLT)
1220 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
1221 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
1222 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
1223 
1224 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t,  H1, DO_MSLT)
1225 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
1226 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
1227 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
1228 
1229 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t,  H1, DO_MSLE)
1230 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
1231 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
1232 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
1233 
1234 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t,  H1, DO_MSLE)
1235 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
1236 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
1237 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1238 
1239 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP)                      \
1240 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
1241                   CPURISCVState *env, uint32_t desc)                \
1242 {                                                                   \
1243     uint32_t vm = vext_vm(desc);                                    \
1244     uint32_t vl = env->vl;                                          \
1245     uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);              \
1246     uint32_t i;                                                     \
1247                                                                     \
1248     for (i = 0; i < vl; i++) {                                      \
1249         ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
1250         if (!vm && !vext_elem_mask(v0, i)) {                        \
1251             continue;                                               \
1252         }                                                           \
1253         vext_set_elem_mask(vd, i,                                   \
1254                 DO_OP(s2, (ETYPE)(target_long)s1));                 \
1255     }                                                               \
1256     for (; i < vlmax; i++) {                                        \
1257         vext_set_elem_mask(vd, i, 0);                               \
1258     }                                                               \
1259 }
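
/*
 * Note on the scalar operand above (added for clarity): s1 arrives as a
 * target_ulong, and the (ETYPE)(target_long)s1 cast converts it to the
 * element type via a signed XLEN value, narrowing or sign-extending as
 * needed.  With s1 == UINT64_MAX on RV64, for instance, vmslt_vx_b
 * compares against (int8_t)-1 while vmsltu_vx_b compares against
 * (uint8_t)0xff.  As in the VV form, the trailing loop clears the mask
 * bits from vl up to vlmax.
 */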
1260 
1261 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t,  H1, DO_MSEQ)
1262 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
1263 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
1264 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
1265 
1266 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t,  H1, DO_MSNE)
1267 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
1268 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
1269 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
1270 
1271 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t,  H1, DO_MSLT)
1272 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
1273 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
1274 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
1275 
1276 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t,  H1, DO_MSLT)
1277 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
1278 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
1279 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
1280 
1281 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t,  H1, DO_MSLE)
1282 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
1283 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
1284 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
1285 
1286 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t,  H1, DO_MSLE)
1287 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
1288 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
1289 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
1290 
1291 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t,  H1, DO_MSGT)
1292 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
1293 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
1294 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
1295 
1296 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t,  H1, DO_MSGT)
1297 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
1298 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
1299 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
1300 
1301 /* Vector Integer Min/Max Instructions */
1302 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1303 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1304 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1305 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1306 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1307 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1308 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1309 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1310 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1311 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1312 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1313 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1314 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1315 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1316 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1317 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
1318 GEN_VEXT_VV(vminu_vv_b, 1, 1)
1319 GEN_VEXT_VV(vminu_vv_h, 2, 2)
1320 GEN_VEXT_VV(vminu_vv_w, 4, 4)
1321 GEN_VEXT_VV(vminu_vv_d, 8, 8)
1322 GEN_VEXT_VV(vmin_vv_b, 1, 1)
1323 GEN_VEXT_VV(vmin_vv_h, 2, 2)
1324 GEN_VEXT_VV(vmin_vv_w, 4, 4)
1325 GEN_VEXT_VV(vmin_vv_d, 8, 8)
1326 GEN_VEXT_VV(vmaxu_vv_b, 1, 1)
1327 GEN_VEXT_VV(vmaxu_vv_h, 2, 2)
1328 GEN_VEXT_VV(vmaxu_vv_w, 4, 4)
1329 GEN_VEXT_VV(vmaxu_vv_d, 8, 8)
1330 GEN_VEXT_VV(vmax_vv_b, 1, 1)
1331 GEN_VEXT_VV(vmax_vv_h, 2, 2)
1332 GEN_VEXT_VV(vmax_vv_w, 4, 4)
1333 GEN_VEXT_VV(vmax_vv_d, 8, 8)
1334 
1335 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1336 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1337 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1338 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1339 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1340 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1341 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1342 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1343 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1344 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1345 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1346 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1347 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1348 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1349 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1350 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
1351 GEN_VEXT_VX(vminu_vx_b, 1, 1)
1352 GEN_VEXT_VX(vminu_vx_h, 2, 2)
1353 GEN_VEXT_VX(vminu_vx_w, 4, 4)
1354 GEN_VEXT_VX(vminu_vx_d, 8, 8)
1355 GEN_VEXT_VX(vmin_vx_b, 1, 1)
1356 GEN_VEXT_VX(vmin_vx_h, 2, 2)
1357 GEN_VEXT_VX(vmin_vx_w, 4, 4)
1358 GEN_VEXT_VX(vmin_vx_d, 8, 8)
1359 GEN_VEXT_VX(vmaxu_vx_b, 1, 1)
1360 GEN_VEXT_VX(vmaxu_vx_h, 2, 2)
1361 GEN_VEXT_VX(vmaxu_vx_w, 4, 4)
1362 GEN_VEXT_VX(vmaxu_vx_d, 8, 8)
1363 GEN_VEXT_VX(vmax_vx_b, 1, 1)
1364 GEN_VEXT_VX(vmax_vx_h, 2, 2)
1365 GEN_VEXT_VX(vmax_vx_w, 4, 4)
1366 GEN_VEXT_VX(vmax_vx_d, 8, 8)
1367 
1368 /* Vector Single-Width Integer Multiply Instructions */
1369 #define DO_MUL(N, M) (N * M)
1370 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1371 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1372 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1373 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
1374 GEN_VEXT_VV(vmul_vv_b, 1, 1)
1375 GEN_VEXT_VV(vmul_vv_h, 2, 2)
1376 GEN_VEXT_VV(vmul_vv_w, 4, 4)
1377 GEN_VEXT_VV(vmul_vv_d, 8, 8)
1378 
1379 static int8_t do_mulh_b(int8_t s2, int8_t s1)
1380 {
1381     return (int16_t)s2 * (int16_t)s1 >> 8;
1382 }
1383 
1384 static int16_t do_mulh_h(int16_t s2, int16_t s1)
1385 {
1386     return (int32_t)s2 * (int32_t)s1 >> 16;
1387 }
1388 
1389 static int32_t do_mulh_w(int32_t s2, int32_t s1)
1390 {
1391     return (int64_t)s2 * (int64_t)s1 >> 32;
1392 }
1393 
1394 static int64_t do_mulh_d(int64_t s2, int64_t s1)
1395 {
1396     uint64_t hi_64, lo_64;
1397 
1398     muls64(&lo_64, &hi_64, s1, s2);
1399     return hi_64;
1400 }
1401 
1402 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1403 {
1404     return (uint16_t)s2 * (uint16_t)s1 >> 8;
1405 }
1406 
1407 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1408 {
1409     return (uint32_t)s2 * (uint32_t)s1 >> 16;
1410 }
1411 
1412 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1413 {
1414     return (uint64_t)s2 * (uint64_t)s1 >> 32;
1415 }
1416 
1417 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1418 {
1419     uint64_t hi_64, lo_64;
1420 
1421     mulu64(&lo_64, &hi_64, s2, s1);
1422     return hi_64;
1423 }
1424 
1425 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1426 {
1427     return (int16_t)s2 * (uint16_t)s1 >> 8;
1428 }
1429 
1430 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1431 {
1432     return (int32_t)s2 * (uint32_t)s1 >> 16;
1433 }
1434 
1435 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1436 {
1437     return (int64_t)s2 * (uint64_t)s1 >> 32;
1438 }
1439 
1440 /*
1441  * Let  A = signed operand,
1442  *      B = unsigned operand,
1443  *      P = mulu64(A, B), the unsigned product of A's bit pattern and B.
1444  *
1445  * Let  X = 2 ** 64 - A, the 2's complement of A's bit pattern,
1446  *      SP = the signed product.
1447  * Then, reading A below as its unsigned bit pattern,
1448  *      IF A < 0 (i.e. the sign bit of A is set)
1449  *          SP = -X * B
1450  *             = -(2 ** 64 - A) * B
1451  *             = A * B - 2 ** 64 * B
1452  *             = P - 2 ** 64 * B
1453  *      ELSE
1454  *          SP = P
1455  * so only the high half needs a fix-up:
1456  *      HI_P -= (A < 0 ? B : 0)
1457  */
1458 
1459 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1460 {
1461     uint64_t hi_64, lo_64;
1462 
1463     mulu64(&lo_64, &hi_64, s2, s1);
1464 
1465     hi_64 -= s2 < 0 ? s1 : 0;
1466     return hi_64;
1467 }
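
/*
 * Worked check of the fix-up above (illustrative values): s2 = -1, s1 = 2.
 * mulu64() sees s2 as 2**64 - 1, giving hi_64 = 1 and
 * lo_64 = 0xfffffffffffffffe.  The true signed product is -2, whose high
 * 64 bits are 0xffffffffffffffff, and indeed hi_64 - s1 = 1 - 2 wraps to
 * 0xffffffffffffffff.
 */

#ifdef CONFIG_INT128
/*
 * Reference-only sketch (unused; the name is ours, not part of the helper
 * set): the same computation done with a 128-bit multiply, handy for
 * cross-checking do_mulhsu_d on hosts that provide __int128_t.
 */
static inline int64_t do_mulhsu_d_ref(int64_t s2, uint64_t s1)
{
    /* An int64_t by uint64_t product always fits in a signed 128-bit value. */
    return (__int128_t)s2 * s1 >> 64;
}
#endif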
1468 
1469 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1470 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1471 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1472 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1473 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1474 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1475 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1476 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1477 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1478 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1479 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1480 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
1481 GEN_VEXT_VV(vmulh_vv_b, 1, 1)
1482 GEN_VEXT_VV(vmulh_vv_h, 2, 2)
1483 GEN_VEXT_VV(vmulh_vv_w, 4, 4)
1484 GEN_VEXT_VV(vmulh_vv_d, 8, 8)
1485 GEN_VEXT_VV(vmulhu_vv_b, 1, 1)
1486 GEN_VEXT_VV(vmulhu_vv_h, 2, 2)
1487 GEN_VEXT_VV(vmulhu_vv_w, 4, 4)
1488 GEN_VEXT_VV(vmulhu_vv_d, 8, 8)
1489 GEN_VEXT_VV(vmulhsu_vv_b, 1, 1)
1490 GEN_VEXT_VV(vmulhsu_vv_h, 2, 2)
1491 GEN_VEXT_VV(vmulhsu_vv_w, 4, 4)
1492 GEN_VEXT_VV(vmulhsu_vv_d, 8, 8)
1493 
1494 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1495 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1496 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1497 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1498 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1499 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1500 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1501 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1502 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1503 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1504 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1505 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1506 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1507 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1508 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1509 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
1510 GEN_VEXT_VX(vmul_vx_b, 1, 1)
1511 GEN_VEXT_VX(vmul_vx_h, 2, 2)
1512 GEN_VEXT_VX(vmul_vx_w, 4, 4)
1513 GEN_VEXT_VX(vmul_vx_d, 8, 8)
1514 GEN_VEXT_VX(vmulh_vx_b, 1, 1)
1515 GEN_VEXT_VX(vmulh_vx_h, 2, 2)
1516 GEN_VEXT_VX(vmulh_vx_w, 4, 4)
1517 GEN_VEXT_VX(vmulh_vx_d, 8, 8)
1518 GEN_VEXT_VX(vmulhu_vx_b, 1, 1)
1519 GEN_VEXT_VX(vmulhu_vx_h, 2, 2)
1520 GEN_VEXT_VX(vmulhu_vx_w, 4, 4)
1521 GEN_VEXT_VX(vmulhu_vx_d, 8, 8)
1522 GEN_VEXT_VX(vmulhsu_vx_b, 1, 1)
1523 GEN_VEXT_VX(vmulhsu_vx_h, 2, 2)
1524 GEN_VEXT_VX(vmulhsu_vx_w, 4, 4)
1525 GEN_VEXT_VX(vmulhsu_vx_d, 8, 8)
1526 
1527 /* Vector Integer Divide Instructions */
1528 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1529 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
1530 #define DO_DIV(N, M)  (unlikely(M == 0) ? (__typeof(N))(-1) :\
1531         unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
1532 #define DO_REM(N, M)  (unlikely(M == 0) ? N :\
1533         unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
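
/*
 * These macros implement the RISC-V division special cases: division by
 * zero yields all ones for (v)div(u) and the dividend for (v)rem(u), and
 * the signed overflow case MIN / -1 yields MIN for vdiv and 0 for vrem,
 * without ever evaluating the host-undefined MIN / -1.  The N == -N test
 * also matches N == 0, but then the selected results (0 and 0) equal
 * 0 / -1 and 0 % -1 anyway, so the shortcut is harmless.
 */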
1534 
1535 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1536 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1537 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1538 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1539 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1540 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1541 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1542 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1543 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1544 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1545 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1546 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1547 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1548 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1549 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1550 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
1551 GEN_VEXT_VV(vdivu_vv_b, 1, 1)
1552 GEN_VEXT_VV(vdivu_vv_h, 2, 2)
1553 GEN_VEXT_VV(vdivu_vv_w, 4, 4)
1554 GEN_VEXT_VV(vdivu_vv_d, 8, 8)
1555 GEN_VEXT_VV(vdiv_vv_b, 1, 1)
1556 GEN_VEXT_VV(vdiv_vv_h, 2, 2)
1557 GEN_VEXT_VV(vdiv_vv_w, 4, 4)
1558 GEN_VEXT_VV(vdiv_vv_d, 8, 8)
1559 GEN_VEXT_VV(vremu_vv_b, 1, 1)
1560 GEN_VEXT_VV(vremu_vv_h, 2, 2)
1561 GEN_VEXT_VV(vremu_vv_w, 4, 4)
1562 GEN_VEXT_VV(vremu_vv_d, 8, 8)
1563 GEN_VEXT_VV(vrem_vv_b, 1, 1)
1564 GEN_VEXT_VV(vrem_vv_h, 2, 2)
1565 GEN_VEXT_VV(vrem_vv_w, 4, 4)
1566 GEN_VEXT_VV(vrem_vv_d, 8, 8)
1567 
1568 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1569 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1570 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1571 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1572 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1573 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1574 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1575 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1576 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1577 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1578 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1579 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1580 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1581 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1582 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1583 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
1584 GEN_VEXT_VX(vdivu_vx_b, 1, 1)
1585 GEN_VEXT_VX(vdivu_vx_h, 2, 2)
1586 GEN_VEXT_VX(vdivu_vx_w, 4, 4)
1587 GEN_VEXT_VX(vdivu_vx_d, 8, 8)
1588 GEN_VEXT_VX(vdiv_vx_b, 1, 1)
1589 GEN_VEXT_VX(vdiv_vx_h, 2, 2)
1590 GEN_VEXT_VX(vdiv_vx_w, 4, 4)
1591 GEN_VEXT_VX(vdiv_vx_d, 8, 8)
1592 GEN_VEXT_VX(vremu_vx_b, 1, 1)
1593 GEN_VEXT_VX(vremu_vx_h, 2, 2)
1594 GEN_VEXT_VX(vremu_vx_w, 4, 4)
1595 GEN_VEXT_VX(vremu_vx_d, 8, 8)
1596 GEN_VEXT_VX(vrem_vx_b, 1, 1)
1597 GEN_VEXT_VX(vrem_vx_h, 2, 2)
1598 GEN_VEXT_VX(vrem_vx_w, 4, 4)
1599 GEN_VEXT_VX(vrem_vx_d, 8, 8)
1600 
1601 /* Vector Widening Integer Multiply Instructions */
1602 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1603 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1604 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1605 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1606 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1607 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1608 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1609 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1610 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
1611 GEN_VEXT_VV(vwmul_vv_b, 1, 2)
1612 GEN_VEXT_VV(vwmul_vv_h, 2, 4)
1613 GEN_VEXT_VV(vwmul_vv_w, 4, 8)
1614 GEN_VEXT_VV(vwmulu_vv_b, 1, 2)
1615 GEN_VEXT_VV(vwmulu_vv_h, 2, 4)
1616 GEN_VEXT_VV(vwmulu_vv_w, 4, 8)
1617 GEN_VEXT_VV(vwmulsu_vv_b, 1, 2)
1618 GEN_VEXT_VV(vwmulsu_vv_h, 2, 4)
1619 GEN_VEXT_VV(vwmulsu_vv_w, 4, 8)
1620 
1621 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1622 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1623 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1624 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1625 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1626 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1627 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1628 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1629 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
1630 GEN_VEXT_VX(vwmul_vx_b, 1, 2)
1631 GEN_VEXT_VX(vwmul_vx_h, 2, 4)
1632 GEN_VEXT_VX(vwmul_vx_w, 4, 8)
1633 GEN_VEXT_VX(vwmulu_vx_b, 1, 2)
1634 GEN_VEXT_VX(vwmulu_vx_h, 2, 4)
1635 GEN_VEXT_VX(vwmulu_vx_w, 4, 8)
1636 GEN_VEXT_VX(vwmulsu_vx_b, 1, 2)
1637 GEN_VEXT_VX(vwmulsu_vx_h, 2, 4)
1638 GEN_VEXT_VX(vwmulsu_vx_w, 4, 8)
1639 
1640 /* Vector Single-Width Integer Multiply-Add Instructions */
1641 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
1642 static void do_##NAME(void *vd, void *vs1, void *vs2, int i)       \
1643 {                                                                  \
1644     TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
1645     TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
1646     TD d = *((TD *)vd + HD(i));                                    \
1647     *((TD *)vd + HD(i)) = OP(s2, s1, d);                           \
1648 }
1649 
1650 #define DO_MACC(N, M, D) (M * N + D)
1651 #define DO_NMSAC(N, M, D) (-(M * N) + D)
1652 #define DO_MADD(N, M, D) (M * D + N)
1653 #define DO_NMSUB(N, M, D) (-(M * D) + N)
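
/*
 * Operand roles in the macros above: N is the vs2 element, M is the vs1
 * element (or the scalar for the _vx forms) and D is the current vd
 * element, as wired up by OPIVV3 above and OPIVX3 below.  So vmacc/vnmsac
 * overwrite vd with +/-(vs1 * vs2) + vd, while vmadd/vnmsub overwrite vd
 * with +/-(vs1 * vd) + vs2.
 */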
1654 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1655 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1656 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1657 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1658 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1659 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1660 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1661 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1662 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1663 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1664 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1665 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1666 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1667 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1668 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1669 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
1670 GEN_VEXT_VV(vmacc_vv_b, 1, 1)
1671 GEN_VEXT_VV(vmacc_vv_h, 2, 2)
1672 GEN_VEXT_VV(vmacc_vv_w, 4, 4)
1673 GEN_VEXT_VV(vmacc_vv_d, 8, 8)
1674 GEN_VEXT_VV(vnmsac_vv_b, 1, 1)
1675 GEN_VEXT_VV(vnmsac_vv_h, 2, 2)
1676 GEN_VEXT_VV(vnmsac_vv_w, 4, 4)
1677 GEN_VEXT_VV(vnmsac_vv_d, 8, 8)
1678 GEN_VEXT_VV(vmadd_vv_b, 1, 1)
1679 GEN_VEXT_VV(vmadd_vv_h, 2, 2)
1680 GEN_VEXT_VV(vmadd_vv_w, 4, 4)
1681 GEN_VEXT_VV(vmadd_vv_d, 8, 8)
1682 GEN_VEXT_VV(vnmsub_vv_b, 1, 1)
1683 GEN_VEXT_VV(vnmsub_vv_h, 2, 2)
1684 GEN_VEXT_VV(vnmsub_vv_w, 4, 4)
1685 GEN_VEXT_VV(vnmsub_vv_d, 8, 8)
1686 
1687 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
1688 static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
1689 {                                                                   \
1690     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
1691     TD d = *((TD *)vd + HD(i));                                     \
1692     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d);                   \
1693 }
1694 
1695 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1696 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1697 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1698 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1699 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1700 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1701 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1702 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1703 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1704 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1705 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1706 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1707 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1708 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1709 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1710 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
1711 GEN_VEXT_VX(vmacc_vx_b, 1, 1)
1712 GEN_VEXT_VX(vmacc_vx_h, 2, 2)
1713 GEN_VEXT_VX(vmacc_vx_w, 4, 4)
1714 GEN_VEXT_VX(vmacc_vx_d, 8, 8)
1715 GEN_VEXT_VX(vnmsac_vx_b, 1, 1)
1716 GEN_VEXT_VX(vnmsac_vx_h, 2, 2)
1717 GEN_VEXT_VX(vnmsac_vx_w, 4, 4)
1718 GEN_VEXT_VX(vnmsac_vx_d, 8, 8)
1719 GEN_VEXT_VX(vmadd_vx_b, 1, 1)
1720 GEN_VEXT_VX(vmadd_vx_h, 2, 2)
1721 GEN_VEXT_VX(vmadd_vx_w, 4, 4)
1722 GEN_VEXT_VX(vmadd_vx_d, 8, 8)
1723 GEN_VEXT_VX(vnmsub_vx_b, 1, 1)
1724 GEN_VEXT_VX(vnmsub_vx_h, 2, 2)
1725 GEN_VEXT_VX(vnmsub_vx_w, 4, 4)
1726 GEN_VEXT_VX(vnmsub_vx_d, 8, 8)
1727 
1728 /* Vector Widening Integer Multiply-Add Instructions */
1729 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1730 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1731 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1732 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1733 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1734 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1735 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1736 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1737 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
1738 GEN_VEXT_VV(vwmaccu_vv_b, 1, 2)
1739 GEN_VEXT_VV(vwmaccu_vv_h, 2, 4)
1740 GEN_VEXT_VV(vwmaccu_vv_w, 4, 8)
1741 GEN_VEXT_VV(vwmacc_vv_b, 1, 2)
1742 GEN_VEXT_VV(vwmacc_vv_h, 2, 4)
1743 GEN_VEXT_VV(vwmacc_vv_w, 4, 8)
1744 GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2)
1745 GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4)
1746 GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8)
1747 
1748 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
1749 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
1750 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
1751 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
1752 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
1753 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
1754 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
1755 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
1756 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
1757 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
1758 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
1759 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
1760 GEN_VEXT_VX(vwmaccu_vx_b, 1, 2)
1761 GEN_VEXT_VX(vwmaccu_vx_h, 2, 4)
1762 GEN_VEXT_VX(vwmaccu_vx_w, 4, 8)
1763 GEN_VEXT_VX(vwmacc_vx_b, 1, 2)
1764 GEN_VEXT_VX(vwmacc_vx_h, 2, 4)
1765 GEN_VEXT_VX(vwmacc_vx_w, 4, 8)
1766 GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2)
1767 GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4)
1768 GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8)
1769 GEN_VEXT_VX(vwmaccus_vx_b, 1, 2)
1770 GEN_VEXT_VX(vwmaccus_vx_h, 2, 4)
1771 GEN_VEXT_VX(vwmaccus_vx_w, 4, 8)
1772 
1773 /* Vector Integer Merge and Move Instructions */
1774 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H)                              \
1775 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env,           \
1776                   uint32_t desc)                                     \
1777 {                                                                    \
1778     uint32_t vl = env->vl;                                           \
1779     uint32_t i;                                                      \
1780                                                                      \
1781     for (i = 0; i < vl; i++) {                                       \
1782         ETYPE s1 = *((ETYPE *)vs1 + H(i));                           \
1783         *((ETYPE *)vd + H(i)) = s1;                                  \
1784     }                                                                \
1785 }
1786 
1787 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t,  H1)
1788 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
1789 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
1790 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
1791 
1792 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H)                              \
1793 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env,         \
1794                   uint32_t desc)                                     \
1795 {                                                                    \
1796     uint32_t vl = env->vl;                                           \
1797     uint32_t i;                                                      \
1798                                                                      \
1799     for (i = 0; i < vl; i++) {                                       \
1800         *((ETYPE *)vd + H(i)) = (ETYPE)s1;                           \
1801     }                                                                \
1802 }
1803 
1804 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t,  H1)
1805 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
1806 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
1807 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
1808 
1809 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H)                           \
1810 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,          \
1811                   CPURISCVState *env, uint32_t desc)                 \
1812 {                                                                    \
1813     uint32_t vl = env->vl;                                           \
1814     uint32_t i;                                                      \
1815                                                                      \
1816     for (i = 0; i < vl; i++) {                                       \
1817         ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1);            \
1818         *((ETYPE *)vd + H(i)) = *(vt + H(i));                        \
1819     }                                                                \
1820 }
1821 
1822 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t,  H1)
1823 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
1824 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
1825 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
1826 
1827 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H)                           \
1828 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,               \
1829                   void *vs2, CPURISCVState *env, uint32_t desc)      \
1830 {                                                                    \
1831     uint32_t vl = env->vl;                                           \
1832     uint32_t i;                                                      \
1833                                                                      \
1834     for (i = 0; i < vl; i++) {                                       \
1835         ETYPE s2 = *((ETYPE *)vs2 + H(i));                           \
1836         ETYPE d = (!vext_elem_mask(v0, i) ? s2 :                     \
1837                    (ETYPE)(target_long)s1);                          \
1838         *((ETYPE *)vd + H(i)) = d;                                   \
1839     }                                                                \
1840 }
1841 
1842 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t,  H1)
1843 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
1844 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
1845 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
1846 
1847 /*
1848  *** Vector Fixed-Point Arithmetic Instructions
1849  */
1850 
1851 /* Vector Single-Width Saturating Add and Subtract */
1852 
1853 /*
1854  * Fixed-point instructions generally need a rounding mode and may saturate,
1855  * so define the common fixed-point helper macros here.
1856  */
1857 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
1858                           CPURISCVState *env, int vxrm);
1859 
1860 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)     \
1861 static inline void                                                  \
1862 do_##NAME(void *vd, void *vs1, void *vs2, int i,                    \
1863           CPURISCVState *env, int vxrm)                             \
1864 {                                                                   \
1865     TX1 s1 = *((T1 *)vs1 + HS1(i));                                 \
1866     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
1867     *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1);                    \
1868 }
1869 
1870 static inline void
1871 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
1872              CPURISCVState *env,
1873              uint32_t vl, uint32_t vm, int vxrm,
1874              opivv2_rm_fn *fn)
1875 {
1876     for (uint32_t i = 0; i < vl; i++) {
1877         if (!vm && !vext_elem_mask(v0, i)) {
1878             continue;
1879         }
1880         fn(vd, vs1, vs2, i, env, vxrm);
1881     }
1882 }
1883 
1884 static inline void
1885 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
1886              CPURISCVState *env,
1887              uint32_t desc, uint32_t esz, uint32_t dsz,
1888              opivv2_rm_fn *fn)
1889 {
1890     uint32_t vm = vext_vm(desc);
1891     uint32_t vl = env->vl;
1892 
1893     switch (env->vxrm) {
1894     case 0: /* rnu */
1895         vext_vv_rm_1(vd, v0, vs1, vs2,
1896                      env, vl, vm, 0, fn);
1897         break;
1898     case 1: /* rne */
1899         vext_vv_rm_1(vd, v0, vs1, vs2,
1900                      env, vl, vm, 1, fn);
1901         break;
1902     case 2: /* rdn */
1903         vext_vv_rm_1(vd, v0, vs1, vs2,
1904                      env, vl, vm, 2, fn);
1905         break;
1906     default: /* rod */
1907         vext_vv_rm_1(vd, v0, vs1, vs2,
1908                      env, vl, vm, 3, fn);
1909         break;
1910     }
1911 }
1912 
1913 /* generate helpers for fixed point instructions with OPIVV format */
1914 #define GEN_VEXT_VV_RM(NAME, ESZ, DSZ)                          \
1915 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,     \
1916                   CPURISCVState *env, uint32_t desc)            \
1917 {                                                               \
1918     vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,         \
1919                  do_##NAME);                                    \
1920 }
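
/*
 * Illustrative expansion (sketch): GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1) below
 * produces
 *
 *   void HELPER(vsaddu_vv_b)(void *vd, void *v0, void *vs1, void *vs2,
 *                            CPURISCVState *env, uint32_t desc)
 *   {
 *       vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, 1, 1, do_vsaddu_vv_b);
 *   }
 *
 * where do_vsaddu_vv_b is stamped out by OPIVV2_RM, and vext_vv_rm_2 reads
 * vxrm once and hands it to the masked element loop in vext_vv_rm_1 as a
 * constant.
 */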
1921 
1922 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
1923 {
1924     uint8_t res = a + b;
1925     if (res < a) {
1926         res = UINT8_MAX;
1927         env->vxsat = 0x1;
1928     }
1929     return res;
1930 }
1931 
1932 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
1933                                uint16_t b)
1934 {
1935     uint16_t res = a + b;
1936     if (res < a) {
1937         res = UINT16_MAX;
1938         env->vxsat = 0x1;
1939     }
1940     return res;
1941 }
1942 
1943 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
1944                                uint32_t b)
1945 {
1946     uint32_t res = a + b;
1947     if (res < a) {
1948         res = UINT32_MAX;
1949         env->vxsat = 0x1;
1950     }
1951     return res;
1952 }
1953 
1954 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
1955                                uint64_t b)
1956 {
1957     uint64_t res = a + b;
1958     if (res < a) {
1959         res = UINT64_MAX;
1960         env->vxsat = 0x1;
1961     }
1962     return res;
1963 }
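
/*
 * The res < a test above relies on unsigned wrap-around: the truncated sum
 * is smaller than either operand exactly when the addition overflows.  For
 * example saddu8(env, rm, 200, 100) wraps to 44, and 44 < 200, so the
 * result saturates to UINT8_MAX and vxsat is set.
 */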
1964 
1965 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
1966 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
1967 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
1968 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
1969 GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1)
1970 GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2)
1971 GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4)
1972 GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8)
1973 
1974 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
1975                           CPURISCVState *env, int vxrm);
1976 
1977 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)          \
1978 static inline void                                                  \
1979 do_##NAME(void *vd, target_long s1, void *vs2, int i,               \
1980           CPURISCVState *env, int vxrm)                             \
1981 {                                                                   \
1982     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
1983     *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1);           \
1984 }
1985 
1986 static inline void
1987 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
1988              CPURISCVState *env,
1989              uint32_t vl, uint32_t vm, int vxrm,
1990              opivx2_rm_fn *fn)
1991 {
1992     for (uint32_t i = 0; i < vl; i++) {
1993         if (!vm && !vext_elem_mask(v0, i)) {
1994             continue;
1995         }
1996         fn(vd, s1, vs2, i, env, vxrm);
1997     }
1998 }
1999 
2000 static inline void
2001 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2002              CPURISCVState *env,
2003              uint32_t desc, uint32_t esz, uint32_t dsz,
2004              opivx2_rm_fn *fn)
2005 {
2006     uint32_t vm = vext_vm(desc);
2007     uint32_t vl = env->vl;
2008 
2009     switch (env->vxrm) {
2010     case 0: /* rnu */
2011         vext_vx_rm_1(vd, v0, s1, vs2,
2012                      env, vl, vm, 0, fn);
2013         break;
2014     case 1: /* rne */
2015         vext_vx_rm_1(vd, v0, s1, vs2,
2016                      env, vl, vm, 1, fn);
2017         break;
2018     case 2: /* rdn */
2019         vext_vx_rm_1(vd, v0, s1, vs2,
2020                      env, vl, vm, 2, fn);
2021         break;
2022     default: /* rod */
2023         vext_vx_rm_1(vd, v0, s1, vs2,
2024                      env, vl, vm, 3, fn);
2025         break;
2026     }
2027 }
2028 
2029 /* generate helpers for fixed point instructions with OPIVX format */
2030 #define GEN_VEXT_VX_RM(NAME, ESZ, DSZ)                    \
2031 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
2032         void *vs2, CPURISCVState *env, uint32_t desc)     \
2033 {                                                         \
2034     vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ,    \
2035                  do_##NAME);                              \
2036 }
2037 
2038 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2039 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2040 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2041 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
2042 GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1)
2043 GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2)
2044 GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4)
2045 GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8)
2046 
2047 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2048 {
2049     int8_t res = a + b;
2050     if ((res ^ a) & (res ^ b) & INT8_MIN) {
2051         res = a > 0 ? INT8_MAX : INT8_MIN;
2052         env->vxsat = 0x1;
2053     }
2054     return res;
2055 }
2056 
2057 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2058 {
2059     int16_t res = a + b;
2060     if ((res ^ a) & (res ^ b) & INT16_MIN) {
2061         res = a > 0 ? INT16_MAX : INT16_MIN;
2062         env->vxsat = 0x1;
2063     }
2064     return res;
2065 }
2066 
2067 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2068 {
2069     int32_t res = a + b;
2070     if ((res ^ a) & (res ^ b) & INT32_MIN) {
2071         res = a > 0 ? INT32_MAX : INT32_MIN;
2072         env->vxsat = 0x1;
2073     }
2074     return res;
2075 }
2076 
2077 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2078 {
2079     int64_t res = a + b;
2080     if ((res ^ a) & (res ^ b) & INT64_MIN) {
2081         res = a > 0 ? INT64_MAX : INT64_MIN;
2082         env->vxsat = 0x1;
2083     }
2084     return res;
2085 }
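
/*
 * (res ^ a) & (res ^ b) has the sign bit set exactly when a and b share a
 * sign and res has the opposite one, i.e. on signed overflow.  For example
 * sadd8(env, rm, 100, 100) wraps to -56, both XORs have bit 7 set, and the
 * result saturates to INT8_MAX (a > 0) with vxsat set.
 */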
2086 
2087 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2088 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2089 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2090 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
2091 GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1)
2092 GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2)
2093 GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4)
2094 GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8)
2095 
2096 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2097 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2098 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2099 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
2100 GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1)
2101 GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2)
2102 GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4)
2103 GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8)
2104 
2105 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2106 {
2107     uint8_t res = a - b;
2108     if (res > a) {
2109         res = 0;
2110         env->vxsat = 0x1;
2111     }
2112     return res;
2113 }
2114 
2115 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2116                                uint16_t b)
2117 {
2118     uint16_t res = a - b;
2119     if (res > a) {
2120         res = 0;
2121         env->vxsat = 0x1;
2122     }
2123     return res;
2124 }
2125 
2126 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2127                                uint32_t b)
2128 {
2129     uint32_t res = a - b;
2130     if (res > a) {
2131         res = 0;
2132         env->vxsat = 0x1;
2133     }
2134     return res;
2135 }
2136 
2137 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2138                                uint64_t b)
2139 {
2140     uint64_t res = a - b;
2141     if (res > a) {
2142         res = 0;
2143         env->vxsat = 0x1;
2144     }
2145     return res;
2146 }
2147 
2148 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2149 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2150 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2151 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
2152 GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1)
2153 GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2)
2154 GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4)
2155 GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8)
2156 
2157 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2158 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2159 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2160 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
2161 GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1)
2162 GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2)
2163 GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4)
2164 GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8)
2165 
2166 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2167 {
2168     int8_t res = a - b;
2169     if ((res ^ a) & (a ^ b) & INT8_MIN) {
2170         res = a >= 0 ? INT8_MAX : INT8_MIN;
2171         env->vxsat = 0x1;
2172     }
2173     return res;
2174 }
2175 
2176 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2177 {
2178     int16_t res = a - b;
2179     if ((res ^ a) & (a ^ b) & INT16_MIN) {
2180         res = a >= 0 ? INT16_MAX : INT16_MIN;
2181         env->vxsat = 0x1;
2182     }
2183     return res;
2184 }
2185 
2186 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2187 {
2188     int32_t res = a - b;
2189     if ((res ^ a) & (a ^ b) & INT32_MIN) {
2190         res = a >= 0 ? INT32_MAX : INT32_MIN;
2191         env->vxsat = 0x1;
2192     }
2193     return res;
2194 }
2195 
2196 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2197 {
2198     int64_t res = a - b;
2199     if ((res ^ a) & (a ^ b) & INT64_MIN) {
2200         res = a >= 0 ? INT64_MAX : INT64_MIN;
2201         env->vxsat = 0x1;
2202     }
2203     return res;
2204 }
2205 
2206 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2207 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2208 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2209 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
2210 GEN_VEXT_VV_RM(vssub_vv_b, 1, 1)
2211 GEN_VEXT_VV_RM(vssub_vv_h, 2, 2)
2212 GEN_VEXT_VV_RM(vssub_vv_w, 4, 4)
2213 GEN_VEXT_VV_RM(vssub_vv_d, 8, 8)
2214 
2215 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2216 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2217 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2218 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
2219 GEN_VEXT_VX_RM(vssub_vx_b, 1, 1)
2220 GEN_VEXT_VX_RM(vssub_vx_h, 2, 2)
2221 GEN_VEXT_VX_RM(vssub_vx_w, 4, 4)
2222 GEN_VEXT_VX_RM(vssub_vx_d, 8, 8)
2223 
2224 /* Vector Single-Width Averaging Add and Subtract */
2225 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2226 {
2227     uint8_t d = extract64(v, shift, 1);
2228     uint8_t d1;
2229     uint64_t D1, D2;
2230 
2231     if (shift == 0 || shift > 64) {
2232         return 0;
2233     }
2234 
2235     d1 = extract64(v, shift - 1, 1);
2236     D1 = extract64(v, 0, shift);
2237     if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2238         return d1;
2239     } else if (vxrm == 1) { /* round-to-nearest-even */
2240         if (shift > 1) {
2241             D2 = extract64(v, 0, shift - 1);
2242             return d1 & ((D2 != 0) | d);
2243         } else {
2244             return d1 & d;
2245         }
2246     } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2247         return !d & (D1 != 0);
2248     }
2249     return 0; /* round-down (truncate) */
2250 }
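
/*
 * Worked example of the rounding increment (v = 10, shift = 2, i.e. 10/4):
 *     d = bit 2 of v = 0, d1 = bit 1 = 1, D1 = low 2 bits = 2, D2 = bit 0 = 0
 *     rnu: round = d1 = 1                   -> (10 >> 2) + 1 = 3
 *     rne: round = d1 & ((D2 != 0) | d) = 0 -> 2   (tie rounds to even)
 *     rdn: round = 0                        -> 2   (truncate)
 *     rod: round = !d & (D1 != 0) = 1       -> 3   (jam dropped bits into LSB)
 */

/*
 * Sketch (not a helper used below; the name is ours): rounding right shift
 * of an unsigned 64-bit value, the basic operation the scaling shifts and
 * narrowing clips below are built from.
 */
static inline uint64_t roundoff_u64_example(int vxrm, uint64_t v, uint8_t shift)
{
    /* get_round() returns the 0/1 increment for the selected rounding mode. */
    return (v >> shift) + get_round(vxrm, v, shift);
}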
2251 
2252 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2253 {
2254     int64_t res = (int64_t)a + b;
2255     uint8_t round = get_round(vxrm, res, 1);
2256 
2257     return (res >> 1) + round;
2258 }
2259 
2260 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2261 {
2262     int64_t res = a + b;
2263     uint8_t round = get_round(vxrm, res, 1);
2264     int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2265 
2266     /* With signed overflow, bit 64 is inverse of bit 63. */
2267     return ((res >> 1) ^ over) + round;
2268 }
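
/*
 * Worked example of the overflow correction above: a = b = INT64_MAX.
 * res wraps to -2 and over = INT64_MIN; (res >> 1) is -1, and flipping its
 * sign bit via the XOR yields INT64_MAX, the exact average.  Every rounding
 * mode adds 0 here because the single discarded bit of res is 0.
 */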
2269 
2270 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2271 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2272 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2273 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
2274 GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1)
2275 GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2)
2276 GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4)
2277 GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8)
2278 
2279 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2280 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2281 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2282 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
2283 GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1)
2284 GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2)
2285 GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4)
2286 GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8)
2287 
2288 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2289 {
2290     int64_t res = (int64_t)a - b;
2291     uint8_t round = get_round(vxrm, res, 1);
2292 
2293     return (res >> 1) + round;
2294 }
2295 
2296 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2297 {
2298     int64_t res = (int64_t)a - b;
2299     uint8_t round = get_round(vxrm, res, 1);
2300     int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2301 
2302     /* With signed overflow, bit 64 is inverse of bit 63. */
2303     return ((res >> 1) ^ over) + round;
2304 }
2305 
2306 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2307 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2308 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2309 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
2310 GEN_VEXT_VV_RM(vasub_vv_b, 1, 1)
2311 GEN_VEXT_VV_RM(vasub_vv_h, 2, 2)
2312 GEN_VEXT_VV_RM(vasub_vv_w, 4, 4)
2313 GEN_VEXT_VV_RM(vasub_vv_d, 8, 8)
2314 
2315 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2316 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2317 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2318 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
2319 GEN_VEXT_VX_RM(vasub_vx_b, 1, 1)
2320 GEN_VEXT_VX_RM(vasub_vx_h, 2, 2)
2321 GEN_VEXT_VX_RM(vasub_vx_w, 4, 4)
2322 GEN_VEXT_VX_RM(vasub_vx_d, 8, 8)
2323 
2324 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2325 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2326 {
2327     uint8_t round;
2328     int16_t res;
2329 
2330     res = (int16_t)a * (int16_t)b;
2331     round = get_round(vxrm, res, 7);
2332     res   = (res >> 7) + round;
2333 
2334     if (res > INT8_MAX) {
2335         env->vxsat = 0x1;
2336         return INT8_MAX;
2337     } else if (res < INT8_MIN) {
2338         env->vxsat = 0x1;
2339         return INT8_MIN;
2340     } else {
2341         return res;
2342     }
2343 }
2344 
2345 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2346 {
2347     uint8_t round;
2348     int32_t res;
2349 
2350     res = (int32_t)a * (int32_t)b;
2351     round = get_round(vxrm, res, 15);
2352     res   = (res >> 15) + round;
2353 
2354     if (res > INT16_MAX) {
2355         env->vxsat = 0x1;
2356         return INT16_MAX;
2357     } else if (res < INT16_MIN) {
2358         env->vxsat = 0x1;
2359         return INT16_MIN;
2360     } else {
2361         return res;
2362     }
2363 }
2364 
2365 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2366 {
2367     uint8_t round;
2368     int64_t res;
2369 
2370     res = (int64_t)a * (int64_t)b;
2371     round = get_round(vxrm, res, 31);
2372     res   = (res >> 31) + round;
2373 
2374     if (res > INT32_MAX) {
2375         env->vxsat = 0x1;
2376         return INT32_MAX;
2377     } else if (res < INT32_MIN) {
2378         env->vxsat = 0x1;
2379         return INT32_MIN;
2380     } else {
2381         return res;
2382     }
2383 }
2384 
2385 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2386 {
2387     uint8_t round;
2388     uint64_t hi_64, lo_64;
2389     int64_t res;
2390 
2391     if (a == INT64_MIN && b == INT64_MIN) {
2392         env->vxsat = 1;
2393         return INT64_MAX;
2394     }
2395 
2396     muls64(&lo_64, &hi_64, a, b);
2397     round = get_round(vxrm, lo_64, 63);
2398     /*
2399      * Cannot overflow, as there are always
2400      * 2 sign bits after multiply.
2401      */
2402     res = (hi_64 << 1) | (lo_64 >> 63);
2403     if (round) {
2404         if (res == INT64_MAX) {
2405             env->vxsat = 1;
2406         } else {
2407             res += 1;
2408         }
2409     }
2410     return res;
2411 }
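
/*
 * vsmul computes the 2*SEW-bit product shifted right by SEW - 1 (in effect
 * a fractional multiply), with rounding and saturation.  For SEW = 64 the
 * only input pair whose shifted product does not fit in int64_t is
 * INT64_MIN * INT64_MIN (2**126 >> 63 == 2**63), hence the early saturation
 * above; every other product keeps two identical top bits, so
 * (hi_64 << 1) | (lo_64 >> 63) recovers bits [126:63] without losing the
 * sign.
 */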
2412 
2413 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2414 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2415 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2416 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
2417 GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1)
2418 GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2)
2419 GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4)
2420 GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8)
2421 
2422 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2423 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2424 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2425 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
2426 GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1)
2427 GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2)
2428 GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4)
2429 GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8)
2430 
2431 /* Vector Widening Saturating Scaled Multiply-Add */
2432 static inline uint16_t
2433 vwsmaccu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b,
2434           uint16_t c)
2435 {
2436     uint8_t round;
2437     uint16_t res = (uint16_t)a * b;
2438 
2439     round = get_round(vxrm, res, 4);
2440     res   = (res >> 4) + round;
2441     return saddu16(env, vxrm, c, res);
2442 }
2443 
2444 static inline uint32_t
2445 vwsmaccu16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b,
2446            uint32_t c)
2447 {
2448     uint8_t round;
2449     uint32_t res = (uint32_t)a * b;
2450 
2451     round = get_round(vxrm, res, 8);
2452     res   = (res >> 8) + round;
2453     return saddu32(env, vxrm, c, res);
2454 }
2455 
2456 static inline uint64_t
2457 vwsmaccu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b,
2458            uint64_t c)
2459 {
2460     uint8_t round;
2461     uint64_t res = (uint64_t)a * b;
2462 
2463     round = get_round(vxrm, res, 16);
2464     res   = (res >> 16) + round;
2465     return saddu64(env, vxrm, c, res);
2466 }
2467 
2468 #define OPIVV3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
2469 static inline void                                                 \
2470 do_##NAME(void *vd, void *vs1, void *vs2, int i,                   \
2471           CPURISCVState *env, int vxrm)                            \
2472 {                                                                  \
2473     TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
2474     TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
2475     TD d = *((TD *)vd + HD(i));                                    \
2476     *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1, d);                \
2477 }
2478 
2479 RVVCALL(OPIVV3_RM, vwsmaccu_vv_b, WOP_UUU_B, H2, H1, H1, vwsmaccu8)
2480 RVVCALL(OPIVV3_RM, vwsmaccu_vv_h, WOP_UUU_H, H4, H2, H2, vwsmaccu16)
2481 RVVCALL(OPIVV3_RM, vwsmaccu_vv_w, WOP_UUU_W, H8, H4, H4, vwsmaccu32)
2482 GEN_VEXT_VV_RM(vwsmaccu_vv_b, 1, 2)
2483 GEN_VEXT_VV_RM(vwsmaccu_vv_h, 2, 4)
2484 GEN_VEXT_VV_RM(vwsmaccu_vv_w, 4, 8)
2485 
2486 #define OPIVX3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)         \
2487 static inline void                                                 \
2488 do_##NAME(void *vd, target_long s1, void *vs2, int i,              \
2489           CPURISCVState *env, int vxrm)                            \
2490 {                                                                  \
2491     TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
2492     TD d = *((TD *)vd + HD(i));                                    \
2493     *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1, d);       \
2494 }
2495 
2496 RVVCALL(OPIVX3_RM, vwsmaccu_vx_b, WOP_UUU_B, H2, H1, vwsmaccu8)
2497 RVVCALL(OPIVX3_RM, vwsmaccu_vx_h, WOP_UUU_H, H4, H2, vwsmaccu16)
2498 RVVCALL(OPIVX3_RM, vwsmaccu_vx_w, WOP_UUU_W, H8, H4, vwsmaccu32)
2499 GEN_VEXT_VX_RM(vwsmaccu_vx_b, 1, 2)
2500 GEN_VEXT_VX_RM(vwsmaccu_vx_h, 2, 4)
2501 GEN_VEXT_VX_RM(vwsmaccu_vx_w, 4, 8)
2502 
2503 static inline int16_t
2504 vwsmacc8(CPURISCVState *env, int vxrm, int8_t a, int8_t b, int16_t c)
2505 {
2506     uint8_t round;
2507     int16_t res = (int16_t)a * b;
2508 
2509     round = get_round(vxrm, res, 4);
2510     res   = (res >> 4) + round;
2511     return sadd16(env, vxrm, c, res);
2512 }
2513 
2514 static inline int32_t
2515 vwsmacc16(CPURISCVState *env, int vxrm, int16_t a, int16_t b, int32_t c)
2516 {
2517     uint8_t round;
2518     int32_t res = (int32_t)a * b;
2519 
2520     round = get_round(vxrm, res, 8);
2521     res   = (res >> 8) + round;
2522     return sadd32(env, vxrm, c, res);
2523 
2524 }
2525 
2526 static inline int64_t
2527 vwsmacc32(CPURISCVState *env, int vxrm, int32_t a, int32_t b, int64_t c)
2528 {
2529     uint8_t round;
2530     int64_t res = (int64_t)a * b;
2531 
2532     round = get_round(vxrm, res, 16);
2533     res   = (res >> 16) + round;
2534     return sadd64(env, vxrm, c, res);
2535 }
2536 
2537 RVVCALL(OPIVV3_RM, vwsmacc_vv_b, WOP_SSS_B, H2, H1, H1, vwsmacc8)
2538 RVVCALL(OPIVV3_RM, vwsmacc_vv_h, WOP_SSS_H, H4, H2, H2, vwsmacc16)
2539 RVVCALL(OPIVV3_RM, vwsmacc_vv_w, WOP_SSS_W, H8, H4, H4, vwsmacc32)
2540 GEN_VEXT_VV_RM(vwsmacc_vv_b, 1, 2)
2541 GEN_VEXT_VV_RM(vwsmacc_vv_h, 2, 4)
2542 GEN_VEXT_VV_RM(vwsmacc_vv_w, 4, 8)
2543 RVVCALL(OPIVX3_RM, vwsmacc_vx_b, WOP_SSS_B, H2, H1, vwsmacc8)
2544 RVVCALL(OPIVX3_RM, vwsmacc_vx_h, WOP_SSS_H, H4, H2, vwsmacc16)
2545 RVVCALL(OPIVX3_RM, vwsmacc_vx_w, WOP_SSS_W, H8, H4, vwsmacc32)
2546 GEN_VEXT_VX_RM(vwsmacc_vx_b, 1, 2)
2547 GEN_VEXT_VX_RM(vwsmacc_vx_h, 2, 4)
2548 GEN_VEXT_VX_RM(vwsmacc_vx_w, 4, 8)
2549 
2550 static inline int16_t
2551 vwsmaccsu8(CPURISCVState *env, int vxrm, uint8_t a, int8_t b, int16_t c)
2552 {
2553     uint8_t round;
2554     int16_t res = a * (int16_t)b;
2555 
2556     round = get_round(vxrm, res, 4);
2557     res   = (res >> 4) + round;
2558     return ssub16(env, vxrm, c, res);
2559 }
2560 
2561 static inline int32_t
2562 vwsmaccsu16(CPURISCVState *env, int vxrm, uint16_t a, int16_t b, uint32_t c)
2563 {
2564     uint8_t round;
2565     int32_t res = a * (int32_t)b;
2566 
2567     round = get_round(vxrm, res, 8);
2568     res   = (res >> 8) + round;
2569     return ssub32(env, vxrm, c, res);
2570 }
2571 
2572 static inline int64_t
2573 vwsmaccsu32(CPURISCVState *env, int vxrm, uint32_t a, int32_t b, int64_t c)
2574 {
2575     uint8_t round;
2576     int64_t res = a * (int64_t)b;
2577 
2578     round = get_round(vxrm, res, 16);
2579     res   = (res >> 16) + round;
2580     return ssub64(env, vxrm, c, res);
2581 }
2582 
2583 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, vwsmaccsu8)
2584 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, vwsmaccsu16)
2585 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, vwsmaccsu32)
2586 GEN_VEXT_VV_RM(vwsmaccsu_vv_b, 1, 2)
2587 GEN_VEXT_VV_RM(vwsmaccsu_vv_h, 2, 4)
2588 GEN_VEXT_VV_RM(vwsmaccsu_vv_w, 4, 8)
2589 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_b, WOP_SSU_B, H2, H1, vwsmaccsu8)
2590 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_h, WOP_SSU_H, H4, H2, vwsmaccsu16)
2591 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_w, WOP_SSU_W, H8, H4, vwsmaccsu32)
2592 GEN_VEXT_VX_RM(vwsmaccsu_vx_b, 1, 2)
2593 GEN_VEXT_VX_RM(vwsmaccsu_vx_h, 2, 4)
2594 GEN_VEXT_VX_RM(vwsmaccsu_vx_w, 4, 8)
2595 
2596 static inline int16_t
2597 vwsmaccus8(CPURISCVState *env, int vxrm, int8_t a, uint8_t b, int16_t c)
2598 {
2599     uint8_t round;
2600     int16_t res = (int16_t)a * b;
2601 
2602     round = get_round(vxrm, res, 4);
2603     res   = (res >> 4) + round;
2604     return ssub16(env, vxrm, c, res);
2605 }
2606 
2607 static inline int32_t
2608 vwsmaccus16(CPURISCVState *env, int vxrm, int16_t a, uint16_t b, int32_t c)
2609 {
2610     uint8_t round;
2611     int32_t res = (int32_t)a * b;
2612 
2613     round = get_round(vxrm, res, 8);
2614     res   = (res >> 8) + round;
2615     return ssub32(env, vxrm, c, res);
2616 }
2617 
2618 static inline int64_t
2619 vwsmaccus32(CPURISCVState *env, int vxrm, int32_t a, uint32_t b, int64_t c)
2620 {
2621     uint8_t round;
2622     int64_t res = (int64_t)a * b;
2623 
2624     round = get_round(vxrm, res, 16);
2625     res   = (res >> 16) + round;
2626     return ssub64(env, vxrm, c, res);
2627 }
2628 
2629 RVVCALL(OPIVX3_RM, vwsmaccus_vx_b, WOP_SUS_B, H2, H1, vwsmaccus8)
2630 RVVCALL(OPIVX3_RM, vwsmaccus_vx_h, WOP_SUS_H, H4, H2, vwsmaccus16)
2631 RVVCALL(OPIVX3_RM, vwsmaccus_vx_w, WOP_SUS_W, H8, H4, vwsmaccus32)
2632 GEN_VEXT_VX_RM(vwsmaccus_vx_b, 1, 2)
2633 GEN_VEXT_VX_RM(vwsmaccus_vx_h, 2, 4)
2634 GEN_VEXT_VX_RM(vwsmaccus_vx_w, 4, 8)
2635 
2636 /* Vector Single-Width Scaling Shift Instructions */
2637 static inline uint8_t
2638 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2639 {
2640     uint8_t round, shift = b & 0x7;
2641     uint8_t res;
2642 
2643     round = get_round(vxrm, a, shift);
2644     res   = (a >> shift)  + round;
2645     return res;
2646 }
2647 static inline uint16_t
2648 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2649 {
2650     uint8_t round, shift = b & 0xf;
2651     uint16_t res;
2652 
2653     round = get_round(vxrm, a, shift);
2654     res   = (a >> shift)  + round;
2655     return res;
2656 }
2657 static inline uint32_t
2658 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2659 {
2660     uint8_t round, shift = b & 0x1f;
2661     uint32_t res;
2662 
2663     round = get_round(vxrm, a, shift);
2664     res   = (a >> shift)  + round;
2665     return res;
2666 }
2667 static inline uint64_t
2668 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2669 {
2670     uint8_t round, shift = b & 0x3f;
2671     uint64_t res;
2672 
2673     round = get_round(vxrm, a, shift);
2674     res   = (a >> shift)  + round;
2675     return res;
2676 }
2677 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2678 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2679 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2680 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
2681 GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1)
2682 GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2)
2683 GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4)
2684 GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8)
2685 
2686 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2687 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2688 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2689 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
2690 GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1)
2691 GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2)
2692 GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4)
2693 GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8)
2694 
2695 static inline int8_t
2696 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2697 {
2698     uint8_t round, shift = b & 0x7;
2699     int8_t res;
2700 
2701     round = get_round(vxrm, a, shift);
2702     res   = (a >> shift)  + round;
2703     return res;
2704 }
2705 static inline int16_t
2706 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2707 {
2708     uint8_t round, shift = b & 0xf;
2709     int16_t res;
2710 
2711     round = get_round(vxrm, a, shift);
2712     res   = (a >> shift)  + round;
2713     return res;
2714 }
2715 static inline int32_t
2716 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2717 {
2718     uint8_t round, shift = b & 0x1f;
2719     int32_t res;
2720 
2721     round = get_round(vxrm, a, shift);
2722     res   = (a >> shift)  + round;
2723     return res;
2724 }
2725 static inline int64_t
2726 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2727 {
2728     uint8_t round, shift = b & 0x3f;
2729     int64_t res;
2730 
2731     round = get_round(vxrm, a, shift);
2732     res   = (a >> shift)  + round;
2733     return res;
2734 }
2735 
2736 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2737 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2738 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2739 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
2740 GEN_VEXT_VV_RM(vssra_vv_b, 1, 1)
2741 GEN_VEXT_VV_RM(vssra_vv_h, 2, 2)
2742 GEN_VEXT_VV_RM(vssra_vv_w, 4, 4)
2743 GEN_VEXT_VV_RM(vssra_vv_d, 8, 8)
2744 
2745 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2746 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2747 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2748 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
2749 GEN_VEXT_VX_RM(vssra_vx_b, 1, 1)
2750 GEN_VEXT_VX_RM(vssra_vx_h, 2, 2)
2751 GEN_VEXT_VX_RM(vssra_vx_w, 4, 4)
2752 GEN_VEXT_VX_RM(vssra_vx_d, 8, 8)
2753 
2754 /* Vector Narrowing Fixed-Point Clip Instructions */
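/*
 * These helpers narrow a 2*SEW element to SEW bits: shift right by the
 * masked shift amount, add the vxrm rounding increment, then saturate to
 * the destination range and set vxsat when clamping occurs.  For example
 * (derived from the code below), vnclip8 with a = 0x7fff and b = 4 yields
 * (0x7fff >> 4) + round, which exceeds INT8_MAX, so the result is clamped
 * to 127 and env->vxsat is set to 1.
 */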
2755 static inline int8_t
2756 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2757 {
2758     uint8_t round, shift = b & 0xf;
2759     int16_t res;
2760 
2761     round = get_round(vxrm, a, shift);
2762     res   = (a >> shift)  + round;
2763     if (res > INT8_MAX) {
2764         env->vxsat = 0x1;
2765         return INT8_MAX;
2766     } else if (res < INT8_MIN) {
2767         env->vxsat = 0x1;
2768         return INT8_MIN;
2769     } else {
2770         return res;
2771     }
2772 }
2773 
2774 static inline int16_t
2775 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2776 {
2777     uint8_t round, shift = b & 0x1f;
2778     int32_t res;
2779 
2780     round = get_round(vxrm, a, shift);
2781     res   = (a >> shift)  + round;
2782     if (res > INT16_MAX) {
2783         env->vxsat = 0x1;
2784         return INT16_MAX;
2785     } else if (res < INT16_MIN) {
2786         env->vxsat = 0x1;
2787         return INT16_MIN;
2788     } else {
2789         return res;
2790     }
2791 }
2792 
2793 static inline int32_t
2794 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2795 {
2796     uint8_t round, shift = b & 0x3f;
2797     int64_t res;
2798 
2799     round = get_round(vxrm, a, shift);
2800     res   = (a >> shift)  + round;
2801     if (res > INT32_MAX) {
2802         env->vxsat = 0x1;
2803         return INT32_MAX;
2804     } else if (res < INT32_MIN) {
2805         env->vxsat = 0x1;
2806         return INT32_MIN;
2807     } else {
2808         return res;
2809     }
2810 }
2811 
2812 RVVCALL(OPIVV2_RM, vnclip_vv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
2813 RVVCALL(OPIVV2_RM, vnclip_vv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
2814 RVVCALL(OPIVV2_RM, vnclip_vv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
2815 GEN_VEXT_VV_RM(vnclip_vv_b, 1, 1)
2816 GEN_VEXT_VV_RM(vnclip_vv_h, 2, 2)
2817 GEN_VEXT_VV_RM(vnclip_vv_w, 4, 4)
2818 
2819 RVVCALL(OPIVX2_RM, vnclip_vx_b, NOP_SSS_B, H1, H2, vnclip8)
2820 RVVCALL(OPIVX2_RM, vnclip_vx_h, NOP_SSS_H, H2, H4, vnclip16)
2821 RVVCALL(OPIVX2_RM, vnclip_vx_w, NOP_SSS_W, H4, H8, vnclip32)
2822 GEN_VEXT_VX_RM(vnclip_vx_b, 1, 1)
2823 GEN_VEXT_VX_RM(vnclip_vx_h, 2, 2)
2824 GEN_VEXT_VX_RM(vnclip_vx_w, 4, 4)
2825 
2826 static inline uint8_t
2827 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2828 {
2829     uint8_t round, shift = b & 0xf;
2830     uint16_t res;
2831 
2832     round = get_round(vxrm, a, shift);
2833     res   = (a >> shift)  + round;
2834     if (res > UINT8_MAX) {
2835         env->vxsat = 0x1;
2836         return UINT8_MAX;
2837     } else {
2838         return res;
2839     }
2840 }
2841 
2842 static inline uint16_t
2843 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
2844 {
2845     uint8_t round, shift = b & 0x1f;
2846     uint32_t res;
2847 
2848     round = get_round(vxrm, a, shift);
2849     res   = (a >> shift)  + round;
2850     if (res > UINT16_MAX) {
2851         env->vxsat = 0x1;
2852         return UINT16_MAX;
2853     } else {
2854         return res;
2855     }
2856 }
2857 
2858 static inline uint32_t
2859 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
2860 {
2861     uint8_t round, shift = b & 0x3f;
2862     uint64_t res;
2863 
2864     round = get_round(vxrm, a, shift);
2865     res   = (a >> shift)  + round;
2866     if (res > UINT32_MAX) {
2867         env->vxsat = 0x1;
2868         return UINT32_MAX;
2869     } else {
2870         return res;
2871     }
2872 }
2873 
2874 RVVCALL(OPIVV2_RM, vnclipu_vv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
2875 RVVCALL(OPIVV2_RM, vnclipu_vv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
2876 RVVCALL(OPIVV2_RM, vnclipu_vv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
2877 GEN_VEXT_VV_RM(vnclipu_vv_b, 1, 1)
2878 GEN_VEXT_VV_RM(vnclipu_vv_h, 2, 2)
2879 GEN_VEXT_VV_RM(vnclipu_vv_w, 4, 4)
2880 
2881 RVVCALL(OPIVX2_RM, vnclipu_vx_b, NOP_UUU_B, H1, H2, vnclipu8)
2882 RVVCALL(OPIVX2_RM, vnclipu_vx_h, NOP_UUU_H, H2, H4, vnclipu16)
2883 RVVCALL(OPIVX2_RM, vnclipu_vx_w, NOP_UUU_W, H4, H8, vnclipu32)
2884 GEN_VEXT_VX_RM(vnclipu_vx_b, 1, 1)
2885 GEN_VEXT_VX_RM(vnclipu_vx_h, 2, 2)
2886 GEN_VEXT_VX_RM(vnclipu_vx_w, 4, 4)
2887 
2888 /*
2889  *** Vector Floating-Point Arithmetic Instructions
2890  */
2891 /* Vector Single-Width Floating-Point Add/Subtract Instructions */
2892 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
2893 static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
2894                       CPURISCVState *env)                      \
2895 {                                                              \
2896     TX1 s1 = *((T1 *)vs1 + HS1(i));                            \
2897     TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
2898     *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status);         \
2899 }
2900 
2901 #define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ)                   \
2902 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
2903                   void *vs2, CPURISCVState *env,          \
2904                   uint32_t desc)                          \
2905 {                                                         \
2906     uint32_t vm = vext_vm(desc);                          \
2907     uint32_t vl = env->vl;                                \
2908     uint32_t i;                                           \
2909                                                           \
2910     for (i = 0; i < vl; i++) {                            \
2911         if (!vm && !vext_elem_mask(v0, i)) {              \
2912             continue;                                     \
2913         }                                                 \
2914         do_##NAME(vd, vs1, vs2, i, env);                  \
2915     }                                                     \
2916 }
2917 
2918 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
2919 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
2920 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
2921 GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2)
2922 GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4)
2923 GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8)
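
/*
 * For illustration, the RVVCALL(OPFVV2, vfadd_vv_h, ...) line above expands
 * to roughly the following per-element worker (a sketch: it assumes that
 * OP_UUU_H, defined earlier in this file, supplies uint16_t for all of
 * TD/T1/T2/TX1/TX2):
 *
 *     static void do_vfadd_vv_h(void *vd, void *vs1, void *vs2, int i,
 *                               CPURISCVState *env)
 *     {
 *         uint16_t s1 = *((uint16_t *)vs1 + H2(i));
 *         uint16_t s2 = *((uint16_t *)vs2 + H2(i));
 *         *((uint16_t *)vd + H2(i)) = float16_add(s2, s1, &env->fp_status);
 *     }
 *
 * GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2) then wraps this worker in the loop over
 * the first vl elements, skipping masked-off elements.
 */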
2924 
2925 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)        \
2926 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
2927                       CPURISCVState *env)                      \
2928 {                                                              \
2929     TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
2930     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
2931 }
2932 
2933 #define GEN_VEXT_VF(NAME, ESZ, DSZ)                       \
2934 void HELPER(NAME)(void *vd, void *v0, uint64_t s1,        \
2935                   void *vs2, CPURISCVState *env,          \
2936                   uint32_t desc)                          \
2937 {                                                         \
2938     uint32_t vm = vext_vm(desc);                          \
2939     uint32_t vl = env->vl;                                \
2940     uint32_t i;                                           \
2941                                                           \
2942     for (i = 0; i < vl; i++) {                            \
2943         if (!vm && !vext_elem_mask(v0, i)) {              \
2944             continue;                                     \
2945         }                                                 \
2946         do_##NAME(vd, s1, vs2, i, env);                   \
2947     }                                                     \
2948 }
2949 
2950 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
2951 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
2952 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
2953 GEN_VEXT_VF(vfadd_vf_h, 2, 2)
2954 GEN_VEXT_VF(vfadd_vf_w, 4, 4)
2955 GEN_VEXT_VF(vfadd_vf_d, 8, 8)
2956 
2957 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
2958 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
2959 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
2960 GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2)
2961 GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4)
2962 GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8)
2963 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
2964 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
2965 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
2966 GEN_VEXT_VF(vfsub_vf_h, 2, 2)
2967 GEN_VEXT_VF(vfsub_vf_w, 4, 4)
2968 GEN_VEXT_VF(vfsub_vf_d, 8, 8)
2969 
2970 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
2971 {
2972     return float16_sub(b, a, s);
2973 }
2974 
2975 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
2976 {
2977     return float32_sub(b, a, s);
2978 }
2979 
2980 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
2981 {
2982     return float64_sub(b, a, s);
2983 }
2984 
2985 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
2986 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
2987 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
2988 GEN_VEXT_VF(vfrsub_vf_h, 2, 2)
2989 GEN_VEXT_VF(vfrsub_vf_w, 4, 4)
2990 GEN_VEXT_VF(vfrsub_vf_d, 8, 8)
2991 
2992 /* Vector Widening Floating-Point Add/Subtract Instructions */
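/*
 * The widening helpers convert the SEW-wide operands up to the 2*SEW float
 * type before operating (the .wv/.wf forms below convert only the narrower
 * operand).  The boolean passed to float16_to_float32() is softfloat's
 * "ieee" flag, selecting IEEE half-precision rather than the ARM
 * alternative half-precision format.
 */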
2993 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
2994 {
2995     return float32_add(float16_to_float32(a, true, s),
2996             float16_to_float32(b, true, s), s);
2997 }
2998 
2999 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
3000 {
3001     return float64_add(float32_to_float64(a, s),
3002             float32_to_float64(b, s), s);
3004 }
3005 
3006 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
3007 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
3008 GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4)
3009 GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8)
3010 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
3011 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
3012 GEN_VEXT_VF(vfwadd_vf_h, 2, 4)
3013 GEN_VEXT_VF(vfwadd_vf_w, 4, 8)
3014 
3015 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
3016 {
3017     return float32_sub(float16_to_float32(a, true, s),
3018             float16_to_float32(b, true, s), s);
3019 }
3020 
3021 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
3022 {
3023     return float64_sub(float32_to_float64(a, s),
3024             float32_to_float64(b, s), s);
3026 }
3027 
3028 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
3029 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
3030 GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4)
3031 GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8)
3032 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
3033 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
3034 GEN_VEXT_VF(vfwsub_vf_h, 2, 4)
3035 GEN_VEXT_VF(vfwsub_vf_w, 4, 8)
3036 
3037 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
3038 {
3039     return float32_add(a, float16_to_float32(b, true, s), s);
3040 }
3041 
3042 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
3043 {
3044     return float64_add(a, float32_to_float64(b, s), s);
3045 }
3046 
3047 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
3048 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
3049 GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4)
3050 GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8)
3051 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
3052 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
3053 GEN_VEXT_VF(vfwadd_wf_h, 2, 4)
3054 GEN_VEXT_VF(vfwadd_wf_w, 4, 8)
3055 
3056 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
3057 {
3058     return float32_sub(a, float16_to_float32(b, true, s), s);
3059 }
3060 
3061 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
3062 {
3063     return float64_sub(a, float32_to_float64(b, s), s);
3064 }
3065 
3066 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
3067 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
3068 GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4)
3069 GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8)
3070 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
3071 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
3072 GEN_VEXT_VF(vfwsub_wf_h, 2, 4)
3073 GEN_VEXT_VF(vfwsub_wf_w, 4, 8)
3074 
3075 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */
3076 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
3077 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
3078 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
3079 GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2)
3080 GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4)
3081 GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8)
3082 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
3083 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
3084 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
3085 GEN_VEXT_VF(vfmul_vf_h, 2, 2)
3086 GEN_VEXT_VF(vfmul_vf_w, 4, 4)
3087 GEN_VEXT_VF(vfmul_vf_d, 8, 8)
3088 
3089 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
3090 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
3091 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
3092 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2)
3093 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4)
3094 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8)
3095 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
3096 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
3097 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
3098 GEN_VEXT_VF(vfdiv_vf_h, 2, 2)
3099 GEN_VEXT_VF(vfdiv_vf_w, 4, 4)
3100 GEN_VEXT_VF(vfdiv_vf_d, 8, 8)
3101 
3102 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3103 {
3104     return float16_div(b, a, s);
3105 }
3106 
3107 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
3108 {
3109     return float32_div(b, a, s);
3110 }
3111 
3112 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
3113 {
3114     return float64_div(b, a, s);
3115 }
3116 
3117 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
3118 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
3119 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
3120 GEN_VEXT_VF(vfrdiv_vf_h, 2, 2)
3121 GEN_VEXT_VF(vfrdiv_vf_w, 4, 4)
3122 GEN_VEXT_VF(vfrdiv_vf_d, 8, 8)
3123 
3124 /* Vector Widening Floating-Point Multiply */
3125 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3126 {
3127     return float32_mul(float16_to_float32(a, true, s),
3128             float16_to_float32(b, true, s), s);
3129 }
3130 
3131 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3132 {
3133     return float64_mul(float32_to_float64(a, s),
3134             float32_to_float64(b, s), s);
3135 }
3136 
3137 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3138 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
3139 GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4)
3140 GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8)
3141 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3142 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
3143 GEN_VEXT_VF(vfwmul_vf_h, 2, 4)
3144 GEN_VEXT_VF(vfwmul_vf_w, 4, 8)
3145 
3146 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
3147 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)       \
3148 static void do_##NAME(void *vd, void *vs1, void *vs2, int i,       \
3149         CPURISCVState *env)                                        \
3150 {                                                                  \
3151     TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
3152     TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
3153     TD d = *((TD *)vd + HD(i));                                    \
3154     *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status);          \
3155 }
3156 
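/*
 * The OPFVV3/OPFVF3 callbacks receive their arguments as OP(s2, s1, d),
 * i.e. a = vs2[i], b = vs1[i] (or the scalar) and d = vd[i].  Thus
 * fmacc16() computes (vs1 * vs2) + vd for vfmacc, while fmadd16() further
 * down passes (d, b, a) to compute (vs1 * vd) + vs2 for vfmadd, matching
 * the RVV operand conventions for the *macc and *madd forms.
 */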
3157 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3158 {
3159     return float16_muladd(a, b, d, 0, s);
3160 }
3161 
3162 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3163 {
3164     return float32_muladd(a, b, d, 0, s);
3165 }
3166 
3167 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3168 {
3169     return float64_muladd(a, b, d, 0, s);
3170 }
3171 
3172 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3173 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3174 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
3175 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2)
3176 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4)
3177 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8)
3178 
3179 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)           \
3180 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i,    \
3181         CPURISCVState *env)                                       \
3182 {                                                                 \
3183     TX2 s2 = *((T2 *)vs2 + HS2(i));                               \
3184     TD d = *((TD *)vd + HD(i));                                   \
3185     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
3186 }
3187 
3188 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3189 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3190 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
3191 GEN_VEXT_VF(vfmacc_vf_h, 2, 2)
3192 GEN_VEXT_VF(vfmacc_vf_w, 4, 4)
3193 GEN_VEXT_VF(vfmacc_vf_d, 8, 8)
3194 
3195 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3196 {
3197     return float16_muladd(a, b, d,
3198             float_muladd_negate_c | float_muladd_negate_product, s);
3199 }
3200 
3201 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3202 {
3203     return float32_muladd(a, b, d,
3204             float_muladd_negate_c | float_muladd_negate_product, s);
3205 }
3206 
3207 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3208 {
3209     return float64_muladd(a, b, d,
3210             float_muladd_negate_c | float_muladd_negate_product, s);
3211 }
3212 
3213 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3214 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3215 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
3216 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2)
3217 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4)
3218 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8)
3219 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3220 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3221 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
3222 GEN_VEXT_VF(vfnmacc_vf_h, 2, 2)
3223 GEN_VEXT_VF(vfnmacc_vf_w, 4, 4)
3224 GEN_VEXT_VF(vfnmacc_vf_d, 8, 8)
3225 
3226 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3227 {
3228     return float16_muladd(a, b, d, float_muladd_negate_c, s);
3229 }
3230 
3231 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3232 {
3233     return float32_muladd(a, b, d, float_muladd_negate_c, s);
3234 }
3235 
3236 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3237 {
3238     return float64_muladd(a, b, d, float_muladd_negate_c, s);
3239 }
3240 
3241 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3242 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3243 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
3244 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2)
3245 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4)
3246 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8)
3247 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3248 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3249 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
3250 GEN_VEXT_VF(vfmsac_vf_h, 2, 2)
3251 GEN_VEXT_VF(vfmsac_vf_w, 4, 4)
3252 GEN_VEXT_VF(vfmsac_vf_d, 8, 8)
3253 
3254 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3255 {
3256     return float16_muladd(a, b, d, float_muladd_negate_product, s);
3257 }
3258 
3259 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3260 {
3261     return float32_muladd(a, b, d, float_muladd_negate_product, s);
3262 }
3263 
3264 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3265 {
3266     return float64_muladd(a, b, d, float_muladd_negate_product, s);
3267 }
3268 
3269 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3270 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3271 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
3272 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2)
3273 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4)
3274 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8)
3275 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3276 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3277 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
3278 GEN_VEXT_VF(vfnmsac_vf_h, 2, 2)
3279 GEN_VEXT_VF(vfnmsac_vf_w, 4, 4)
3280 GEN_VEXT_VF(vfnmsac_vf_d, 8, 8)
3281 
3282 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3283 {
3284     return float16_muladd(d, b, a, 0, s);
3285 }
3286 
3287 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3288 {
3289     return float32_muladd(d, b, a, 0, s);
3290 }
3291 
3292 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3293 {
3294     return float64_muladd(d, b, a, 0, s);
3295 }
3296 
3297 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3298 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3299 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
3300 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2)
3301 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4)
3302 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8)
3303 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3304 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3305 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
3306 GEN_VEXT_VF(vfmadd_vf_h, 2, 2)
3307 GEN_VEXT_VF(vfmadd_vf_w, 4, 4)
3308 GEN_VEXT_VF(vfmadd_vf_d, 8, 8)
3309 
3310 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3311 {
3312     return float16_muladd(d, b, a,
3313             float_muladd_negate_c | float_muladd_negate_product, s);
3314 }
3315 
3316 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3317 {
3318     return float32_muladd(d, b, a,
3319             float_muladd_negate_c | float_muladd_negate_product, s);
3320 }
3321 
3322 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3323 {
3324     return float64_muladd(d, b, a,
3325             float_muladd_negate_c | float_muladd_negate_product, s);
3326 }
3327 
3328 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3329 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3330 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
3331 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2)
3332 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4)
3333 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8)
3334 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3335 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3336 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
3337 GEN_VEXT_VF(vfnmadd_vf_h, 2, 2)
3338 GEN_VEXT_VF(vfnmadd_vf_w, 4, 4)
3339 GEN_VEXT_VF(vfnmadd_vf_d, 8, 8)
3340 
3341 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3342 {
3343     return float16_muladd(d, b, a, float_muladd_negate_c, s);
3344 }
3345 
3346 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3347 {
3348     return float32_muladd(d, b, a, float_muladd_negate_c, s);
3349 }
3350 
3351 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3352 {
3353     return float64_muladd(d, b, a, float_muladd_negate_c, s);
3354 }
3355 
3356 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3357 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3358 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
3359 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2)
3360 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4)
3361 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8)
3362 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3363 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3364 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
3365 GEN_VEXT_VF(vfmsub_vf_h, 2, 2)
3366 GEN_VEXT_VF(vfmsub_vf_w, 4, 4)
3367 GEN_VEXT_VF(vfmsub_vf_d, 8, 8)
3368 
3369 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3370 {
3371     return float16_muladd(d, b, a, float_muladd_negate_product, s);
3372 }
3373 
3374 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3375 {
3376     return float32_muladd(d, b, a, float_muladd_negate_product, s);
3377 }
3378 
3379 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3380 {
3381     return float64_muladd(d, b, a, float_muladd_negate_product, s);
3382 }
3383 
3384 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3385 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3386 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
3387 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2)
3388 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4)
3389 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8)
3390 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3391 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3392 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
3393 GEN_VEXT_VF(vfnmsub_vf_h, 2, 2)
3394 GEN_VEXT_VF(vfnmsub_vf_w, 4, 4)
3395 GEN_VEXT_VF(vfnmsub_vf_d, 8, 8)
3396 
3397 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3398 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3399 {
3400     return float32_muladd(float16_to_float32(a, true, s),
3401                         float16_to_float32(b, true, s), d, 0, s);
3402 }
3403 
3404 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3405 {
3406     return float64_muladd(float32_to_float64(a, s),
3407                         float32_to_float64(b, s), d, 0, s);
3408 }
3409 
3410 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3411 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
3412 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4)
3413 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8)
3414 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3415 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
3416 GEN_VEXT_VF(vfwmacc_vf_h, 2, 4)
3417 GEN_VEXT_VF(vfwmacc_vf_w, 4, 8)
3418 
3419 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3420 {
3421     return float32_muladd(float16_to_float32(a, true, s),
3422                         float16_to_float32(b, true, s), d,
3423                         float_muladd_negate_c | float_muladd_negate_product, s);
3424 }
3425 
3426 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3427 {
3428     return float64_muladd(float32_to_float64(a, s),
3429                         float32_to_float64(b, s), d,
3430                         float_muladd_negate_c | float_muladd_negate_product, s);
3431 }
3432 
3433 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3434 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
3435 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4)
3436 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8)
3437 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3438 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
3439 GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4)
3440 GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8)
3441 
3442 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3443 {
3444     return float32_muladd(float16_to_float32(a, true, s),
3445                         float16_to_float32(b, true, s), d,
3446                         float_muladd_negate_c, s);
3447 }
3448 
3449 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3450 {
3451     return float64_muladd(float32_to_float64(a, s),
3452                         float32_to_float64(b, s), d,
3453                         float_muladd_negate_c, s);
3454 }
3455 
3456 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3457 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
3458 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4)
3459 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8)
3460 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3461 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
3462 GEN_VEXT_VF(vfwmsac_vf_h, 2, 4)
3463 GEN_VEXT_VF(vfwmsac_vf_w, 4, 8)
3464 
3465 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3466 {
3467     return float32_muladd(float16_to_float32(a, true, s),
3468                         float16_to_float32(b, true, s), d,
3469                         float_muladd_negate_product, s);
3470 }
3471 
3472 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3473 {
3474     return float64_muladd(float32_to_float64(a, s),
3475                         float32_to_float64(b, s), d,
3476                         float_muladd_negate_product, s);
3477 }
3478 
3479 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3480 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
3481 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4)
3482 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8)
3483 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3484 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
3485 GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4)
3486 GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8)
3487 
3488 /* Vector Floating-Point Square-Root Instruction */
3489 /* (TD, T2, TX2) */
3490 #define OP_UU_H uint16_t, uint16_t, uint16_t
3491 #define OP_UU_W uint32_t, uint32_t, uint32_t
3492 #define OP_UU_D uint64_t, uint64_t, uint64_t
3493 
3494 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP)        \
3495 static void do_##NAME(void *vd, void *vs2, int i,      \
3496         CPURISCVState *env)                            \
3497 {                                                      \
3498     TX2 s2 = *((T2 *)vs2 + HS2(i));                    \
3499     *((TD *)vd + HD(i)) = OP(s2, &env->fp_status);     \
3500 }
3501 
3502 #define GEN_VEXT_V_ENV(NAME, ESZ, DSZ)                 \
3503 void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
3504         CPURISCVState *env, uint32_t desc)             \
3505 {                                                      \
3506     uint32_t vm = vext_vm(desc);                       \
3507     uint32_t vl = env->vl;                             \
3508     uint32_t i;                                        \
3509                                                        \
3510     if (vl == 0) {                                     \
3511         return;                                        \
3512     }                                                  \
3513     for (i = 0; i < vl; i++) {                         \
3514         if (!vm && !vext_elem_mask(v0, i)) {           \
3515             continue;                                  \
3516         }                                              \
3517         do_##NAME(vd, vs2, i, env);                    \
3518     }                                                  \
3519 }
3520 
3521 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3522 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3523 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
3524 GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2)
3525 GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4)
3526 GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8)
3527 
3528 /* Vector Floating-Point MIN/MAX Instructions */
3529 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minnum)
3530 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minnum)
3531 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minnum)
3532 GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2)
3533 GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4)
3534 GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8)
3535 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minnum)
3536 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minnum)
3537 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minnum)
3538 GEN_VEXT_VF(vfmin_vf_h, 2, 2)
3539 GEN_VEXT_VF(vfmin_vf_w, 4, 4)
3540 GEN_VEXT_VF(vfmin_vf_d, 8, 8)
3541 
3542 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maxnum)
3543 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maxnum)
3544 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maxnum)
3545 GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2)
3546 GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4)
3547 GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8)
3548 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maxnum)
3549 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maxnum)
3550 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maxnum)
3551 GEN_VEXT_VF(vfmax_vf_h, 2, 2)
3552 GEN_VEXT_VF(vfmax_vf_w, 4, 4)
3553 GEN_VEXT_VF(vfmax_vf_d, 8, 8)
3554 
3555 /* Vector Floating-Point Sign-Injection Instructions */
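/*
 * As with the other two-operand callbacks, a = vs2[i] and b = vs1[i] (or
 * the scalar).  deposit64(b, 0, 15, a) keeps only the sign bit of b and
 * takes the remaining 15 bits from a, so for example
 * fsgnj16(a = 0x3c00 (+1.0), b = 0x8000 (-0.0)) returns 0xbc00 (-1.0).
 */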
3556 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
3557 {
3558     return deposit64(b, 0, 15, a);
3559 }
3560 
3561 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
3562 {
3563     return deposit64(b, 0, 31, a);
3564 }
3565 
3566 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
3567 {
3568     return deposit64(b, 0, 63, a);
3569 }
3570 
3571 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
3572 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
3573 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
3574 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2)
3575 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4)
3576 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8)
3577 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
3578 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
3579 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
3580 GEN_VEXT_VF(vfsgnj_vf_h, 2, 2)
3581 GEN_VEXT_VF(vfsgnj_vf_w, 4, 4)
3582 GEN_VEXT_VF(vfsgnj_vf_d, 8, 8)
3583 
3584 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
3585 {
3586     return deposit64(~b, 0, 15, a);
3587 }
3588 
3589 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
3590 {
3591     return deposit64(~b, 0, 31, a);
3592 }
3593 
3594 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
3595 {
3596     return deposit64(~b, 0, 63, a);
3597 }
3598 
3599 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
3600 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
3601 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
3602 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2)
3603 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4)
3604 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8)
3605 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
3606 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
3607 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
3608 GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2)
3609 GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4)
3610 GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8)
3611 
3612 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
3613 {
3614     return deposit64(b ^ a, 0, 15, a);
3615 }
3616 
3617 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
3618 {
3619     return deposit64(b ^ a, 0, 31, a);
3620 }
3621 
3622 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
3623 {
3624     return deposit64(b ^ a, 0, 63, a);
3625 }
3626 
3627 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
3628 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
3629 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
3630 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2)
3631 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4)
3632 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8)
3633 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
3634 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
3635 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
3636 GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2)
3637 GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4)
3638 GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8)
3639 
3640 /* Vector Floating-Point Compare Instructions */
3641 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP)            \
3642 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
3643                   CPURISCVState *env, uint32_t desc)          \
3644 {                                                             \
3645     uint32_t vm = vext_vm(desc);                              \
3646     uint32_t vl = env->vl;                                    \
3647     uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);        \
3648     uint32_t i;                                               \
3649                                                               \
3650     for (i = 0; i < vl; i++) {                                \
3651         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
3652         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
3653         if (!vm && !vext_elem_mask(v0, i)) {                  \
3654             continue;                                         \
3655         }                                                     \
3656         vext_set_elem_mask(vd, i,                             \
3657                            DO_OP(s2, s1, &env->fp_status));   \
3658     }                                                         \
3659     for (; i < vlmax; i++) {                                  \
3660         vext_set_elem_mask(vd, i, 0);                         \
3661     }                                                         \
3662 }
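
/*
 * The compare helpers produce one mask bit per element: active elements
 * get DO_OP(vs2[i], vs1[i]), and bits from vl up to vlmax are cleared.
 * vmfeq/vmfne use the quiet compare predicates, while vmflt/vmfle (and
 * the vmfgt/vmfge helpers below) use the signaling ones, which raise the
 * invalid flag when either operand is NaN.
 */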
3663 
3664 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
3665 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
3666 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
3667 
3668 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP)                      \
3669 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,       \
3670                   CPURISCVState *env, uint32_t desc)                \
3671 {                                                                   \
3672     uint32_t vm = vext_vm(desc);                                    \
3673     uint32_t vl = env->vl;                                          \
3674     uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);              \
3675     uint32_t i;                                                     \
3676                                                                     \
3677     for (i = 0; i < vl; i++) {                                      \
3678         ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
3679         if (!vm && !vext_elem_mask(v0, i)) {                        \
3680             continue;                                               \
3681         }                                                           \
3682         vext_set_elem_mask(vd, i,                                   \
3683                            DO_OP(s2, (ETYPE)s1, &env->fp_status));  \
3684     }                                                               \
3685     for (; i < vlmax; i++) {                                        \
3686         vext_set_elem_mask(vd, i, 0);                               \
3687     }                                                               \
3688 }
3689 
3690 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
3691 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
3692 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
3693 
3694 static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
3695 {
3696     FloatRelation compare = float16_compare_quiet(a, b, s);
3697     return compare != float_relation_equal;
3698 }
3699 
3700 static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
3701 {
3702     FloatRelation compare = float32_compare_quiet(a, b, s);
3703     return compare != float_relation_equal;
3704 }
3705 
3706 static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
3707 {
3708     FloatRelation compare = float64_compare_quiet(a, b, s);
3709     return compare != float_relation_equal;
3710 }
3711 
3712 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
3713 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
3714 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
3715 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
3716 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
3717 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
3718 
3719 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
3720 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
3721 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
3722 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
3723 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
3724 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
3725 
3726 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
3727 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
3728 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
3729 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
3730 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
3731 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
3732 
3733 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
3734 {
3735     FloatRelation compare = float16_compare(a, b, s);
3736     return compare == float_relation_greater;
3737 }
3738 
3739 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
3740 {
3741     FloatRelation compare = float32_compare(a, b, s);
3742     return compare == float_relation_greater;
3743 }
3744 
3745 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
3746 {
3747     FloatRelation compare = float64_compare(a, b, s);
3748     return compare == float_relation_greater;
3749 }
3750 
3751 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
3752 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
3753 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
3754 
3755 static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
3756 {
3757     FloatRelation compare = float16_compare(a, b, s);
3758     return compare == float_relation_greater ||
3759            compare == float_relation_equal;
3760 }
3761 
3762 static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
3763 {
3764     FloatRelation compare = float32_compare(a, b, s);
3765     return compare == float_relation_greater ||
3766            compare == float_relation_equal;
3767 }
3768 
3769 static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
3770 {
3771     FloatRelation compare = float64_compare(a, b, s);
3772     return compare == float_relation_greater ||
3773            compare == float_relation_equal;
3774 }
3775 
3776 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
3777 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
3778 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
3779 
3780 GEN_VEXT_CMP_VV_ENV(vmford_vv_h, uint16_t, H2, !float16_unordered_quiet)
3781 GEN_VEXT_CMP_VV_ENV(vmford_vv_w, uint32_t, H4, !float32_unordered_quiet)
3782 GEN_VEXT_CMP_VV_ENV(vmford_vv_d, uint64_t, H8, !float64_unordered_quiet)
3783 GEN_VEXT_CMP_VF(vmford_vf_h, uint16_t, H2, !float16_unordered_quiet)
3784 GEN_VEXT_CMP_VF(vmford_vf_w, uint32_t, H4, !float32_unordered_quiet)
3785 GEN_VEXT_CMP_VF(vmford_vf_d, uint64_t, H8, !float64_unordered_quiet)
3786 
3787 /* Vector Floating-Point Classify Instruction */
3788 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP)         \
3789 static void do_##NAME(void *vd, void *vs2, int i)      \
3790 {                                                      \
3791     TX2 s2 = *((T2 *)vs2 + HS2(i));                    \
3792     *((TD *)vd + HD(i)) = OP(s2);                      \
3793 }
3794 
3795 #define GEN_VEXT_V(NAME, ESZ, DSZ)                     \
3796 void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
3797                   CPURISCVState *env, uint32_t desc)   \
3798 {                                                      \
3799     uint32_t vm = vext_vm(desc);                       \
3800     uint32_t vl = env->vl;                             \
3801     uint32_t i;                                        \
3802                                                        \
3803     for (i = 0; i < vl; i++) {                         \
3804         if (!vm && !vext_elem_mask(v0, i)) {           \
3805             continue;                                  \
3806         }                                              \
3807         do_##NAME(vd, vs2, i);                         \
3808     }                                                  \
3809 }
3810 
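/*
 * fclass_h/s/d return the 10-bit classification mask of the standard
 * RISC-V fclass encoding:
 *   bit 0: negative infinity        bit 5: positive subnormal
 *   bit 1: negative normal          bit 6: positive normal
 *   bit 2: negative subnormal       bit 7: positive infinity
 *   bit 3: negative zero            bit 8: signaling NaN
 *   bit 4: positive zero            bit 9: quiet NaN
 */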
3811 target_ulong fclass_h(uint64_t frs1)
3812 {
3813     float16 f = frs1;
3814     bool sign = float16_is_neg(f);
3815 
3816     if (float16_is_infinity(f)) {
3817         return sign ? 1 << 0 : 1 << 7;
3818     } else if (float16_is_zero(f)) {
3819         return sign ? 1 << 3 : 1 << 4;
3820     } else if (float16_is_zero_or_denormal(f)) {
3821         return sign ? 1 << 2 : 1 << 5;
3822     } else if (float16_is_any_nan(f)) {
3823         float_status s = { }; /* for snan_bit_is_one */
3824         return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
3825     } else {
3826         return sign ? 1 << 1 : 1 << 6;
3827     }
3828 }
3829 
3830 target_ulong fclass_s(uint64_t frs1)
3831 {
3832     float32 f = frs1;
3833     bool sign = float32_is_neg(f);
3834 
3835     if (float32_is_infinity(f)) {
3836         return sign ? 1 << 0 : 1 << 7;
3837     } else if (float32_is_zero(f)) {
3838         return sign ? 1 << 3 : 1 << 4;
3839     } else if (float32_is_zero_or_denormal(f)) {
3840         return sign ? 1 << 2 : 1 << 5;
3841     } else if (float32_is_any_nan(f)) {
3842         float_status s = { }; /* for snan_bit_is_one */
3843         return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
3844     } else {
3845         return sign ? 1 << 1 : 1 << 6;
3846     }
3847 }
3848 
3849 target_ulong fclass_d(uint64_t frs1)
3850 {
3851     float64 f = frs1;
3852     bool sign = float64_is_neg(f);
3853 
3854     if (float64_is_infinity(f)) {
3855         return sign ? 1 << 0 : 1 << 7;
3856     } else if (float64_is_zero(f)) {
3857         return sign ? 1 << 3 : 1 << 4;
3858     } else if (float64_is_zero_or_denormal(f)) {
3859         return sign ? 1 << 2 : 1 << 5;
3860     } else if (float64_is_any_nan(f)) {
3861         float_status s = { }; /* for snan_bit_is_one */
3862         return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
3863     } else {
3864         return sign ? 1 << 1 : 1 << 6;
3865     }
3866 }
3867 
3868 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
3869 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
3870 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
3871 GEN_VEXT_V(vfclass_v_h, 2, 2)
3872 GEN_VEXT_V(vfclass_v_w, 4, 4)
3873 GEN_VEXT_V(vfclass_v_d, 8, 8)
3874 
3875 /* Vector Floating-Point Merge Instruction */
3876 #define GEN_VFMERGE_VF(NAME, ETYPE, H)                        \
3877 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
3878                   CPURISCVState *env, uint32_t desc)          \
3879 {                                                             \
3880     uint32_t vm = vext_vm(desc);                              \
3881     uint32_t vl = env->vl;                                    \
3882     uint32_t i;                                               \
3883                                                               \
3884     for (i = 0; i < vl; i++) {                                \
3885         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
3886         *((ETYPE *)vd + H(i))                                 \
3887           = (!vm && !vext_elem_mask(v0, i) ? s2 : s1);        \
3888     }                                                         \
3889 }
3890 
3891 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
3892 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
3893 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
3894 
3895 /* Single-Width Floating-Point/Integer Type-Convert Instructions */
3896 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
3897 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
3898 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
3899 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
3900 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2)
3901 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4)
3902 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8)
3903 
3904 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
3905 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
3906 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
3907 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
3908 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2)
3909 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4)
3910 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8)
3911 
3912 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
3913 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
3914 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
3915 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
3916 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2)
3917 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4)
3918 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8)
3919 
3920 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
3921 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
3922 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
3923 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
3924 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2)
3925 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4)
3926 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8)
3927 
3928 /* Widening Floating-Point/Integer Type-Convert Instructions */
3929 /* (TD, T2, TX2) */
3930 #define WOP_UU_H uint32_t, uint16_t, uint16_t
3931 #define WOP_UU_W uint64_t, uint32_t, uint32_t
3932 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. */
3933 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
3934 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
3935 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4)
3936 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8)
3937 
3938 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
3939 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
3940 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
3941 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4)
3942 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8)
3943 
3944 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */
3945 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
3946 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
3947 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4)
3948 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8)
3949 
3950 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
3951 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
3952 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
3953 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4)
3954 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8)
3955 
3956 /*
3957  * vfwcvt.f.f.v vd, vs2, vm #
3958  * Convert single-width float to double-width float.
3959  */
3960 static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
3961 {
3962     return float16_to_float32(a, true, s);
3963 }
3964 
3965 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
3966 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
3967 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4)
3968 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8)
3969 
3970 /* Narrowing Floating-Point/Integer Type-Convert Instructions */
3971 /* (TD, T2, TX2) */
3972 #define NOP_UU_H uint16_t, uint32_t, uint32_t
3973 #define NOP_UU_W uint32_t, uint64_t, uint64_t
3974 /* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
3975 RVVCALL(OPFVV1, vfncvt_xu_f_v_h, NOP_UU_H, H2, H4, float32_to_uint16)
3976 RVVCALL(OPFVV1, vfncvt_xu_f_v_w, NOP_UU_W, H4, H8, float64_to_uint32)
3977 GEN_VEXT_V_ENV(vfncvt_xu_f_v_h, 2, 2)
3978 GEN_VEXT_V_ENV(vfncvt_xu_f_v_w, 4, 4)
3979 
3980 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
3981 RVVCALL(OPFVV1, vfncvt_x_f_v_h, NOP_UU_H, H2, H4, float32_to_int16)
3982 RVVCALL(OPFVV1, vfncvt_x_f_v_w, NOP_UU_W, H4, H8, float64_to_int32)
3983 GEN_VEXT_V_ENV(vfncvt_x_f_v_h, 2, 2)
3984 GEN_VEXT_V_ENV(vfncvt_x_f_v_w, 4, 4)
3985 
3986 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */
3987 RVVCALL(OPFVV1, vfncvt_f_xu_v_h, NOP_UU_H, H2, H4, uint32_to_float16)
3988 RVVCALL(OPFVV1, vfncvt_f_xu_v_w, NOP_UU_W, H4, H8, uint64_to_float32)
3989 GEN_VEXT_V_ENV(vfncvt_f_xu_v_h, 2, 2)
3990 GEN_VEXT_V_ENV(vfncvt_f_xu_v_w, 4, 4)
3991 
3992 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
3993 RVVCALL(OPFVV1, vfncvt_f_x_v_h, NOP_UU_H, H2, H4, int32_to_float16)
3994 RVVCALL(OPFVV1, vfncvt_f_x_v_w, NOP_UU_W, H4, H8, int64_to_float32)
3995 GEN_VEXT_V_ENV(vfncvt_f_x_v_h, 2, 2)
3996 GEN_VEXT_V_ENV(vfncvt_f_x_v_w, 4, 4)
3997 
3998 /* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float. */
3999 static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4000 {
4001     return float32_to_float16(a, true, s);
4002 }
4003 
4004 RVVCALL(OPFVV1, vfncvt_f_f_v_h, NOP_UU_H, H2, H4, vfncvtffv16)
4005 RVVCALL(OPFVV1, vfncvt_f_f_v_w, NOP_UU_W, H4, H8, float64_to_float32)
4006 GEN_VEXT_V_ENV(vfncvt_f_f_v_h, 2, 2)
4007 GEN_VEXT_V_ENV(vfncvt_f_f_v_w, 4, 4)
4008 
4009 /*
4010  *** Vector Reduction Operations
4011  */
4012 /* Vector Single-Width Integer Reduction Instructions */
4013 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP)          \
4014 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
4015         void *vs2, CPURISCVState *env, uint32_t desc)     \
4016 {                                                         \
4017     uint32_t vm = vext_vm(desc);                          \
4018     uint32_t vl = env->vl;                                \
4019     uint32_t i;                                           \
4020     TD s1 =  *((TD *)vs1 + HD(0));                        \
4021                                                           \
4022     for (i = 0; i < vl; i++) {                            \
4023         TS2 s2 = *((TS2 *)vs2 + HS2(i));                  \
4024         if (!vm && !vext_elem_mask(v0, i)) {              \
4025             continue;                                     \
4026         }                                                 \
4027         s1 = OP(s1, (TD)s2);                              \
4028     }                                                     \
4029     *((TD *)vd + HD(0)) = s1;                             \
4030 }
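/*
 * Illustrative example: for vredsum.vs with vl = 4, all elements active,
 * vs1[0] = 10 and vs2 = {1, 2, 3, 4}, the loop above yields
 * vd[0] = 10 + 1 + 2 + 3 + 4 = 20; elements vd[1..] are not written.
 */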
4031 
4032 /* vd[0] = sum(vs1[0], vs2[*]) */
4033 GEN_VEXT_RED(vredsum_vs_b, int8_t,  int8_t,  H1, H1, DO_ADD)
4034 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
4035 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
4036 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)
4037 
4038 /* vd[0] = maxu(vs1[0], vs2[*]) */
4039 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t,  uint8_t,  H1, H1, DO_MAX)
4040 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
4041 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
4042 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)
4043 
4044 /* vd[0] = max(vs1[0], vs2[*]) */
4045 GEN_VEXT_RED(vredmax_vs_b, int8_t,  int8_t,  H1, H1, DO_MAX)
4046 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
4047 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
4048 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)
4049 
4050 /* vd[0] = minu(vs1[0], vs2[*]) */
4051 GEN_VEXT_RED(vredminu_vs_b, uint8_t,  uint8_t,  H1, H1, DO_MIN)
4052 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
4053 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
4054 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)
4055 
4056 /* vd[0] = min(vs1[0], vs2[*]) */
4057 GEN_VEXT_RED(vredmin_vs_b, int8_t,  int8_t,  H1, H1, DO_MIN)
4058 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
4059 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
4060 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)
4061 
4062 /* vd[0] = and(vs1[0], vs2[*]) */
4063 GEN_VEXT_RED(vredand_vs_b, int8_t,  int8_t,  H1, H1, DO_AND)
4064 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
4065 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
4066 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)
4067 
4068 /* vd[0] = or(vs1[0], vs2[*]) */
4069 GEN_VEXT_RED(vredor_vs_b, int8_t,  int8_t,  H1, H1, DO_OR)
4070 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
4071 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
4072 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)
4073 
4074 /* vd[0] = xor(vs1[0], vs2[*]) */
4075 GEN_VEXT_RED(vredxor_vs_b, int8_t,  int8_t,  H1, H1, DO_XOR)
4076 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
4077 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
4078 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)
4079 
4080 /* Vector Widening Integer Reduction Instructions */
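/*
 * These reuse GEN_VEXT_RED with a destination (accumulator) type twice
 * as wide as the source type; the (TD) cast sign- or zero-extends each
 * vs2 element before it is added.  E.g. vwredsumu.vs with SEW = 8,
 * vs1[0] = 0 and active vs2 elements {200, 100} yields vd[0] = 300
 * with no 8-bit wrap-around.
 */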
4081 /* Signed sum reduction into double-width accumulator */
4082 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t,  H2, H1, DO_ADD)
4083 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
4084 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)
4085 
4086 /* Unsigned sum reduction into double-width accumulator */
4087 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t,  H2, H1, DO_ADD)
4088 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
4089 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
4090 
4091 /* Vector Single-Width Floating-Point Reduction Instructions */
4092 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP)          \
4093 void HELPER(NAME)(void *vd, void *v0, void *vs1,           \
4094                   void *vs2, CPURISCVState *env,           \
4095                   uint32_t desc)                           \
4096 {                                                          \
4097     uint32_t vm = vext_vm(desc);                           \
4098     uint32_t vl = env->vl;                                 \
4099     uint32_t i;                                            \
4100     TD s1 =  *((TD *)vs1 + HD(0));                         \
4101                                                            \
4102     for (i = 0; i < vl; i++) {                             \
4103         TS2 s2 = *((TS2 *)vs2 + HS2(i));                   \
4104         if (!vm && !vext_elem_mask(v0, i)) {               \
4105             continue;                                      \
4106         }                                                  \
4107         s1 = OP(s1, (TD)s2, &env->fp_status);              \
4108     }                                                      \
4109     *((TD *)vd + HD(0)) = s1;                              \
4110 }
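/*
 * Unlike the integer reductions, OP here also takes &env->fp_status, so
 * softfloat exception flags raised by any step of the reduction
 * accumulate in the usual way.
 */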
4111 
4112 /* Unordered sum */
4113 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4114 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4115 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
4116 
4117 /* Maximum value */
4118 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maxnum)
4119 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maxnum)
4120 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maxnum)
4121 
4122 /* Minimum value */
4123 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minnum)
4124 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minnum)
4125 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minnum)
4126 
4127 /* Vector Widening Floating-Point Reduction Instructions */
4128 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
4129 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
4130                             void *vs2, CPURISCVState *env, uint32_t desc)
4131 {
4132     uint32_t vm = vext_vm(desc);
4133     uint32_t vl = env->vl;
4134     uint32_t i;
4135     uint32_t s1 =  *((uint32_t *)vs1 + H4(0));
4136 
4137     for (i = 0; i < vl; i++) {
4138         uint16_t s2 = *((uint16_t *)vs2 + H2(i));
4139         if (!vm && !vext_elem_mask(v0, i)) {
4140             continue;
4141         }
4142         s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status),
4143                          &env->fp_status);
4144     }
4145     *((uint32_t *)vd + H4(0)) = s1;
4146 }
4147 
4148 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
4149                             void *vs2, CPURISCVState *env, uint32_t desc)
4150 {
4151     uint32_t vm = vext_vm(desc);
4152     uint32_t vl = env->vl;
4153     uint32_t i;
4154     uint64_t s1 =  *((uint64_t *)vs1);
4155 
4156     for (i = 0; i < vl; i++) {
4157         uint32_t s2 = *((uint32_t *)vs2 + H4(i));
4158         if (!vm && !vext_elem_mask(v0, i)) {
4159             continue;
4160         }
4161         s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status),
4162                          &env->fp_status);
4163     }
4164     *((uint64_t *)vd) = s1;
4165 }
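/*
 * Both helpers implement the "2*SEW = 2*SEW + sum(promote(SEW))" form
 * noted above: each source element is first widened (float16_to_float32
 * or float32_to_float64), which is exact, so only the wide addition
 * can round.
 */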
4166 
4167 /*
4168  *** Vector Mask Operations
4169  */
4170 /* Vector Mask-Register Logical Instructions */
4171 #define GEN_VEXT_MASK_VV(NAME, OP)                        \
4172 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
4173                   void *vs2, CPURISCVState *env,          \
4174                   uint32_t desc)                          \
4175 {                                                         \
4176     uint32_t vlmax = env_archcpu(env)->cfg.vlen;          \
4177     uint32_t vl = env->vl;                                \
4178     uint32_t i;                                           \
4179     int a, b;                                             \
4180                                                           \
4181     for (i = 0; i < vl; i++) {                            \
4182         a = vext_elem_mask(vs1, i);                       \
4183         b = vext_elem_mask(vs2, i);                       \
4184         vext_set_elem_mask(vd, i, OP(b, a));              \
4185     }                                                     \
4186     for (; i < vlmax; i++) {                              \
4187         vext_set_elem_mask(vd, i, 0);                     \
4188     }                                                     \
4189 }
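/*
 * The second loop clears mask bits from vl up to vlmax, so the tail of
 * the destination mask register is always zeroed by these helpers.
 */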
4190 
4191 #define DO_NAND(N, M)  (!(N & M))
4192 #define DO_ANDNOT(N, M)  (N & !M)
4193 #define DO_NOR(N, M)  (!(N | M))
4194 #define DO_ORNOT(N, M)  (N | !M)
4195 #define DO_XNOR(N, M)  (!(N ^ M))
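/*
 * These operate on single mask bits (0 or 1), e.g. DO_ANDNOT(1, 0) = 1
 * and DO_XNOR(1, 1) = 1.  Since GEN_VEXT_MASK_VV applies OP(b, a) with
 * b taken from vs2 and a from vs1, vmandnot.mm computes vs2[i] & !vs1[i].
 */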
4196 
4197 GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
4198 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
4199 GEN_VEXT_MASK_VV(vmandnot_mm, DO_ANDNOT)
4200 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
4201 GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
4202 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
4203 GEN_VEXT_MASK_VV(vmornot_mm, DO_ORNOT)
4204 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
4205 
4206 /* Vector mask population count vmpopc */
4207 target_ulong HELPER(vmpopc_m)(void *v0, void *vs2, CPURISCVState *env,
4208                               uint32_t desc)
4209 {
4210     target_ulong cnt = 0;
4211     uint32_t vm = vext_vm(desc);
4212     uint32_t vl = env->vl;
4213     int i;
4214 
4215     for (i = 0; i < vl; i++) {
4216         if (vm || vext_elem_mask(v0, i)) {
4217             if (vext_elem_mask(vs2, i)) {
4218                 cnt++;
4219             }
4220         }
4221     }
4222     return cnt;
4223 }
4224 
4225 /* vmfirst find-first-set mask bit */
4226 target_ulong HELPER(vmfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4227                                uint32_t desc)
4228 {
4229     uint32_t vm = vext_vm(desc);
4230     uint32_t vl = env->vl;
4231     int i;
4232 
4233     for (i = 0; i < vl; i++) {
4234         if (vm || vext_elem_mask(v0, i)) {
4235             if (vext_elem_mask(vs2, i)) {
4236                 return i;
4237             }
4238         }
4239     }
4240     return -1LL;
4241 }
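/*
 * E.g. for active vs2 mask bits {0, 0, 1, ...} this returns 2; if no
 * active element of vs2 is set within vl (or vl == 0), it returns -1,
 * i.e. all ones in a target_ulong.
 */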
4242 
4243 enum set_mask_type {
4244     ONLY_FIRST = 1,
4245     INCLUDE_FIRST,
4246     BEFORE_FIRST,
4247 };
4248 
4249 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
4250                    uint32_t desc, enum set_mask_type type)
4251 {
4252     uint32_t vlmax = env_archcpu(env)->cfg.vlen;
4253     uint32_t vm = vext_vm(desc);
4254     uint32_t vl = env->vl;
4255     int i;
4256     bool first_mask_bit = false;
4257 
4258     for (i = 0; i < vl; i++) {
4259         if (!vm && !vext_elem_mask(v0, i)) {
4260             continue;
4261         }
4262         /* write a zero to all following active elements */
4263         if (first_mask_bit) {
4264             vext_set_elem_mask(vd, i, 0);
4265             continue;
4266         }
4267         if (vext_elem_mask(vs2, i)) {
4268             first_mask_bit = true;
4269             if (type == BEFORE_FIRST) {
4270                 vext_set_elem_mask(vd, i, 0);
4271             } else {
4272                 vext_set_elem_mask(vd, i, 1);
4273             }
4274         } else {
4275             if (type == ONLY_FIRST) {
4276                 vext_set_elem_mask(vd, i, 0);
4277             } else {
4278                 vext_set_elem_mask(vd, i, 1);
4279             }
4280         }
4281     }
4282     for (; i < vlmax; i++) {
4283         vext_set_elem_mask(vd, i, 0);
4284     }
4285 }
4286 
4287 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4288                      uint32_t desc)
4289 {
4290     vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
4291 }
4292 
4293 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4294                      uint32_t desc)
4295 {
4296     vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
4297 }
4298 
4299 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4300                      uint32_t desc)
4301 {
4302     vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
4303 }
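/*
 * Illustrative example with all eight elements active and vs2 mask bits
 * (element 0 first) 0 0 1 0 1 0 0 0:
 *   vmsbf.m -> 1 1 0 0 0 0 0 0   (set before first)
 *   vmsif.m -> 1 1 1 0 0 0 0 0   (set including first)
 *   vmsof.m -> 0 0 1 0 0 0 0 0   (set only first)
 */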
4304 
4305 /* Vector Iota Instruction */
4306 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H)                                  \
4307 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env,      \
4308                   uint32_t desc)                                          \
4309 {                                                                         \
4310     uint32_t vm = vext_vm(desc);                                          \
4311     uint32_t vl = env->vl;                                                \
4312     uint32_t sum = 0;                                                     \
4313     int i;                                                                \
4314                                                                           \
4315     for (i = 0; i < vl; i++) {                                            \
4316         if (!vm && !vext_elem_mask(v0, i)) {                              \
4317             continue;                                                     \
4318         }                                                                 \
4319         *((ETYPE *)vd + H(i)) = sum;                                      \
4320         if (vext_elem_mask(vs2, i)) {                                     \
4321             sum++;                                                        \
4322         }                                                                 \
4323     }                                                                     \
4324 }
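/*
 * E.g. with all elements active and vs2 mask bits 1 0 1 1 0, viota.m
 * writes vd = {0, 1, 1, 2, 3}: each active element receives the number
 * of set vs2 bits strictly below its index.
 */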
4325 
4326 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t,  H1)
4327 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
4328 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
4329 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
4330 
4331 /* Vector Element Index Instruction */
4332 #define GEN_VEXT_VID_V(NAME, ETYPE, H)                                    \
4333 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc)  \
4334 {                                                                         \
4335     uint32_t vm = vext_vm(desc);                                          \
4336     uint32_t vl = env->vl;                                                \
4337     int i;                                                                \
4338                                                                           \
4339     for (i = 0; i < vl; i++) {                                            \
4340         if (!vm && !vext_elem_mask(v0, i)) {                              \
4341             continue;                                                     \
4342         }                                                                 \
4343         *((ETYPE *)vd + H(i)) = i;                                        \
4344     }                                                                     \
4345 }
4346 
4347 GEN_VEXT_VID_V(vid_v_b, uint8_t,  H1)
4348 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
4349 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
4350 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
4351 
4352 /*
4353  *** Vector Permutation Instructions
4354  */
4355 
4356 /* Vector Slide Instructions */
4357 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                              \
4358 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4359                   CPURISCVState *env, uint32_t desc)                      \
4360 {                                                                         \
4361     uint32_t vm = vext_vm(desc);                                          \
4362     uint32_t vl = env->vl;                                                \
4363     target_ulong offset = s1, i;                                          \
4364                                                                           \
4365     for (i = offset; i < vl; i++) {                                       \
4366         if (!vm && !vext_elem_mask(v0, i)) {                              \
4367             continue;                                                     \
4368         }                                                                 \
4369         *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
4370     }                                                                     \
4371 }
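/*
 * The loop starts at i = offset, so destination elements below the
 * offset are left unchanged.  E.g. with offset = 2 and vl = 6, active
 * elements get vd[2..5] = vs2[0..3] while vd[0] and vd[1] keep their
 * previous values.
 */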
4372 
4373 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
4374 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t,  H1)
4375 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
4376 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
4377 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
4378 
4379 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                            \
4380 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4381                   CPURISCVState *env, uint32_t desc)                      \
4382 {                                                                         \
4383     uint32_t vlmax = env_archcpu(env)->cfg.vlen;                          \
4384     uint32_t vm = vext_vm(desc);                                          \
4385     uint32_t vl = env->vl;                                                \
4386     target_ulong offset = s1, i;                                          \
4387                                                                           \
4388     for (i = 0; i < vl; ++i) {                                            \
4389         target_ulong j = i + offset;                                      \
4390         if (!vm && !vext_elem_mask(v0, i)) {                              \
4391             continue;                                                     \
4392         }                                                                 \
4393         *((ETYPE *)vd + H(i)) = j >= vlmax ? 0 : *((ETYPE *)vs2 + H(j));  \
4394     }                                                                     \
4395 }
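/*
 * Source indices at or beyond vlmax read as zero.  E.g. with offset = 3
 * and vl = 4, active elements get vd[i] = vs2[i + 3], or 0 once
 * i + 3 >= vlmax.
 */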
4396 
4397 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
4398 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t,  H1)
4399 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
4400 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
4401 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
4402 
4403 #define GEN_VEXT_VSLIDE1UP_VX(NAME, ETYPE, H)                             \
4404 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4405                   CPURISCVState *env, uint32_t desc)                      \
4406 {                                                                         \
4407     uint32_t vm = vext_vm(desc);                                          \
4408     uint32_t vl = env->vl;                                                \
4409     uint32_t i;                                                           \
4410                                                                           \
4411     for (i = 0; i < vl; i++) {                                            \
4412         if (!vm && !vext_elem_mask(v0, i)) {                              \
4413             continue;                                                     \
4414         }                                                                 \
4415         if (i == 0) {                                                     \
4416             *((ETYPE *)vd + H(i)) = s1;                                   \
4417         } else {                                                          \
4418             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));           \
4419         }                                                                 \
4420     }                                                                     \
4421 }
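/*
 * E.g. with vl = 4, s1 = 9 and vs2 = {a, b, c, d}, an unmasked
 * vslide1up.vx produces vd = {9, a, b, c}.
 */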
4422 
4423 /* vslide1up.vx vd, vs2, rs1, vm # vd[0] = x[rs1], vd[i+1] = vs2[i] */
4424 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, uint8_t,  H1)
4425 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, uint16_t, H2)
4426 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, uint32_t, H4)
4427 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, uint64_t, H8)
4428 
4429 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ETYPE, H)                           \
4430 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4431                   CPURISCVState *env, uint32_t desc)                      \
4432 {                                                                         \
4433     uint32_t vm = vext_vm(desc);                                          \
4434     uint32_t vl = env->vl;                                                \
4435     uint32_t i;                                                           \
4436                                                                           \
4437     for (i = 0; i < vl; i++) {                                            \
4438         if (!vm && !vext_elem_mask(v0, i)) {                              \
4439             continue;                                                     \
4440         }                                                                 \
4441         if (i == vl - 1) {                                                \
4442             *((ETYPE *)vd + H(i)) = s1;                                   \
4443         } else {                                                          \
4444             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));           \
4445         }                                                                 \
4446     }                                                                     \
4447 }
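/*
 * E.g. with vl = 4, s1 = 9 and vs2 = {a, b, c, d}, an unmasked
 * vslide1down.vx produces vd = {b, c, d, 9}.
 */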
4448 
4449 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1] = x[rs1] */
4450 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, uint8_t,  H1)
4451 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, uint16_t, H2)
4452 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, uint32_t, H4)
4453 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, uint64_t, H8)
4454 
4455 /* Vector Register Gather Instruction */
4456 #define GEN_VEXT_VRGATHER_VV(NAME, ETYPE, H)                              \
4457 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
4458                   CPURISCVState *env, uint32_t desc)                      \
4459 {                                                                         \
4460     uint32_t vlmax = env_archcpu(env)->cfg.vlen;                          \
4461     uint32_t vm = vext_vm(desc);                                          \
4462     uint32_t vl = env->vl;                                                \
4463     uint64_t index;                                                       \
4464     uint32_t i;                                                           \
4465                                                                           \
4466     for (i = 0; i < vl; i++) {                                            \
4467         if (!vm && !vext_elem_mask(v0, i)) {                              \
4468             continue;                                                     \
4469         }                                                                 \
4470         index = *((ETYPE *)vs1 + H(i));                                   \
4471         if (index >= vlmax) {                                             \
4472             *((ETYPE *)vd + H(i)) = 0;                                    \
4473         } else {                                                          \
4474             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
4475         }                                                                 \
4476     }                                                                     \
4477 }
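/*
 * E.g. with vl = 4, vlmax = 4, vs1 = {3, 0, 9, 1} and vs2 = {a, b, c, d},
 * an unmasked vrgather.vv produces vd = {d, a, 0, b}: index 9 is out of
 * range and yields 0.
 */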
4478 
4479 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
4480 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t,  H1)
4481 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, H2)
4482 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, H4)
4483 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, H8)
4484 
4485 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                              \
4486 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4487                   CPURISCVState *env, uint32_t desc)                      \
4488 {                                                                         \
4489     uint32_t vlmax = env_archcpu(env)->cfg.vlen;                          \
4490     uint32_t vm = vext_vm(desc);                                          \
4491     uint32_t vl = env->vl;                                                \
4492     uint64_t index = s1;                                                  \
4493     uint32_t i;                                                           \
4494                                                                           \
4495     for (i = 0; i < vl; i++) {                                            \
4496         if (!vm && !vext_elem_mask(v0, i)) {                              \
4497             continue;                                                     \
4498         }                                                                 \
4499         if (index >= vlmax) {                                             \
4500             *((ETYPE *)vd + H(i)) = 0;                                    \
4501         } else {                                                          \
4502             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
4503         }                                                                 \
4504     }                                                                     \
4505 }
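/*
 * Here the index is the loop-invariant scalar s1, so every active
 * destination element receives the same value, vs2[s1] (or 0 when
 * s1 >= vlmax).
 */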
4506 
4507 /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */
4508 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t,  H1)
4509 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
4510 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
4511 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
4512 
4513 /* Vector Compress Instruction */
4514 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
4515 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
4516                   CPURISCVState *env, uint32_t desc)                      \
4517 {                                                                         \
4518     uint32_t vl = env->vl;                                                \
4519     uint32_t num = 0, i;                                                  \
4520                                                                           \
4521     for (i = 0; i < vl; i++) {                                            \
4522         if (!vext_elem_mask(vs1, i)) {                                    \
4523             continue;                                                     \
4524         }                                                                 \
4525         *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
4526         num++;                                                            \
4527     }                                                                     \
4528 }
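/*
 * E.g. with vl = 5, vs1 mask bits 0 1 1 0 1 and vs2 = {a, b, c, d, e},
 * vcompress.vm packs vd[0..2] = {b, c, e}; destination elements at and
 * above 'num' are not written.  Note that vcompress is always unmasked:
 * vs1 is the selection mask and v0 is unused here.
 */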
4529 
4530 /* Compress into vd elements of vs2 where vs1 is enabled */
4531 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t,  H1)
4532 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
4533 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
4534 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
4535