xref: /openbmc/qemu/target/riscv/vector_helper.c (revision d3e5e2ff)
1 /*
2  * RISC-V Vector Extension Helpers for QEMU.
3  *
4  * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2 or later, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License along with
16  * this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include "qemu/osdep.h"
20 #include "cpu.h"
21 #include "exec/memop.h"
22 #include "exec/exec-all.h"
23 #include "exec/helper-proto.h"
24 #include "fpu/softfloat.h"
25 #include "tcg/tcg-gvec-desc.h"
26 #include "internals.h"
27 #include <math.h>
28 
29 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
30                             target_ulong s2)
31 {
32     int vlmax, vl;
33     RISCVCPU *cpu = env_archcpu(env);
34     uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
35     uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
36     uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
37     bool vill = FIELD_EX64(s2, VTYPE, VILL);
38     target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED);
39 
40     if (lmul & 4) {
41         /* Fractional LMUL. */
42         if (lmul == 4 ||
43             cpu->cfg.elen >> (8 - lmul) < sew) {
44             vill = true;
45         }
46     }
47 
48     if ((sew > cpu->cfg.elen)
49         || vill
50         || (ediv != 0)
51         || (reserved != 0)) {
52         /* only set vill bit. */
53         env->vtype = FIELD_DP64(0, VTYPE, VILL, 1);
54         env->vl = 0;
55         env->vstart = 0;
56         return 0;
57     }
58 
59     vlmax = vext_get_vlmax(cpu, s2);
60     if (s1 <= vlmax) {
61         vl = s1;
62     } else {
63         vl = vlmax;
64     }
65     env->vl = vl;
66     env->vtype = s2;
67     env->vstart = 0;
68     return vl;
69 }
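/*
 * Illustrative sketch (assuming vext_get_vlmax() yields the usual
 * vlmax = LMUL * VLEN / SEW): with VLEN = 128, SEW = 32 and LMUL = 2,
 * vlmax = 8, so a requested s1 = 10 is clamped to vl = 8 while s1 = 5
 * keeps vl = 5.
 */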
70 
71 /*
72  * Note that vector data is stored in host-endian 64-bit chunks,
73  * so addressing units smaller than that need a host-endian fixup.
74  */
75 #ifdef HOST_WORDS_BIGENDIAN
76 #define H1(x)   ((x) ^ 7)
77 #define H1_2(x) ((x) ^ 6)
78 #define H1_4(x) ((x) ^ 4)
79 #define H2(x)   ((x) ^ 3)
80 #define H4(x)   ((x) ^ 1)
81 #define H8(x)   ((x))
82 #else
83 #define H1(x)   (x)
84 #define H1_2(x) (x)
85 #define H1_4(x) (x)
86 #define H2(x)   (x)
87 #define H4(x)   (x)
88 #define H8(x)   (x)
89 #endif
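/*
 * Example: on a big-endian host the low-order byte of the first 64-bit chunk
 * sits at host byte offset 7, so H1(0) == 7 maps guest element 0 to that
 * offset; on little-endian hosts the H macros are identity mappings.
 */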
90 
91 static inline uint32_t vext_nf(uint32_t desc)
92 {
93     return FIELD_EX32(simd_data(desc), VDATA, NF);
94 }
95 
96 static inline uint32_t vext_vm(uint32_t desc)
97 {
98     return FIELD_EX32(simd_data(desc), VDATA, VM);
99 }
100 
101 /*
102  * Encode LMUL to lmul as follows:
103  *     LMUL    vlmul    lmul
104  *      1       000       0
105  *      2       001       1
106  *      4       010       2
107  *      8       011       3
108  *      -       100       -
109  *     1/8      101      -3
110  *     1/4      110      -2
111  *     1/2      111      -1
112  */
113 static inline int32_t vext_lmul(uint32_t desc)
114 {
115     return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
116 }
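/*
 * Example: sextract32() turns a vlmul field of 7 (0b111, LMUL = 1/2) into
 * lmul = -1, while 3 (0b011, LMUL = 8) stays 3, matching the table above.
 */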
117 
118 /*
119  * Get vector group length in bytes. Its range is [64, 2048].
120  *
121  * As simd_desc supports at most 256, the max vlen is 512 bits.
122  * So vlen in bytes is encoded as maxsz.
123  */
124 static inline uint32_t vext_maxsz(uint32_t desc)
125 {
126     return simd_maxsz(desc) << vext_lmul(desc);
127 }
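/*
 * Example: with simd_maxsz(desc) = 16 bytes (a 128-bit vlen) and lmul = 2
 * (LMUL = 4), the vector group length is 16 << 2 = 64 bytes.
 */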
128 
129 /*
130  * This function checks watchpoints before the real load or store operation.
131  *
132  * In softmmu mode, the TLB API probe_access is enough for the watchpoint check.
133  * In user mode, there is no watchpoint support now.
134  *
135  * It will trigger an exception if there is no mapping in the TLB
136  * and the page table walk can't fill the TLB entry. The guest
137  * software can then return here after handling the exception, or never return.
138  */
139 static void probe_pages(CPURISCVState *env, target_ulong addr,
140                         target_ulong len, uintptr_t ra,
141                         MMUAccessType access_type)
142 {
143     target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
144     target_ulong curlen = MIN(pagelen, len);
145 
146     probe_access(env, addr, curlen, access_type,
147                  cpu_mmu_index(env, false), ra);
148     if (len > curlen) {
149         addr += curlen;
150         curlen = len - curlen;
151         probe_access(env, addr, curlen, access_type,
152                      cpu_mmu_index(env, false), ra);
153     }
154 }
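/*
 * Example: an 8-byte access that starts 3 bytes before a page boundary is
 * probed as 3 bytes on the first page and 5 bytes on the following page, so
 * both translations are checked before any data is transferred.
 */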
155 
156 static inline void vext_set_elem_mask(void *v0, int index,
157                                       uint8_t value)
158 {
159     int idx = index / 64;
160     int pos = index % 64;
161     uint64_t old = ((uint64_t *)v0)[idx];
162     ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
163 }
164 
165 /*
166  * Earlier designs (pre-0.9) had a varying number of bits
167  * per mask value (MLEN). In the 0.9 design, MLEN=1.
168  * (Section 4.5)
169  */
170 static inline int vext_elem_mask(void *v0, int index)
171 {
172     int idx = index / 64;
173     int pos = index % 64;
174     return (((uint64_t *)v0)[idx] >> pos) & 1;
175 }
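/*
 * Example: the mask bit for element 70 is bit 6 (70 % 64) of word 1 (70 / 64)
 * of v0.
 */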
176 
177 /* elements operations for load and store */
178 typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
179                                uint32_t idx, void *vd, uintptr_t retaddr);
180 
181 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
182 static void NAME(CPURISCVState *env, abi_ptr addr,         \
183                  uint32_t idx, void *vd, uintptr_t retaddr)\
184 {                                                          \
185     ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
186     *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
187 }
188 
189 GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
190 GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
191 GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
192 GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)
193 
194 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
195 static void NAME(CPURISCVState *env, abi_ptr addr,         \
196                  uint32_t idx, void *vd, uintptr_t retaddr)\
197 {                                                          \
198     ETYPE data = *((ETYPE *)vd + H(idx));                  \
199     cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
200 }
201 
202 GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
203 GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
204 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
205 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
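/*
 * For reference, GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb) expands to
 * roughly:
 *
 *     static void lde_b(CPURISCVState *env, abi_ptr addr,
 *                       uint32_t idx, void *vd, uintptr_t retaddr)
 *     {
 *         int8_t *cur = ((int8_t *)vd + H1(idx));
 *         *cur = cpu_ldsb_data_ra(env, addr, retaddr);
 *     }
 */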
206 
207 /*
208  *** stride: access vector element from strided memory
209  */
210 static void
211 vext_ldst_stride(void *vd, void *v0, target_ulong base,
212                  target_ulong stride, CPURISCVState *env,
213                  uint32_t desc, uint32_t vm,
214                  vext_ldst_elem_fn *ldst_elem,
215                  uint32_t esz, uintptr_t ra, MMUAccessType access_type)
216 {
217     uint32_t i, k;
218     uint32_t nf = vext_nf(desc);
219     uint32_t vlmax = vext_maxsz(desc) / esz;
220 
221     /* probe every access */
222     for (i = 0; i < env->vl; i++) {
223         if (!vm && !vext_elem_mask(v0, i)) {
224             continue;
225         }
226         probe_pages(env, base + stride * i, nf * esz, ra, access_type);
227     }
228     /* do real access */
229     for (i = 0; i < env->vl; i++) {
230         k = 0;
231         if (!vm && !vext_elem_mask(v0, i)) {
232             continue;
233         }
234         while (k < nf) {
235             target_ulong addr = base + stride * i + k * esz;
236             ldst_elem(env, addr, i + k * vlmax, vd, ra);
237             k++;
238         }
239     }
240 }
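/*
 * Register layout: field k of segment i maps to element slot i + k * vlmax,
 * i.e. each of the NF fields occupies its own vlmax-sized register group.
 */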
241 
242 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
243 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
244                   target_ulong stride, CPURISCVState *env,              \
245                   uint32_t desc)                                        \
246 {                                                                       \
247     uint32_t vm = vext_vm(desc);                                        \
248     vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
249                      sizeof(ETYPE), GETPC(), MMU_DATA_LOAD);            \
250 }
251 
252 GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b)
253 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
254 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
255 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)
256 
257 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                       \
258 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
259                   target_ulong stride, CPURISCVState *env,              \
260                   uint32_t desc)                                        \
261 {                                                                       \
262     uint32_t vm = vext_vm(desc);                                        \
263     vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
264                      sizeof(ETYPE), GETPC(), MMU_DATA_STORE);           \
265 }
266 
267 GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b)
268 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
269 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
270 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
271 
272 /*
273  *** unit-stride: access elements stored contiguously in memory
274  */
275 
276 /* unmasked unit-stride load and store operation */
277 static void
278 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
279              vext_ldst_elem_fn *ldst_elem,
280              uint32_t esz, uintptr_t ra, MMUAccessType access_type)
281 {
282     uint32_t i, k;
283     uint32_t nf = vext_nf(desc);
284     uint32_t vlmax = vext_maxsz(desc) / esz;
285 
286     /* probe every access */
287     probe_pages(env, base, env->vl * nf * esz, ra, access_type);
288     /* do real access */
289     for (i = 0; i < env->vl; i++) {
290         k = 0;
291         while (k < nf) {
292             target_ulong addr = base + (i * nf + k) * esz;
293             ldst_elem(env, addr, i + k * vlmax, vd, ra);
294             k++;
295         }
296     }
297 }
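/*
 * Address pattern: unit-stride segments are packed back to back, so field k
 * of element i is accessed at base + (i * nf + k) * esz; e.g. with nf = 2 and
 * esz = 4, element 1 touches offsets 8 and 12.
 */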
298 
299 /*
300  * A masked unit-stride load or store is handled as a special case of the
301  * strided access, with stride = NF * sizeof(ETYPE).
302  */
303 
304 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN)                            \
305 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
306                          CPURISCVState *env, uint32_t desc)             \
307 {                                                                       \
308     uint32_t stride = vext_nf(desc) * sizeof(ETYPE);                    \
309     vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
310                      sizeof(ETYPE), GETPC(), MMU_DATA_LOAD);            \
311 }                                                                       \
312                                                                         \
313 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
314                   CPURISCVState *env, uint32_t desc)                    \
315 {                                                                       \
316     vext_ldst_us(vd, base, env, desc, LOAD_FN,                          \
317                  sizeof(ETYPE), GETPC(), MMU_DATA_LOAD);                \
318 }
319 
320 GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b)
321 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
322 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
323 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)
324 
325 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN)                           \
326 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
327                          CPURISCVState *env, uint32_t desc)             \
328 {                                                                       \
329     uint32_t stride = vext_nf(desc) * sizeof(ETYPE);                    \
330     vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,  \
331                      sizeof(ETYPE), GETPC(), MMU_DATA_STORE);           \
332 }                                                                       \
333                                                                         \
334 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
335                   CPURISCVState *env, uint32_t desc)                    \
336 {                                                                       \
337     vext_ldst_us(vd, base, env, desc, STORE_FN,                         \
338                  sizeof(ETYPE), GETPC(), MMU_DATA_STORE);               \
339 }
340 
341 GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b)
342 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
343 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
344 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)
345 
346 /*
347  *** index: access vector element from indexed memory
348  */
349 typedef target_ulong vext_get_index_addr(target_ulong base,
350         uint32_t idx, void *vs2);
351 
352 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
353 static target_ulong NAME(target_ulong base,            \
354                          uint32_t idx, void *vs2)      \
355 {                                                      \
356     return (base + *((ETYPE *)vs2 + H(idx)));          \
357 }
358 
359 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t,  H1)
360 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
361 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
362 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
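/*
 * Example: idx_h(base, 3, vs2) returns base plus the zero-extended 16-bit
 * value in element 3 of vs2 (the index ETYPEs above are all unsigned).
 */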
363 
364 static inline void
365 vext_ldst_index(void *vd, void *v0, target_ulong base,
366                 void *vs2, CPURISCVState *env, uint32_t desc,
367                 vext_get_index_addr get_index_addr,
368                 vext_ldst_elem_fn *ldst_elem,
369                 uint32_t esz, uintptr_t ra, MMUAccessType access_type)
370 {
371     uint32_t i, k;
372     uint32_t nf = vext_nf(desc);
373     uint32_t vm = vext_vm(desc);
374     uint32_t vlmax = vext_maxsz(desc) / esz;
375 
376     /* probe every access */
377     for (i = 0; i < env->vl; i++) {
378         if (!vm && !vext_elem_mask(v0, i)) {
379             continue;
380         }
381         probe_pages(env, get_index_addr(base, i, vs2), nf * esz, ra,
382                     access_type);
383     }
384     /* do real access */
385     for (i = 0; i < env->vl; i++) {
386         k = 0;
387         if (!vm && !vext_elem_mask(v0, i)) {
388             continue;
389         }
390         while (k < nf) {
391             abi_ptr addr = get_index_addr(base, i, vs2) + k * esz;
392             ldst_elem(env, addr, i + k * vlmax, vd, ra);
393             k++;
394         }
395     }
396 }
397 
398 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)                  \
399 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                   \
400                   void *vs2, CPURISCVState *env, uint32_t desc)            \
401 {                                                                          \
402     vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,                \
403                     LOAD_FN, sizeof(ETYPE), GETPC(), MMU_DATA_LOAD);       \
404 }
405 
406 GEN_VEXT_LD_INDEX(vlxei8_8_v,   int8_t,  idx_b, lde_b)
407 GEN_VEXT_LD_INDEX(vlxei8_16_v,  int16_t, idx_b, lde_h)
408 GEN_VEXT_LD_INDEX(vlxei8_32_v,  int32_t, idx_b, lde_w)
409 GEN_VEXT_LD_INDEX(vlxei8_64_v,  int64_t, idx_b, lde_d)
410 GEN_VEXT_LD_INDEX(vlxei16_8_v,  int8_t,  idx_h, lde_b)
411 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
412 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
413 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
414 GEN_VEXT_LD_INDEX(vlxei32_8_v,  int8_t,  idx_w, lde_b)
415 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
416 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
417 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
418 GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b)
419 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
420 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
421 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)
422 
423 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)       \
424 void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
425                   void *vs2, CPURISCVState *env, uint32_t desc)  \
426 {                                                                \
427     vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
428                     STORE_FN, sizeof(ETYPE),                     \
429                     GETPC(), MMU_DATA_STORE);                    \
430 }
431 
432 GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
433 GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
434 GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
435 GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
436 GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
437 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
438 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
439 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
440 GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
441 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
442 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
443 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
444 GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
445 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
446 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
447 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)
448 
449 /*
450  *** unit-stride fault-only-first load instructions
451  */
452 static inline void
453 vext_ldff(void *vd, void *v0, target_ulong base,
454           CPURISCVState *env, uint32_t desc,
455           vext_ldst_elem_fn *ldst_elem,
456           uint32_t esz, uintptr_t ra)
457 {
458     void *host;
459     uint32_t i, k, vl = 0;
460     uint32_t nf = vext_nf(desc);
461     uint32_t vm = vext_vm(desc);
462     uint32_t vlmax = vext_maxsz(desc) / esz;
463     target_ulong addr, offset, remain;
464 
465     /* probe every access */
466     for (i = 0; i < env->vl; i++) {
467         if (!vm && !vext_elem_mask(v0, i)) {
468             continue;
469         }
470         addr = base + nf * i * esz;
471         if (i == 0) {
472             probe_pages(env, addr, nf * esz, ra, MMU_DATA_LOAD);
473         } else {
474             /* if it triggers an exception, no need to check watchpoint */
475             remain = nf * esz;
476             while (remain > 0) {
477                 offset = -(addr | TARGET_PAGE_MASK);
478                 host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
479                                          cpu_mmu_index(env, false));
480                 if (host) {
481 #ifdef CONFIG_USER_ONLY
482                     if (page_check_range(addr, nf * esz, PAGE_READ) < 0) {
483                         vl = i;
484                         goto ProbeSuccess;
485                     }
486 #else
487                     probe_pages(env, addr, nf * esz, ra, MMU_DATA_LOAD);
488 #endif
489                 } else {
490                     vl = i;
491                     goto ProbeSuccess;
492                 }
493                 if (remain <= offset) {
494                     break;
495                 }
496                 remain -= offset;
497                 addr += offset;
498             }
499         }
500     }
501 ProbeSuccess:
502     /* load bytes from guest memory */
503     if (vl != 0) {
504         env->vl = vl;
505     }
506     for (i = 0; i < env->vl; i++) {
507         k = 0;
508         if (!vm && !vext_elem_mask(v0, i)) {
509             continue;
510         }
511         while (k < nf) {
512             target_ulong addr = base + (i * nf + k) * esz;
513             ldst_elem(env, addr, i + k * vlmax, vd, ra);
514             k++;
515         }
516     }
517 }
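/*
 * Fault-only-first behaviour: element 0 is probed with the trapping
 * probe_pages() call, so a fault there is raised normally; for later elements
 * an unmapped or unreadable page merely truncates vl to the index of the
 * first offending element and the load completes with that shorter vl.
 */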
518 
519 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
520 void HELPER(NAME)(void *vd, void *v0, target_ulong base,  \
521                   CPURISCVState *env, uint32_t desc)      \
522 {                                                         \
523     vext_ldff(vd, v0, base, env, desc, LOAD_FN,           \
524               sizeof(ETYPE), GETPC());                    \
525 }
526 
527 GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b)
528 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
529 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
530 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
531 
532 #define DO_SWAP(N, M) (M)
533 #define DO_AND(N, M)  (N & M)
534 #define DO_XOR(N, M)  (N ^ M)
535 #define DO_OR(N, M)   (N | M)
536 #define DO_ADD(N, M)  (N + M)
537 
538 /* Signed min/max */
539 #define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
540 #define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))
541 
542 /* Unsigned min/max */
543 #define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
544 #define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)
545 
546 /*
547  *** Vector Integer Arithmetic Instructions
548  */
549 
550 /* expand macro args before macro */
551 #define RVVCALL(macro, ...)  macro(__VA_ARGS__)
552 
553 /* (TD, T1, T2, TX1, TX2) */
554 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
555 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
556 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
557 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
558 #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
559 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
560 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
561 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
562 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
563 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
564 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
565 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
566 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
567 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
568 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
569 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
570 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
571 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
572 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
573 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
574 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
575 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
576 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
577 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
578 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
579 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
580 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
581 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
582 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
583 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
584 
585 /* operation of two vector elements */
586 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
587 
588 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
589 static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
590 {                                                               \
591     TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
592     TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
593     *((TD *)vd + HD(i)) = OP(s2, s1);                           \
594 }
595 #define DO_SUB(N, M) (N - M)
596 #define DO_RSUB(N, M) (M - N)
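/*
 * For reference, RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
 * expands to roughly:
 *
 *     static void do_vadd_vv_b(void *vd, void *vs1, void *vs2, int i)
 *     {
 *         int8_t s1 = *((int8_t *)vs1 + H1(i));
 *         int8_t s2 = *((int8_t *)vs2 + H1(i));
 *         *((int8_t *)vd + H1(i)) = s2 + s1;
 *     }
 */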
597 
598 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
599 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
600 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
601 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
602 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
603 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
604 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
605 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
606 
607 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
608                        CPURISCVState *env, uint32_t desc,
609                        uint32_t esz, uint32_t dsz,
610                        opivv2_fn *fn)
611 {
612     uint32_t vm = vext_vm(desc);
613     uint32_t vl = env->vl;
614     uint32_t i;
615 
616     for (i = 0; i < vl; i++) {
617         if (!vm && !vext_elem_mask(v0, i)) {
618             continue;
619         }
620         fn(vd, vs1, vs2, i);
621     }
622 }
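/*
 * Masking note: when vm == 0, elements whose mask bit is clear are skipped,
 * so the corresponding destination elements keep the value vd already held.
 */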
623 
624 /* generate the helpers for OPIVV */
625 #define GEN_VEXT_VV(NAME, ESZ, DSZ)                       \
626 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
627                   void *vs2, CPURISCVState *env,          \
628                   uint32_t desc)                          \
629 {                                                         \
630     do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,     \
631                do_##NAME);                                \
632 }
633 
634 GEN_VEXT_VV(vadd_vv_b, 1, 1)
635 GEN_VEXT_VV(vadd_vv_h, 2, 2)
636 GEN_VEXT_VV(vadd_vv_w, 4, 4)
637 GEN_VEXT_VV(vadd_vv_d, 8, 8)
638 GEN_VEXT_VV(vsub_vv_b, 1, 1)
639 GEN_VEXT_VV(vsub_vv_h, 2, 2)
640 GEN_VEXT_VV(vsub_vv_w, 4, 4)
641 GEN_VEXT_VV(vsub_vv_d, 8, 8)
642 
643 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
644 
645 /*
646  * (T1)s1 gives the real operand type.
647  * (TX1)(T1)s1 then widens or narrows it to the operation's operand type.
648  */
649 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
650 static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
651 {                                                                   \
652     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
653     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
654 }
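/*
 * Cast example: for WOP_SSS_B (T1 = int8_t, TX1 = int16_t), (TX1)(T1)s1 first
 * truncates the scalar to 8 bits and then sign-extends it to the 16-bit
 * width used by the widening operation.
 */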
655 
656 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
657 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
658 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
659 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
660 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
661 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
662 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
663 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
664 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
665 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
666 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
667 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
668 
669 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
670                        CPURISCVState *env, uint32_t desc,
671                        uint32_t esz, uint32_t dsz,
672                        opivx2_fn fn)
673 {
674     uint32_t vm = vext_vm(desc);
675     uint32_t vl = env->vl;
676     uint32_t i;
677 
678     for (i = 0; i < vl; i++) {
679         if (!vm && !vext_elem_mask(v0, i)) {
680             continue;
681         }
682         fn(vd, s1, vs2, i);
683     }
684 }
685 
686 /* generate the helpers for OPIVX */
687 #define GEN_VEXT_VX(NAME, ESZ, DSZ)                       \
688 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
689                   void *vs2, CPURISCVState *env,          \
690                   uint32_t desc)                          \
691 {                                                         \
692     do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ,      \
693                do_##NAME);                                \
694 }
695 
696 GEN_VEXT_VX(vadd_vx_b, 1, 1)
697 GEN_VEXT_VX(vadd_vx_h, 2, 2)
698 GEN_VEXT_VX(vadd_vx_w, 4, 4)
699 GEN_VEXT_VX(vadd_vx_d, 8, 8)
700 GEN_VEXT_VX(vsub_vx_b, 1, 1)
701 GEN_VEXT_VX(vsub_vx_h, 2, 2)
702 GEN_VEXT_VX(vsub_vx_w, 4, 4)
703 GEN_VEXT_VX(vsub_vx_d, 8, 8)
704 GEN_VEXT_VX(vrsub_vx_b, 1, 1)
705 GEN_VEXT_VX(vrsub_vx_h, 2, 2)
706 GEN_VEXT_VX(vrsub_vx_w, 4, 4)
707 GEN_VEXT_VX(vrsub_vx_d, 8, 8)
708 
709 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
710 {
711     intptr_t oprsz = simd_oprsz(desc);
712     intptr_t i;
713 
714     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
715         *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
716     }
717 }
718 
719 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
720 {
721     intptr_t oprsz = simd_oprsz(desc);
722     intptr_t i;
723 
724     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
725         *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
726     }
727 }
728 
729 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
730 {
731     intptr_t oprsz = simd_oprsz(desc);
732     intptr_t i;
733 
734     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
735         *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
736     }
737 }
738 
739 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
740 {
741     intptr_t oprsz = simd_oprsz(desc);
742     intptr_t i;
743 
744     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
745         *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
746     }
747 }
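/*
 * These four helpers use the generic gvec out-of-line signature
 * (d, a, scalar, desc) and compute scalar - vector over all oprsz bytes;
 * unlike the vrsub_vx_* helpers above they apply no mask, presumably serving
 * the inline gvec expansion of vrsub.
 */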
748 
749 /* Vector Widening Integer Add/Subtract */
750 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
751 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
752 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
753 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
754 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
755 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
756 #define WOP_WUUU_B  uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
757 #define WOP_WUUU_H  uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
758 #define WOP_WUUU_W  uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
759 #define WOP_WSSS_B  int16_t, int8_t, int16_t, int16_t, int16_t
760 #define WOP_WSSS_H  int32_t, int16_t, int32_t, int32_t, int32_t
761 #define WOP_WSSS_W  int64_t, int32_t, int64_t, int64_t, int64_t
762 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
763 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
764 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
765 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
766 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
767 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
768 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
769 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
770 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
771 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
772 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
773 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
774 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
775 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
776 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
777 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
778 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
779 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
780 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
781 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
782 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
783 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
784 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
785 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
786 GEN_VEXT_VV(vwaddu_vv_b, 1, 2)
787 GEN_VEXT_VV(vwaddu_vv_h, 2, 4)
788 GEN_VEXT_VV(vwaddu_vv_w, 4, 8)
789 GEN_VEXT_VV(vwsubu_vv_b, 1, 2)
790 GEN_VEXT_VV(vwsubu_vv_h, 2, 4)
791 GEN_VEXT_VV(vwsubu_vv_w, 4, 8)
792 GEN_VEXT_VV(vwadd_vv_b, 1, 2)
793 GEN_VEXT_VV(vwadd_vv_h, 2, 4)
794 GEN_VEXT_VV(vwadd_vv_w, 4, 8)
795 GEN_VEXT_VV(vwsub_vv_b, 1, 2)
796 GEN_VEXT_VV(vwsub_vv_h, 2, 4)
797 GEN_VEXT_VV(vwsub_vv_w, 4, 8)
798 GEN_VEXT_VV(vwaddu_wv_b, 1, 2)
799 GEN_VEXT_VV(vwaddu_wv_h, 2, 4)
800 GEN_VEXT_VV(vwaddu_wv_w, 4, 8)
801 GEN_VEXT_VV(vwsubu_wv_b, 1, 2)
802 GEN_VEXT_VV(vwsubu_wv_h, 2, 4)
803 GEN_VEXT_VV(vwsubu_wv_w, 4, 8)
804 GEN_VEXT_VV(vwadd_wv_b, 1, 2)
805 GEN_VEXT_VV(vwadd_wv_h, 2, 4)
806 GEN_VEXT_VV(vwadd_wv_w, 4, 8)
807 GEN_VEXT_VV(vwsub_wv_b, 1, 2)
808 GEN_VEXT_VV(vwsub_wv_h, 2, 4)
809 GEN_VEXT_VV(vwsub_wv_w, 4, 8)
810 
811 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
812 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
813 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
814 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
815 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
816 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
817 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
818 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
819 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
820 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
821 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
822 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
823 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
824 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
825 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
826 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
827 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
828 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
829 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
830 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
831 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
832 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
833 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
834 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
835 GEN_VEXT_VX(vwaddu_vx_b, 1, 2)
836 GEN_VEXT_VX(vwaddu_vx_h, 2, 4)
837 GEN_VEXT_VX(vwaddu_vx_w, 4, 8)
838 GEN_VEXT_VX(vwsubu_vx_b, 1, 2)
839 GEN_VEXT_VX(vwsubu_vx_h, 2, 4)
840 GEN_VEXT_VX(vwsubu_vx_w, 4, 8)
841 GEN_VEXT_VX(vwadd_vx_b, 1, 2)
842 GEN_VEXT_VX(vwadd_vx_h, 2, 4)
843 GEN_VEXT_VX(vwadd_vx_w, 4, 8)
844 GEN_VEXT_VX(vwsub_vx_b, 1, 2)
845 GEN_VEXT_VX(vwsub_vx_h, 2, 4)
846 GEN_VEXT_VX(vwsub_vx_w, 4, 8)
847 GEN_VEXT_VX(vwaddu_wx_b, 1, 2)
848 GEN_VEXT_VX(vwaddu_wx_h, 2, 4)
849 GEN_VEXT_VX(vwaddu_wx_w, 4, 8)
850 GEN_VEXT_VX(vwsubu_wx_b, 1, 2)
851 GEN_VEXT_VX(vwsubu_wx_h, 2, 4)
852 GEN_VEXT_VX(vwsubu_wx_w, 4, 8)
853 GEN_VEXT_VX(vwadd_wx_b, 1, 2)
854 GEN_VEXT_VX(vwadd_wx_h, 2, 4)
855 GEN_VEXT_VX(vwadd_wx_w, 4, 8)
856 GEN_VEXT_VX(vwsub_wx_b, 1, 2)
857 GEN_VEXT_VX(vwsub_wx_h, 2, 4)
858 GEN_VEXT_VX(vwsub_wx_w, 4, 8)
859 
860 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
861 #define DO_VADC(N, M, C) (N + M + C)
862 #define DO_VSBC(N, M, C) (N - M - C)
863 
864 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP)              \
865 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
866                   CPURISCVState *env, uint32_t desc)          \
867 {                                                             \
868     uint32_t vl = env->vl;                                    \
869     uint32_t i;                                               \
870                                                               \
871     for (i = 0; i < vl; i++) {                                \
872         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
873         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
874         uint8_t carry = vext_elem_mask(v0, i);                \
875                                                               \
876         *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry);         \
877     }                                                         \
878 }
879 
880 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t,  H1, DO_VADC)
881 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
882 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
883 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)
884 
885 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t,  H1, DO_VSBC)
886 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
887 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
888 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
889 
890 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP)                         \
891 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
892                   CPURISCVState *env, uint32_t desc)                     \
893 {                                                                        \
894     uint32_t vl = env->vl;                                               \
895     uint32_t i;                                                          \
896                                                                          \
897     for (i = 0; i < vl; i++) {                                           \
898         ETYPE s2 = *((ETYPE *)vs2 + H(i));                               \
899         uint8_t carry = vext_elem_mask(v0, i);                           \
900                                                                          \
901         *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
902     }                                                                    \
903 }
904 
905 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t,  H1, DO_VADC)
906 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
907 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
908 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)
909 
910 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t,  H1, DO_VSBC)
911 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
912 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
913 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
914 
915 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N :           \
916                           (__typeof(N))(N + M) < N)
917 #define DO_MSBC(N, M, C) (C ? N <= M : N < M)
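/*
 * DO_MADC computes the carry-out of N + M (+ C): with a carry-in the
 * truncated sum overflows exactly when it is <= N, without a carry-in when it
 * is < N.  DO_MSBC is the matching borrow-out test for subtraction.
 */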
918 
919 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
920 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
921                   CPURISCVState *env, uint32_t desc)          \
922 {                                                             \
923     uint32_t vl = env->vl;                                    \
924     uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);        \
925     uint32_t i;                                               \
926                                                               \
927     for (i = 0; i < vl; i++) {                                \
928         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
929         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
930         uint8_t carry = vext_elem_mask(v0, i);                \
931                                                               \
932         vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry));      \
933     }                                                         \
934     for (; i < vlmax; i++) {                                  \
935         vext_set_elem_mask(vd, i, 0);                         \
936     }                                                         \
937 }
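/*
 * Mask-producing helpers like this one (and the compare helpers further down)
 * also clear the destination mask bits for the tail elements from vl up to
 * vlmax, as done by the second loop above.
 */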
938 
939 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t,  H1, DO_MADC)
940 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
941 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
942 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)
943 
944 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t,  H1, DO_MSBC)
945 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
946 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
947 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
948 
949 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)               \
950 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
951                   void *vs2, CPURISCVState *env, uint32_t desc) \
952 {                                                               \
953     uint32_t vl = env->vl;                                      \
954     uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);          \
955     uint32_t i;                                                 \
956                                                                 \
957     for (i = 0; i < vl; i++) {                                  \
958         ETYPE s2 = *((ETYPE *)vs2 + H(i));                      \
959         uint8_t carry = vext_elem_mask(v0, i);                  \
960                                                                 \
961         vext_set_elem_mask(vd, i,                               \
962                 DO_OP(s2, (ETYPE)(target_long)s1, carry));      \
963     }                                                           \
964     for (; i < vlmax; i++) {                                    \
965         vext_set_elem_mask(vd, i, 0);                           \
966     }                                                           \
967 }
968 
969 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t,  H1, DO_MADC)
970 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
971 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
972 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)
973 
974 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t,  H1, DO_MSBC)
975 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
976 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
977 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
978 
979 /* Vector Bitwise Logical Instructions */
980 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
981 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
982 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
983 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
984 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
985 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
986 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
987 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
988 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
989 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
990 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
991 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
992 GEN_VEXT_VV(vand_vv_b, 1, 1)
993 GEN_VEXT_VV(vand_vv_h, 2, 2)
994 GEN_VEXT_VV(vand_vv_w, 4, 4)
995 GEN_VEXT_VV(vand_vv_d, 8, 8)
996 GEN_VEXT_VV(vor_vv_b, 1, 1)
997 GEN_VEXT_VV(vor_vv_h, 2, 2)
998 GEN_VEXT_VV(vor_vv_w, 4, 4)
999 GEN_VEXT_VV(vor_vv_d, 8, 8)
1000 GEN_VEXT_VV(vxor_vv_b, 1, 1)
1001 GEN_VEXT_VV(vxor_vv_h, 2, 2)
1002 GEN_VEXT_VV(vxor_vv_w, 4, 4)
1003 GEN_VEXT_VV(vxor_vv_d, 8, 8)
1004 
1005 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
1006 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
1007 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
1008 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
1009 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
1010 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
1011 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
1012 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
1013 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
1014 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
1015 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
1016 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
1017 GEN_VEXT_VX(vand_vx_b, 1, 1)
1018 GEN_VEXT_VX(vand_vx_h, 2, 2)
1019 GEN_VEXT_VX(vand_vx_w, 4, 4)
1020 GEN_VEXT_VX(vand_vx_d, 8, 8)
1021 GEN_VEXT_VX(vor_vx_b, 1, 1)
1022 GEN_VEXT_VX(vor_vx_h, 2, 2)
1023 GEN_VEXT_VX(vor_vx_w, 4, 4)
1024 GEN_VEXT_VX(vor_vx_d, 8, 8)
1025 GEN_VEXT_VX(vxor_vx_b, 1, 1)
1026 GEN_VEXT_VX(vxor_vx_h, 2, 2)
1027 GEN_VEXT_VX(vxor_vx_w, 4, 4)
1028 GEN_VEXT_VX(vxor_vx_d, 8, 8)
1029 
1030 /* Vector Single-Width Bit Shift Instructions */
1031 #define DO_SLL(N, M)  (N << (M))
1032 #define DO_SRL(N, M)  (N >> (M))
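/*
 * There is no separate DO_SRA: the arithmetic right shifts below reuse DO_SRL
 * and obtain sign extension by instantiating the source type as a signed type
 * (see the vsra_* expansions).
 */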
1033 
1034 /* generate the helpers for shift instructions with two vector operands */
1035 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK)             \
1036 void HELPER(NAME)(void *vd, void *v0, void *vs1,                          \
1037                   void *vs2, CPURISCVState *env, uint32_t desc)           \
1038 {                                                                         \
1039     uint32_t vm = vext_vm(desc);                                          \
1040     uint32_t vl = env->vl;                                                \
1041     uint32_t i;                                                           \
1042                                                                           \
1043     for (i = 0; i < vl; i++) {                                            \
1044         if (!vm && !vext_elem_mask(v0, i)) {                              \
1045             continue;                                                     \
1046         }                                                                 \
1047         TS1 s1 = *((TS1 *)vs1 + HS1(i));                                  \
1048         TS2 s2 = *((TS2 *)vs2 + HS2(i));                                  \
1049         *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);                        \
1050     }                                                                     \
1051 }
1052 
1053 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t,  uint8_t, H1, H1, DO_SLL, 0x7)
1054 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
1055 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
1056 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)
1057 
1058 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1059 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1060 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1061 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1062 
1063 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t,  int8_t, H1, H1, DO_SRL, 0x7)
1064 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
1065 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1066 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1067 
1068 /* generate the helpers for shift instructions with one vector and one scalar */
1069 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
1070 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,      \
1071         void *vs2, CPURISCVState *env, uint32_t desc)       \
1072 {                                                           \
1073     uint32_t vm = vext_vm(desc);                            \
1074     uint32_t vl = env->vl;                                  \
1075     uint32_t i;                                             \
1076                                                             \
1077     for (i = 0; i < vl; i++) {                              \
1078         if (!vm && !vext_elem_mask(v0, i)) {                \
1079             continue;                                       \
1080         }                                                   \
1081         TS2 s2 = *((TS2 *)vs2 + HS2(i));                    \
1082         *((TD *)vd + HD(i)) = OP(s2, s1 & MASK);            \
1083     }                                                       \
1084 }
1085 
1086 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
1087 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
1088 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
1089 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)
1090 
1091 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1092 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1093 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1094 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1095 
1096 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
1097 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
1098 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1099 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1100 
1101 /* Vector Narrowing Integer Right Shift Instructions */
1102 GEN_VEXT_SHIFT_VV(vnsrl_vv_b, uint8_t,  uint16_t, H1, H2, DO_SRL, 0xf)
1103 GEN_VEXT_SHIFT_VV(vnsrl_vv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1104 GEN_VEXT_SHIFT_VV(vnsrl_vv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1105 GEN_VEXT_SHIFT_VV(vnsra_vv_b, uint8_t,  int16_t, H1, H2, DO_SRL, 0xf)
1106 GEN_VEXT_SHIFT_VV(vnsra_vv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1107 GEN_VEXT_SHIFT_VV(vnsra_vv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1108 GEN_VEXT_SHIFT_VX(vnsrl_vx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1109 GEN_VEXT_SHIFT_VX(vnsrl_vx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1110 GEN_VEXT_SHIFT_VX(vnsrl_vx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1111 GEN_VEXT_SHIFT_VX(vnsra_vx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
1112 GEN_VEXT_SHIFT_VX(vnsra_vx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1113 GEN_VEXT_SHIFT_VX(vnsra_vx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1114 
1115 /* Vector Integer Comparison Instructions */
1116 #define DO_MSEQ(N, M) (N == M)
1117 #define DO_MSNE(N, M) (N != M)
1118 #define DO_MSLT(N, M) (N < M)
1119 #define DO_MSLE(N, M) (N <= M)
1120 #define DO_MSGT(N, M) (N > M)
1121 
1122 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                \
1123 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
1124                   CPURISCVState *env, uint32_t desc)          \
1125 {                                                             \
1126     uint32_t vm = vext_vm(desc);                              \
1127     uint32_t vl = env->vl;                                    \
1128     uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);        \
1129     uint32_t i;                                               \
1130                                                               \
1131     for (i = 0; i < vl; i++) {                                \
1132         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
1133         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
1134         if (!vm && !vext_elem_mask(v0, i)) {                  \
1135             continue;                                         \
1136         }                                                     \
1137         vext_set_elem_mask(vd, i, DO_OP(s2, s1));             \
1138     }                                                         \
1139     for (; i < vlmax; i++) {                                  \
1140         vext_set_elem_mask(vd, i, 0);                         \
1141     }                                                         \
1142 }
1143 
1144 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
1145 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
1146 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
1147 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
1148 
1149 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t,  H1, DO_MSNE)
1150 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
1151 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
1152 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
1153 
1154 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t,  H1, DO_MSLT)
1155 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
1156 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
1157 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
1158 
1159 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t,  H1, DO_MSLT)
1160 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
1161 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
1162 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
1163 
1164 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t,  H1, DO_MSLE)
1165 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
1166 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
1167 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
1168 
1169 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t,  H1, DO_MSLE)
1170 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
1171 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
1172 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1173 
1174 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP)                      \
1175 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
1176                   CPURISCVState *env, uint32_t desc)                \
1177 {                                                                   \
1178     uint32_t vm = vext_vm(desc);                                    \
1179     uint32_t vl = env->vl;                                          \
1180     uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);              \
1181     uint32_t i;                                                     \
1182                                                                     \
1183     for (i = 0; i < vl; i++) {                                      \
1184         ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
1185         if (!vm && !vext_elem_mask(v0, i)) {                        \
1186             continue;                                               \
1187         }                                                           \
1188         vext_set_elem_mask(vd, i,                                   \
1189                 DO_OP(s2, (ETYPE)(target_long)s1));                 \
1190     }                                                               \
1191     for (; i < vlmax; i++) {                                        \
1192         vext_set_elem_mask(vd, i, 0);                               \
1193     }                                                               \
1194 }
1195 
1196 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t,  H1, DO_MSEQ)
1197 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
1198 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
1199 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
1200 
1201 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t,  H1, DO_MSNE)
1202 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
1203 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
1204 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
1205 
1206 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t,  H1, DO_MSLT)
1207 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
1208 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
1209 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
1210 
1211 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t,  H1, DO_MSLT)
1212 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
1213 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
1214 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
1215 
1216 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t,  H1, DO_MSLE)
1217 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
1218 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
1219 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
1220 
1221 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t,  H1, DO_MSLE)
1222 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
1223 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
1224 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
1225 
1226 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t,  H1, DO_MSGT)
1227 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
1228 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
1229 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
1230 
1231 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t,  H1, DO_MSGT)
1232 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
1233 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
1234 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
1235 
1236 /* Vector Integer Min/Max Instructions */
1237 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1238 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1239 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1240 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1241 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1242 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1243 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1244 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1245 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1246 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1247 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1248 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1249 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1250 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1251 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1252 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
1253 GEN_VEXT_VV(vminu_vv_b, 1, 1)
1254 GEN_VEXT_VV(vminu_vv_h, 2, 2)
1255 GEN_VEXT_VV(vminu_vv_w, 4, 4)
1256 GEN_VEXT_VV(vminu_vv_d, 8, 8)
1257 GEN_VEXT_VV(vmin_vv_b, 1, 1)
1258 GEN_VEXT_VV(vmin_vv_h, 2, 2)
1259 GEN_VEXT_VV(vmin_vv_w, 4, 4)
1260 GEN_VEXT_VV(vmin_vv_d, 8, 8)
1261 GEN_VEXT_VV(vmaxu_vv_b, 1, 1)
1262 GEN_VEXT_VV(vmaxu_vv_h, 2, 2)
1263 GEN_VEXT_VV(vmaxu_vv_w, 4, 4)
1264 GEN_VEXT_VV(vmaxu_vv_d, 8, 8)
1265 GEN_VEXT_VV(vmax_vv_b, 1, 1)
1266 GEN_VEXT_VV(vmax_vv_h, 2, 2)
1267 GEN_VEXT_VV(vmax_vv_w, 4, 4)
1268 GEN_VEXT_VV(vmax_vv_d, 8, 8)
1269 
1270 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1271 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1272 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1273 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1274 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1275 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1276 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1277 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1278 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1279 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1280 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1281 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1282 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1283 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1284 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1285 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
1286 GEN_VEXT_VX(vminu_vx_b, 1, 1)
1287 GEN_VEXT_VX(vminu_vx_h, 2, 2)
1288 GEN_VEXT_VX(vminu_vx_w, 4, 4)
1289 GEN_VEXT_VX(vminu_vx_d, 8, 8)
1290 GEN_VEXT_VX(vmin_vx_b, 1, 1)
1291 GEN_VEXT_VX(vmin_vx_h, 2, 2)
1292 GEN_VEXT_VX(vmin_vx_w, 4, 4)
1293 GEN_VEXT_VX(vmin_vx_d, 8, 8)
1294 GEN_VEXT_VX(vmaxu_vx_b, 1, 1)
1295 GEN_VEXT_VX(vmaxu_vx_h, 2, 2)
1296 GEN_VEXT_VX(vmaxu_vx_w, 4, 4)
1297 GEN_VEXT_VX(vmaxu_vx_d, 8, 8)
1298 GEN_VEXT_VX(vmax_vx_b, 1, 1)
1299 GEN_VEXT_VX(vmax_vx_h, 2, 2)
1300 GEN_VEXT_VX(vmax_vx_w, 4, 4)
1301 GEN_VEXT_VX(vmax_vx_d, 8, 8)
1302 
1303 /* Vector Single-Width Integer Multiply Instructions */
1304 #define DO_MUL(N, M) (N * M)
1305 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1306 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1307 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1308 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
1309 GEN_VEXT_VV(vmul_vv_b, 1, 1)
1310 GEN_VEXT_VV(vmul_vv_h, 2, 2)
1311 GEN_VEXT_VV(vmul_vv_w, 4, 4)
1312 GEN_VEXT_VV(vmul_vv_d, 8, 8)
1313 
1314 static int8_t do_mulh_b(int8_t s2, int8_t s1)
1315 {
1316     return (int16_t)s2 * (int16_t)s1 >> 8;
1317 }
1318 
1319 static int16_t do_mulh_h(int16_t s2, int16_t s1)
1320 {
1321     return (int32_t)s2 * (int32_t)s1 >> 16;
1322 }
1323 
1324 static int32_t do_mulh_w(int32_t s2, int32_t s1)
1325 {
1326     return (int64_t)s2 * (int64_t)s1 >> 32;
1327 }
1328 
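/*
 * The 8/16/32-bit variants above simply widen into the next larger C type
 * and shift.  A 128-bit integer type is not available on every host, so
 * the 64-bit case uses the muls64()/mulu64() host utilities to obtain the
 * high half of the product instead.
 */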
1329 static int64_t do_mulh_d(int64_t s2, int64_t s1)
1330 {
1331     uint64_t hi_64, lo_64;
1332 
1333     muls64(&lo_64, &hi_64, s1, s2);
1334     return hi_64;
1335 }
1336 
1337 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1338 {
1339     return (uint16_t)s2 * (uint16_t)s1 >> 8;
1340 }
1341 
1342 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1343 {
1344     return (uint32_t)s2 * (uint32_t)s1 >> 16;
1345 }
1346 
1347 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1348 {
1349     return (uint64_t)s2 * (uint64_t)s1 >> 32;
1350 }
1351 
1352 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1353 {
1354     uint64_t hi_64, lo_64;
1355 
1356     mulu64(&lo_64, &hi_64, s2, s1);
1357     return hi_64;
1358 }
1359 
1360 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1361 {
1362     return (int16_t)s2 * (uint16_t)s1 >> 8;
1363 }
1364 
1365 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1366 {
1367     return (int32_t)s2 * (uint32_t)s1 >> 16;
1368 }
1369 
1370 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1371 {
1372     return (int64_t)s2 * (uint64_t)s1 >> 32;
1373 }
1374 
1375 /*
1376  * Let  A = signed operand,
1377  *      B = unsigned operand,
1378  *      P = mulu64(A, B), unsigned product of the raw bit patterns,
1379  *      SP = A * B, the signed product we want.
1380  *
1381  * IF A >= 0
1382  *      the bit pattern of A equals A, so
1383  *      SP = P
1384  * ELSE (A < 0)
1385  *      the bit pattern of A is A + 2 ** 64, so
1386  *      P  = (A + 2 ** 64) * B
1387  *         = A * B + 2 ** 64 * B
1388  *      SP = A * B = P - 2 ** 64 * B,
1389  *      so the high 64 bits of SP are hi(P) - B.
1390  * THEREFORE
1391  *      HI_P -= (A < 0 ? B : 0)
1392  */
1393 
1394 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1395 {
1396     uint64_t hi_64, lo_64;
1397 
1398     mulu64(&lo_64, &hi_64, s2, s1);
1399 
1400     hi_64 -= s2 < 0 ? s1 : 0;
1401     return hi_64;
1402 }
1403 
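/*
 * Worked example of the fix-up in do_mulhsu_d() (illustrative only):
 * A = -1, B = 3.  The raw bit pattern of A is 2**64 - 1, so
 *     P = (2**64 - 1) * 3 = 2 * 2**64 + (2**64 - 3),
 * i.e. hi(P) = 2.  The true signed product is -3, whose high half is -1,
 * and indeed hi(P) - B = 2 - 3 = -1.
 */
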
1404 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1405 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1406 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1407 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1408 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1409 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1410 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1411 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1412 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1413 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1414 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1415 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
1416 GEN_VEXT_VV(vmulh_vv_b, 1, 1)
1417 GEN_VEXT_VV(vmulh_vv_h, 2, 2)
1418 GEN_VEXT_VV(vmulh_vv_w, 4, 4)
1419 GEN_VEXT_VV(vmulh_vv_d, 8, 8)
1420 GEN_VEXT_VV(vmulhu_vv_b, 1, 1)
1421 GEN_VEXT_VV(vmulhu_vv_h, 2, 2)
1422 GEN_VEXT_VV(vmulhu_vv_w, 4, 4)
1423 GEN_VEXT_VV(vmulhu_vv_d, 8, 8)
1424 GEN_VEXT_VV(vmulhsu_vv_b, 1, 1)
1425 GEN_VEXT_VV(vmulhsu_vv_h, 2, 2)
1426 GEN_VEXT_VV(vmulhsu_vv_w, 4, 4)
1427 GEN_VEXT_VV(vmulhsu_vv_d, 8, 8)
1428 
1429 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1430 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1431 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1432 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1433 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1434 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1435 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1436 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1437 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1438 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1439 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1440 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1441 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1442 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1443 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1444 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
1445 GEN_VEXT_VX(vmul_vx_b, 1, 1)
1446 GEN_VEXT_VX(vmul_vx_h, 2, 2)
1447 GEN_VEXT_VX(vmul_vx_w, 4, 4)
1448 GEN_VEXT_VX(vmul_vx_d, 8, 8)
1449 GEN_VEXT_VX(vmulh_vx_b, 1, 1)
1450 GEN_VEXT_VX(vmulh_vx_h, 2, 2)
1451 GEN_VEXT_VX(vmulh_vx_w, 4, 4)
1452 GEN_VEXT_VX(vmulh_vx_d, 8, 8)
1453 GEN_VEXT_VX(vmulhu_vx_b, 1, 1)
1454 GEN_VEXT_VX(vmulhu_vx_h, 2, 2)
1455 GEN_VEXT_VX(vmulhu_vx_w, 4, 4)
1456 GEN_VEXT_VX(vmulhu_vx_d, 8, 8)
1457 GEN_VEXT_VX(vmulhsu_vx_b, 1, 1)
1458 GEN_VEXT_VX(vmulhsu_vx_h, 2, 2)
1459 GEN_VEXT_VX(vmulhsu_vx_w, 4, 4)
1460 GEN_VEXT_VX(vmulhsu_vx_d, 8, 8)
1461 
1462 /* Vector Integer Divide Instructions */
1463 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1464 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
1465 #define DO_DIV(N, M)  (unlikely(M == 0) ? (__typeof(N))(-1) :\
1466         unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
1467 #define DO_REM(N, M)  (unlikely(M == 0) ? N :\
1468         unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
1469 
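/*
 * Illustrative sketch only (not part of the helpers): the macros above
 * encode the RISC-V rule that integer division never traps.  Division by
 * zero returns all ones (quotient) and the dividend (remainder); signed
 * overflow returns the dividend with a zero remainder via the (N == -N)
 * clause.  The hypothetical self-check below exercises the zero-divisor
 * cases with variable operands so no constant division is emitted.
 */
static G_GNUC_UNUSED void vext_div_by_zero_demo(void)
{
    uint16_t un = 1234, uz = 0;
    int16_t  sn = -55,  sz = 0;

    g_assert(DO_DIVU(un, uz) == UINT16_MAX); /* unsigned x / 0 -> all ones */
    g_assert(DO_REMU(un, uz) == un);         /* unsigned x % 0 -> x        */
    g_assert(DO_DIV(sn, sz) == -1);          /* signed   x / 0 -> -1       */
    g_assert(DO_REM(sn, sz) == sn);          /* signed   x % 0 -> x        */
}
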
1470 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1471 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1472 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1473 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1474 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1475 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1476 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1477 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1478 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1479 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1480 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1481 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1482 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1483 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1484 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1485 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
1486 GEN_VEXT_VV(vdivu_vv_b, 1, 1)
1487 GEN_VEXT_VV(vdivu_vv_h, 2, 2)
1488 GEN_VEXT_VV(vdivu_vv_w, 4, 4)
1489 GEN_VEXT_VV(vdivu_vv_d, 8, 8)
1490 GEN_VEXT_VV(vdiv_vv_b, 1, 1)
1491 GEN_VEXT_VV(vdiv_vv_h, 2, 2)
1492 GEN_VEXT_VV(vdiv_vv_w, 4, 4)
1493 GEN_VEXT_VV(vdiv_vv_d, 8, 8)
1494 GEN_VEXT_VV(vremu_vv_b, 1, 1)
1495 GEN_VEXT_VV(vremu_vv_h, 2, 2)
1496 GEN_VEXT_VV(vremu_vv_w, 4, 4)
1497 GEN_VEXT_VV(vremu_vv_d, 8, 8)
1498 GEN_VEXT_VV(vrem_vv_b, 1, 1)
1499 GEN_VEXT_VV(vrem_vv_h, 2, 2)
1500 GEN_VEXT_VV(vrem_vv_w, 4, 4)
1501 GEN_VEXT_VV(vrem_vv_d, 8, 8)
1502 
1503 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1504 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1505 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1506 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1507 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1508 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1509 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1510 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1511 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1512 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1513 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1514 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1515 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1516 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1517 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1518 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
1519 GEN_VEXT_VX(vdivu_vx_b, 1, 1)
1520 GEN_VEXT_VX(vdivu_vx_h, 2, 2)
1521 GEN_VEXT_VX(vdivu_vx_w, 4, 4)
1522 GEN_VEXT_VX(vdivu_vx_d, 8, 8)
1523 GEN_VEXT_VX(vdiv_vx_b, 1, 1)
1524 GEN_VEXT_VX(vdiv_vx_h, 2, 2)
1525 GEN_VEXT_VX(vdiv_vx_w, 4, 4)
1526 GEN_VEXT_VX(vdiv_vx_d, 8, 8)
1527 GEN_VEXT_VX(vremu_vx_b, 1, 1)
1528 GEN_VEXT_VX(vremu_vx_h, 2, 2)
1529 GEN_VEXT_VX(vremu_vx_w, 4, 4)
1530 GEN_VEXT_VX(vremu_vx_d, 8, 8)
1531 GEN_VEXT_VX(vrem_vx_b, 1, 1)
1532 GEN_VEXT_VX(vrem_vx_h, 2, 2)
1533 GEN_VEXT_VX(vrem_vx_w, 4, 4)
1534 GEN_VEXT_VX(vrem_vx_d, 8, 8)
1535 
1536 /* Vector Widening Integer Multiply Instructions */
1537 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1538 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1539 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1540 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1541 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1542 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1543 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1544 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1545 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
1546 GEN_VEXT_VV(vwmul_vv_b, 1, 2)
1547 GEN_VEXT_VV(vwmul_vv_h, 2, 4)
1548 GEN_VEXT_VV(vwmul_vv_w, 4, 8)
1549 GEN_VEXT_VV(vwmulu_vv_b, 1, 2)
1550 GEN_VEXT_VV(vwmulu_vv_h, 2, 4)
1551 GEN_VEXT_VV(vwmulu_vv_w, 4, 8)
1552 GEN_VEXT_VV(vwmulsu_vv_b, 1, 2)
1553 GEN_VEXT_VV(vwmulsu_vv_h, 2, 4)
1554 GEN_VEXT_VV(vwmulsu_vv_w, 4, 8)
1555 
1556 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1557 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1558 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1559 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1560 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1561 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1562 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1563 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1564 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
1565 GEN_VEXT_VX(vwmul_vx_b, 1, 2)
1566 GEN_VEXT_VX(vwmul_vx_h, 2, 4)
1567 GEN_VEXT_VX(vwmul_vx_w, 4, 8)
1568 GEN_VEXT_VX(vwmulu_vx_b, 1, 2)
1569 GEN_VEXT_VX(vwmulu_vx_h, 2, 4)
1570 GEN_VEXT_VX(vwmulu_vx_w, 4, 8)
1571 GEN_VEXT_VX(vwmulsu_vx_b, 1, 2)
1572 GEN_VEXT_VX(vwmulsu_vx_h, 2, 4)
1573 GEN_VEXT_VX(vwmulsu_vx_w, 4, 8)
1574 
1575 /* Vector Single-Width Integer Multiply-Add Instructions */
1576 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
1577 static void do_##NAME(void *vd, void *vs1, void *vs2, int i)       \
1578 {                                                                  \
1579     TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
1580     TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
1581     TD d = *((TD *)vd + HD(i));                                    \
1582     *((TD *)vd + HD(i)) = OP(s2, s1, d);                           \
1583 }
1584 
1585 #define DO_MACC(N, M, D) (M * N + D)
1586 #define DO_NMSAC(N, M, D) (-(M * N) + D)
1587 #define DO_MADD(N, M, D) (M * D + N)
1588 #define DO_NMSUB(N, M, D) (-(M * D) + N)
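/*
 * With the OPIVV3 (and OPIVX3 below) argument order, (N, M, D) is the
 * (vs2, vs1/rs1, vd) element: DO_MACC/DO_NMSAC accumulate into the
 * destination, vd = +/-(vs1 * vs2) + vd, while DO_MADD/DO_NMSUB multiply
 * by the destination, vd = +/-(vs1 * vd) + vs2.
 */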
1589 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1590 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1591 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1592 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1593 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1594 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1595 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1596 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1597 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1598 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1599 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1600 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1601 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1602 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1603 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1604 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
1605 GEN_VEXT_VV(vmacc_vv_b, 1, 1)
1606 GEN_VEXT_VV(vmacc_vv_h, 2, 2)
1607 GEN_VEXT_VV(vmacc_vv_w, 4, 4)
1608 GEN_VEXT_VV(vmacc_vv_d, 8, 8)
1609 GEN_VEXT_VV(vnmsac_vv_b, 1, 1)
1610 GEN_VEXT_VV(vnmsac_vv_h, 2, 2)
1611 GEN_VEXT_VV(vnmsac_vv_w, 4, 4)
1612 GEN_VEXT_VV(vnmsac_vv_d, 8, 8)
1613 GEN_VEXT_VV(vmadd_vv_b, 1, 1)
1614 GEN_VEXT_VV(vmadd_vv_h, 2, 2)
1615 GEN_VEXT_VV(vmadd_vv_w, 4, 4)
1616 GEN_VEXT_VV(vmadd_vv_d, 8, 8)
1617 GEN_VEXT_VV(vnmsub_vv_b, 1, 1)
1618 GEN_VEXT_VV(vnmsub_vv_h, 2, 2)
1619 GEN_VEXT_VV(vnmsub_vv_w, 4, 4)
1620 GEN_VEXT_VV(vnmsub_vv_d, 8, 8)
1621 
1622 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
1623 static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
1624 {                                                                   \
1625     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
1626     TD d = *((TD *)vd + HD(i));                                     \
1627     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d);                   \
1628 }
1629 
1630 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1631 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1632 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1633 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1634 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1635 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1636 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1637 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1638 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1639 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1640 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1641 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1642 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1643 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1644 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1645 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
1646 GEN_VEXT_VX(vmacc_vx_b, 1, 1)
1647 GEN_VEXT_VX(vmacc_vx_h, 2, 2)
1648 GEN_VEXT_VX(vmacc_vx_w, 4, 4)
1649 GEN_VEXT_VX(vmacc_vx_d, 8, 8)
1650 GEN_VEXT_VX(vnmsac_vx_b, 1, 1)
1651 GEN_VEXT_VX(vnmsac_vx_h, 2, 2)
1652 GEN_VEXT_VX(vnmsac_vx_w, 4, 4)
1653 GEN_VEXT_VX(vnmsac_vx_d, 8, 8)
1654 GEN_VEXT_VX(vmadd_vx_b, 1, 1)
1655 GEN_VEXT_VX(vmadd_vx_h, 2, 2)
1656 GEN_VEXT_VX(vmadd_vx_w, 4, 4)
1657 GEN_VEXT_VX(vmadd_vx_d, 8, 8)
1658 GEN_VEXT_VX(vnmsub_vx_b, 1, 1)
1659 GEN_VEXT_VX(vnmsub_vx_h, 2, 2)
1660 GEN_VEXT_VX(vnmsub_vx_w, 4, 4)
1661 GEN_VEXT_VX(vnmsub_vx_d, 8, 8)
1662 
1663 /* Vector Widening Integer Multiply-Add Instructions */
1664 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1665 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1666 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1667 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1668 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1669 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1670 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1671 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1672 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
1673 GEN_VEXT_VV(vwmaccu_vv_b, 1, 2)
1674 GEN_VEXT_VV(vwmaccu_vv_h, 2, 4)
1675 GEN_VEXT_VV(vwmaccu_vv_w, 4, 8)
1676 GEN_VEXT_VV(vwmacc_vv_b, 1, 2)
1677 GEN_VEXT_VV(vwmacc_vv_h, 2, 4)
1678 GEN_VEXT_VV(vwmacc_vv_w, 4, 8)
1679 GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2)
1680 GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4)
1681 GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8)
1682 
1683 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
1684 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
1685 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
1686 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
1687 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
1688 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
1689 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
1690 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
1691 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
1692 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
1693 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
1694 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
1695 GEN_VEXT_VX(vwmaccu_vx_b, 1, 2)
1696 GEN_VEXT_VX(vwmaccu_vx_h, 2, 4)
1697 GEN_VEXT_VX(vwmaccu_vx_w, 4, 8)
1698 GEN_VEXT_VX(vwmacc_vx_b, 1, 2)
1699 GEN_VEXT_VX(vwmacc_vx_h, 2, 4)
1700 GEN_VEXT_VX(vwmacc_vx_w, 4, 8)
1701 GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2)
1702 GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4)
1703 GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8)
1704 GEN_VEXT_VX(vwmaccus_vx_b, 1, 2)
1705 GEN_VEXT_VX(vwmaccus_vx_h, 2, 4)
1706 GEN_VEXT_VX(vwmaccus_vx_w, 4, 8)
1707 
1708 /* Vector Integer Merge and Move Instructions */
1709 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H)                              \
1710 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env,           \
1711                   uint32_t desc)                                     \
1712 {                                                                    \
1713     uint32_t vl = env->vl;                                           \
1714     uint32_t i;                                                      \
1715                                                                      \
1716     for (i = 0; i < vl; i++) {                                       \
1717         ETYPE s1 = *((ETYPE *)vs1 + H(i));                           \
1718         *((ETYPE *)vd + H(i)) = s1;                                  \
1719     }                                                                \
1720 }
1721 
1722 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t,  H1)
1723 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
1724 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
1725 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
1726 
1727 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H)                              \
1728 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env,         \
1729                   uint32_t desc)                                     \
1730 {                                                                    \
1731     uint32_t vl = env->vl;                                           \
1732     uint32_t i;                                                      \
1733                                                                      \
1734     for (i = 0; i < vl; i++) {                                       \
1735         *((ETYPE *)vd + H(i)) = (ETYPE)s1;                           \
1736     }                                                                \
1737 }
1738 
1739 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t,  H1)
1740 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
1741 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
1742 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
1743 
1744 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H)                           \
1745 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,          \
1746                   CPURISCVState *env, uint32_t desc)                 \
1747 {                                                                    \
1748     uint32_t vl = env->vl;                                           \
1749     uint32_t i;                                                      \
1750                                                                      \
1751     for (i = 0; i < vl; i++) {                                       \
1752         ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1);            \
1753         *((ETYPE *)vd + H(i)) = *(vt + H(i));                        \
1754     }                                                                \
1755 }
1756 
1757 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t,  H1)
1758 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
1759 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
1760 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
1761 
1762 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H)                           \
1763 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,               \
1764                   void *vs2, CPURISCVState *env, uint32_t desc)      \
1765 {                                                                    \
1766     uint32_t vl = env->vl;                                           \
1767     uint32_t i;                                                      \
1768                                                                      \
1769     for (i = 0; i < vl; i++) {                                       \
1770         ETYPE s2 = *((ETYPE *)vs2 + H(i));                           \
1771         ETYPE d = (!vext_elem_mask(v0, i) ? s2 :                     \
1772                    (ETYPE)(target_long)s1);                          \
1773         *((ETYPE *)vd + H(i)) = d;                                   \
1774     }                                                                \
1775 }
1776 
1777 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t,  H1)
1778 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
1779 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
1780 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
1781 
1782 /*
1783  *** Vector Fixed-Point Arithmetic Instructions
1784  */
1785 
1786 /* Vector Single-Width Saturating Add and Subtract */
1787 
1788 /*
1789  * Fixed-point instructions take a rounding mode and may saturate, so the
1790  * common fixed-point helper macros are defined here.
1791  */
1792 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
1793                           CPURISCVState *env, int vxrm);
1794 
1795 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)     \
1796 static inline void                                                  \
1797 do_##NAME(void *vd, void *vs1, void *vs2, int i,                    \
1798           CPURISCVState *env, int vxrm)                             \
1799 {                                                                   \
1800     TX1 s1 = *((T1 *)vs1 + HS1(i));                                 \
1801     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
1802     *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1);                    \
1803 }
1804 
1805 static inline void
1806 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
1807              CPURISCVState *env,
1808              uint32_t vl, uint32_t vm, int vxrm,
1809              opivv2_rm_fn *fn)
1810 {
1811     for (uint32_t i = 0; i < vl; i++) {
1812         if (!vm && !vext_elem_mask(v0, i)) {
1813             continue;
1814         }
1815         fn(vd, vs1, vs2, i, env, vxrm);
1816     }
1817 }
1818 
1819 static inline void
1820 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
1821              CPURISCVState *env,
1822              uint32_t desc, uint32_t esz, uint32_t dsz,
1823              opivv2_rm_fn *fn)
1824 {
1825     uint32_t vm = vext_vm(desc);
1826     uint32_t vl = env->vl;
1827 
1828     switch (env->vxrm) {
1829     case 0: /* rnu */
1830         vext_vv_rm_1(vd, v0, vs1, vs2,
1831                      env, vl, vm, 0, fn);
1832         break;
1833     case 1: /* rne */
1834         vext_vv_rm_1(vd, v0, vs1, vs2,
1835                      env, vl, vm, 1, fn);
1836         break;
1837     case 2: /* rdn */
1838         vext_vv_rm_1(vd, v0, vs1, vs2,
1839                      env, vl, vm, 2, fn);
1840         break;
1841     default: /* rod */
1842         vext_vv_rm_1(vd, v0, vs1, vs2,
1843                      env, vl, vm, 3, fn);
1844         break;
1845     }
1846 }
1847 
1848 /* generate helpers for fixed point instructions with OPIVV format */
1849 #define GEN_VEXT_VV_RM(NAME, ESZ, DSZ)                          \
1850 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,     \
1851                   CPURISCVState *env, uint32_t desc)            \
1852 {                                                               \
1853     vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,         \
1854                  do_##NAME);                                    \
1855 }
1856 
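/*
 * Unsigned saturating add: a carry out of the top bit makes the truncated
 * sum smaller than either operand, so "res < a" detects overflow without
 * needing a wider type.  E.g. for uint8_t, 200 + 100 wraps to 44 < 200,
 * so the result saturates to UINT8_MAX and vxsat is set.
 */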
1857 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
1858 {
1859     uint8_t res = a + b;
1860     if (res < a) {
1861         res = UINT8_MAX;
1862         env->vxsat = 0x1;
1863     }
1864     return res;
1865 }
1866 
1867 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
1868                                uint16_t b)
1869 {
1870     uint16_t res = a + b;
1871     if (res < a) {
1872         res = UINT16_MAX;
1873         env->vxsat = 0x1;
1874     }
1875     return res;
1876 }
1877 
1878 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
1879                                uint32_t b)
1880 {
1881     uint32_t res = a + b;
1882     if (res < a) {
1883         res = UINT32_MAX;
1884         env->vxsat = 0x1;
1885     }
1886     return res;
1887 }
1888 
1889 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
1890                                uint64_t b)
1891 {
1892     uint64_t res = a + b;
1893     if (res < a) {
1894         res = UINT64_MAX;
1895         env->vxsat = 0x1;
1896     }
1897     return res;
1898 }
1899 
1900 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
1901 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
1902 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
1903 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
1904 GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1)
1905 GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2)
1906 GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4)
1907 GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8)
1908 
1909 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
1910                           CPURISCVState *env, int vxrm);
1911 
1912 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)          \
1913 static inline void                                                  \
1914 do_##NAME(void *vd, target_long s1, void *vs2, int i,               \
1915           CPURISCVState *env, int vxrm)                             \
1916 {                                                                   \
1917     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
1918     *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1);           \
1919 }
1920 
1921 static inline void
1922 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
1923              CPURISCVState *env,
1924              uint32_t vl, uint32_t vm, int vxrm,
1925              opivx2_rm_fn *fn)
1926 {
1927     for (uint32_t i = 0; i < vl; i++) {
1928         if (!vm && !vext_elem_mask(v0, i)) {
1929             continue;
1930         }
1931         fn(vd, s1, vs2, i, env, vxrm);
1932     }
1933 }
1934 
1935 static inline void
1936 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
1937              CPURISCVState *env,
1938              uint32_t desc, uint32_t esz, uint32_t dsz,
1939              opivx2_rm_fn *fn)
1940 {
1941     uint32_t vm = vext_vm(desc);
1942     uint32_t vl = env->vl;
1943 
1944     switch (env->vxrm) {
1945     case 0: /* rnu */
1946         vext_vx_rm_1(vd, v0, s1, vs2,
1947                      env, vl, vm, 0, fn);
1948         break;
1949     case 1: /* rne */
1950         vext_vx_rm_1(vd, v0, s1, vs2,
1951                      env, vl, vm, 1, fn);
1952         break;
1953     case 2: /* rdn */
1954         vext_vx_rm_1(vd, v0, s1, vs2,
1955                      env, vl, vm, 2, fn);
1956         break;
1957     default: /* rod */
1958         vext_vx_rm_1(vd, v0, s1, vs2,
1959                      env, vl, vm, 3, fn);
1960         break;
1961     }
1962 }
1963 
1964 /* generate helpers for fixed point instructions with OPIVX format */
1965 #define GEN_VEXT_VX_RM(NAME, ESZ, DSZ)                    \
1966 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
1967         void *vs2, CPURISCVState *env, uint32_t desc)     \
1968 {                                                         \
1969     vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ,    \
1970                  do_##NAME);                              \
1971 }
1972 
1973 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
1974 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
1975 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
1976 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
1977 GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1)
1978 GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2)
1979 GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4)
1980 GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8)
1981 
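/*
 * Signed saturating add: overflow is only possible when both operands have
 * the same sign and the truncated sum has the opposite sign, which is what
 * (res ^ a) & (res ^ b) & INT*_MIN tests.  E.g. for int8_t, 100 + 100
 * wraps to -56, so the result saturates to INT8_MAX and vxsat is set.
 */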
1982 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
1983 {
1984     int8_t res = a + b;
1985     if ((res ^ a) & (res ^ b) & INT8_MIN) {
1986         res = a > 0 ? INT8_MAX : INT8_MIN;
1987         env->vxsat = 0x1;
1988     }
1989     return res;
1990 }
1991 
1992 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
1993 {
1994     int16_t res = a + b;
1995     if ((res ^ a) & (res ^ b) & INT16_MIN) {
1996         res = a > 0 ? INT16_MAX : INT16_MIN;
1997         env->vxsat = 0x1;
1998     }
1999     return res;
2000 }
2001 
2002 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2003 {
2004     int32_t res = a + b;
2005     if ((res ^ a) & (res ^ b) & INT32_MIN) {
2006         res = a > 0 ? INT32_MAX : INT32_MIN;
2007         env->vxsat = 0x1;
2008     }
2009     return res;
2010 }
2011 
2012 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2013 {
2014     int64_t res = a + b;
2015     if ((res ^ a) & (res ^ b) & INT64_MIN) {
2016         res = a > 0 ? INT64_MAX : INT64_MIN;
2017         env->vxsat = 0x1;
2018     }
2019     return res;
2020 }
2021 
2022 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2023 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2024 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2025 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
2026 GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1)
2027 GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2)
2028 GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4)
2029 GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8)
2030 
2031 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2032 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2033 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2034 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
2035 GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1)
2036 GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2)
2037 GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4)
2038 GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8)
2039 
2040 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2041 {
2042     uint8_t res = a - b;
2043     if (res > a) {
2044         res = 0;
2045         env->vxsat = 0x1;
2046     }
2047     return res;
2048 }
2049 
2050 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2051                                uint16_t b)
2052 {
2053     uint16_t res = a - b;
2054     if (res > a) {
2055         res = 0;
2056         env->vxsat = 0x1;
2057     }
2058     return res;
2059 }
2060 
2061 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2062                                uint32_t b)
2063 {
2064     uint32_t res = a - b;
2065     if (res > a) {
2066         res = 0;
2067         env->vxsat = 0x1;
2068     }
2069     return res;
2070 }
2071 
2072 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2073                                uint64_t b)
2074 {
2075     uint64_t res = a - b;
2076     if (res > a) {
2077         res = 0;
2078         env->vxsat = 0x1;
2079     }
2080     return res;
2081 }
2082 
2083 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2084 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2085 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2086 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
2087 GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1)
2088 GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2)
2089 GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4)
2090 GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8)
2091 
2092 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2093 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2094 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2095 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
2096 GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1)
2097 GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2)
2098 GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4)
2099 GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8)
2100 
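/*
 * Signed saturating subtract: overflow requires operands of opposite sign
 * and a result whose sign differs from a, hence the
 * (res ^ a) & (a ^ b) & INT*_MIN test.
 */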
2101 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2102 {
2103     int8_t res = a - b;
2104     if ((res ^ a) & (a ^ b) & INT8_MIN) {
2105         res = a >= 0 ? INT8_MAX : INT8_MIN;
2106         env->vxsat = 0x1;
2107     }
2108     return res;
2109 }
2110 
2111 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2112 {
2113     int16_t res = a - b;
2114     if ((res ^ a) & (a ^ b) & INT16_MIN) {
2115         res = a >= 0 ? INT16_MAX : INT16_MIN;
2116         env->vxsat = 0x1;
2117     }
2118     return res;
2119 }
2120 
2121 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2122 {
2123     int32_t res = a - b;
2124     if ((res ^ a) & (a ^ b) & INT32_MIN) {
2125         res = a >= 0 ? INT32_MAX : INT32_MIN;
2126         env->vxsat = 0x1;
2127     }
2128     return res;
2129 }
2130 
2131 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2132 {
2133     int64_t res = a - b;
2134     if ((res ^ a) & (a ^ b) & INT64_MIN) {
2135         res = a >= 0 ? INT64_MAX : INT64_MIN;
2136         env->vxsat = 0x1;
2137     }
2138     return res;
2139 }
2140 
2141 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2142 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2143 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2144 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
2145 GEN_VEXT_VV_RM(vssub_vv_b, 1, 1)
2146 GEN_VEXT_VV_RM(vssub_vv_h, 2, 2)
2147 GEN_VEXT_VV_RM(vssub_vv_w, 4, 4)
2148 GEN_VEXT_VV_RM(vssub_vv_d, 8, 8)
2149 
2150 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2151 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2152 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2153 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
2154 GEN_VEXT_VX_RM(vssub_vx_b, 1, 1)
2155 GEN_VEXT_VX_RM(vssub_vx_h, 2, 2)
2156 GEN_VEXT_VX_RM(vssub_vx_w, 4, 4)
2157 GEN_VEXT_VX_RM(vssub_vx_d, 8, 8)
2158 
2159 /* Vector Single-Width Averaging Add and Subtract */
2160 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2161 {
2162     uint8_t d = extract64(v, shift, 1);
2163     uint8_t d1;
2164     uint64_t D1, D2;
2165 
2166     if (shift == 0 || shift > 64) {
2167         return 0;
2168     }
2169 
2170     d1 = extract64(v, shift - 1, 1);
2171     D1 = extract64(v, 0, shift);
2172     if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2173         return d1;
2174     } else if (vxrm == 1) { /* round-to-nearest-even */
2175         if (shift > 1) {
2176             D2 = extract64(v, 0, shift - 1);
2177             return d1 & ((D2 != 0) | d);
2178         } else {
2179             return d1 & d;
2180         }
2181     } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2182         return !d & (D1 != 0);
2183     }
2184     return 0; /* round-down (truncate) */
2185 }
2186 
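/*
 * Worked example (illustrative only): rounding away the low 2 bits of
 * v = 11 (0b1011), i.e. computing 11 / 4 = 2.75 under each rounding mode.
 * The hypothetical self-check below just restates what get_round() returns.
 */
static G_GNUC_UNUSED void get_round_demo(void)
{
    uint64_t v = 11;  /* truncated result 2, its LSB is 0, dropped bits 0b11 */

    g_assert((v >> 2) + get_round(0, v, 2) == 3); /* rnu: 2.75 -> 3      */
    g_assert((v >> 2) + get_round(1, v, 2) == 3); /* rne: 2.75 -> 3      */
    g_assert((v >> 2) + get_round(2, v, 2) == 2); /* rdn: truncate to 2  */
    g_assert((v >> 2) + get_round(3, v, 2) == 3); /* rod: jam LSB -> odd */
}
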
2187 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2188 {
2189     int64_t res = (int64_t)a + b;
2190     uint8_t round = get_round(vxrm, res, 1);
2191 
2192     return (res >> 1) + round;
2193 }
2194 
2195 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2196 {
2197     int64_t res = a + b;
2198     uint8_t round = get_round(vxrm, res, 1);
2199     int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2200 
2201     /* With signed overflow, bit 64 is inverse of bit 63. */
2202     return ((res >> 1) ^ over) + round;
2203 }
2204 
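/*
 * Worked example of the overflow fix-up above (illustrative only):
 * a = b = INT64_MAX.  The 64-bit sum wraps to -2 and
 * over = (res ^ a) & (res ^ b) & INT64_MIN = INT64_MIN, so
 * (res >> 1) ^ over = -1 ^ INT64_MIN = INT64_MAX, the exact average;
 * the discarded bit is 0, so every rounding mode leaves it unchanged.
 */
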
2205 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2206 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2207 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2208 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
2209 GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1)
2210 GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2)
2211 GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4)
2212 GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8)
2213 
2214 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2215 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2216 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2217 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
2218 GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1)
2219 GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2)
2220 GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4)
2221 GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8)
2222 
2223 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2224 {
2225     int64_t res = (int64_t)a - b;
2226     uint8_t round = get_round(vxrm, res, 1);
2227 
2228     return (res >> 1) + round;
2229 }
2230 
2231 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2232 {
2233     int64_t res = (int64_t)a - b;
2234     uint8_t round = get_round(vxrm, res, 1);
2235     int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2236 
2237     /* With signed overflow, bit 64 is inverse of bit 63. */
2238     return ((res >> 1) ^ over) + round;
2239 }
2240 
2241 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2242 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2243 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2244 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
2245 GEN_VEXT_VV_RM(vasub_vv_b, 1, 1)
2246 GEN_VEXT_VV_RM(vasub_vv_h, 2, 2)
2247 GEN_VEXT_VV_RM(vasub_vv_w, 4, 4)
2248 GEN_VEXT_VV_RM(vasub_vv_d, 8, 8)
2249 
2250 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2251 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2252 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2253 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
2254 GEN_VEXT_VX_RM(vasub_vx_b, 1, 1)
2255 GEN_VEXT_VX_RM(vasub_vx_h, 2, 2)
2256 GEN_VEXT_VX_RM(vasub_vx_w, 4, 4)
2257 GEN_VEXT_VX_RM(vasub_vx_d, 8, 8)
2258 
2259 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2260 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2261 {
2262     uint8_t round;
2263     int16_t res;
2264 
2265     res = (int16_t)a * (int16_t)b;
2266     round = get_round(vxrm, res, 7);
2267     res   = (res >> 7) + round;
2268 
2269     if (res > INT8_MAX) {
2270         env->vxsat = 0x1;
2271         return INT8_MAX;
2272     } else if (res < INT8_MIN) {
2273         env->vxsat = 0x1;
2274         return INT8_MIN;
2275     } else {
2276         return res;
2277     }
2278 }
2279 
2280 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2281 {
2282     uint8_t round;
2283     int32_t res;
2284 
2285     res = (int32_t)a * (int32_t)b;
2286     round = get_round(vxrm, res, 15);
2287     res   = (res >> 15) + round;
2288 
2289     if (res > INT16_MAX) {
2290         env->vxsat = 0x1;
2291         return INT16_MAX;
2292     } else if (res < INT16_MIN) {
2293         env->vxsat = 0x1;
2294         return INT16_MIN;
2295     } else {
2296         return res;
2297     }
2298 }
2299 
2300 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2301 {
2302     uint8_t round;
2303     int64_t res;
2304 
2305     res = (int64_t)a * (int64_t)b;
2306     round = get_round(vxrm, res, 31);
2307     res   = (res >> 31) + round;
2308 
2309     if (res > INT32_MAX) {
2310         env->vxsat = 0x1;
2311         return INT32_MAX;
2312     } else if (res < INT32_MIN) {
2313         env->vxsat = 0x1;
2314         return INT32_MIN;
2315     } else {
2316         return res;
2317     }
2318 }
2319 
2320 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2321 {
2322     uint8_t round;
2323     uint64_t hi_64, lo_64;
2324     int64_t res;
2325 
2326     if (a == INT64_MIN && b == INT64_MIN) {
2327         env->vxsat = 1;
2328         return INT64_MAX;
2329     }
2330 
2331     muls64(&lo_64, &hi_64, a, b);
2332     round = get_round(vxrm, lo_64, 63);
2333     /*
2334      * Cannot overflow, as there are always
2335      * 2 sign bits after multiply.
2336      */
2337     res = (hi_64 << 1) | (lo_64 >> 63);
2338     if (round) {
2339         if (res == INT64_MAX) {
2340             env->vxsat = 1;
2341         } else {
2342             res += 1;
2343         }
2344     }
2345     return res;
2346 }
2347 
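/*
 * Why "2 sign bits" above (illustrative note): apart from
 * INT64_MIN * INT64_MIN = 2**126, every int64_t product lies strictly
 * inside (-2**126, 2**126), so the 128-bit result has at least two equal
 * top bits and (hi_64 << 1) | (lo_64 >> 63) only drops a redundant sign
 * bit.  The one exception is saturated explicitly before the shift.
 */
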
2348 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2349 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2350 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2351 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
2352 GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1)
2353 GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2)
2354 GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4)
2355 GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8)
2356 
2357 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2358 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2359 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2360 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
2361 GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1)
2362 GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2)
2363 GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4)
2364 GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8)
2365 
2366 /* Vector Widening Saturating Scaled Multiply-Add */
2367 static inline uint16_t
2368 vwsmaccu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b,
2369           uint16_t c)
2370 {
2371     uint8_t round;
2372     uint16_t res = (uint16_t)a * b;
2373 
2374     round = get_round(vxrm, res, 4);
2375     res   = (res >> 4) + round;
2376     return saddu16(env, vxrm, c, res);
2377 }
2378 
2379 static inline uint32_t
2380 vwsmaccu16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b,
2381            uint32_t c)
2382 {
2383     uint8_t round;
2384     uint32_t res = (uint32_t)a * b;
2385 
2386     round = get_round(vxrm, res, 8);
2387     res   = (res >> 8) + round;
2388     return saddu32(env, vxrm, c, res);
2389 }
2390 
2391 static inline uint64_t
2392 vwsmaccu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b,
2393            uint64_t c)
2394 {
2395     uint8_t round;
2396     uint64_t res = (uint64_t)a * b;
2397 
2398     round = get_round(vxrm, res, 16);
2399     res   = (res >> 16) + round;
2400     return saddu64(env, vxrm, c, res);
2401 }
2402 
2403 #define OPIVV3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
2404 static inline void                                                 \
2405 do_##NAME(void *vd, void *vs1, void *vs2, int i,                   \
2406           CPURISCVState *env, int vxrm)                            \
2407 {                                                                  \
2408     TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
2409     TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
2410     TD d = *((TD *)vd + HD(i));                                    \
2411     *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1, d);                \
2412 }
2413 
2414 RVVCALL(OPIVV3_RM, vwsmaccu_vv_b, WOP_UUU_B, H2, H1, H1, vwsmaccu8)
2415 RVVCALL(OPIVV3_RM, vwsmaccu_vv_h, WOP_UUU_H, H4, H2, H2, vwsmaccu16)
2416 RVVCALL(OPIVV3_RM, vwsmaccu_vv_w, WOP_UUU_W, H8, H4, H4, vwsmaccu32)
2417 GEN_VEXT_VV_RM(vwsmaccu_vv_b, 1, 2)
2418 GEN_VEXT_VV_RM(vwsmaccu_vv_h, 2, 4)
2419 GEN_VEXT_VV_RM(vwsmaccu_vv_w, 4, 8)
2420 
2421 #define OPIVX3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)         \
2422 static inline void                                                 \
2423 do_##NAME(void *vd, target_long s1, void *vs2, int i,              \
2424           CPURISCVState *env, int vxrm)                            \
2425 {                                                                  \
2426     TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
2427     TD d = *((TD *)vd + HD(i));                                    \
2428     *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1, d);       \
2429 }
2430 
2431 RVVCALL(OPIVX3_RM, vwsmaccu_vx_b, WOP_UUU_B, H2, H1, vwsmaccu8)
2432 RVVCALL(OPIVX3_RM, vwsmaccu_vx_h, WOP_UUU_H, H4, H2, vwsmaccu16)
2433 RVVCALL(OPIVX3_RM, vwsmaccu_vx_w, WOP_UUU_W, H8, H4, vwsmaccu32)
2434 GEN_VEXT_VX_RM(vwsmaccu_vx_b, 1, 2)
2435 GEN_VEXT_VX_RM(vwsmaccu_vx_h, 2, 4)
2436 GEN_VEXT_VX_RM(vwsmaccu_vx_w, 4, 8)
2437 
2438 static inline int16_t
2439 vwsmacc8(CPURISCVState *env, int vxrm, int8_t a, int8_t b, int16_t c)
2440 {
2441     uint8_t round;
2442     int16_t res = (int16_t)a * b;
2443 
2444     round = get_round(vxrm, res, 4);
2445     res   = (res >> 4) + round;
2446     return sadd16(env, vxrm, c, res);
2447 }
2448 
2449 static inline int32_t
2450 vwsmacc16(CPURISCVState *env, int vxrm, int16_t a, int16_t b, int32_t c)
2451 {
2452     uint8_t round;
2453     int32_t res = (int32_t)a * b;
2454 
2455     round = get_round(vxrm, res, 8);
2456     res   = (res >> 8) + round;
2457     return sadd32(env, vxrm, c, res);
2459 }
2460 
2461 static inline int64_t
2462 vwsmacc32(CPURISCVState *env, int vxrm, int32_t a, int32_t b, int64_t c)
2463 {
2464     uint8_t round;
2465     int64_t res = (int64_t)a * b;
2466 
2467     round = get_round(vxrm, res, 16);
2468     res   = (res >> 16) + round;
2469     return sadd64(env, vxrm, c, res);
2470 }
2471 
2472 RVVCALL(OPIVV3_RM, vwsmacc_vv_b, WOP_SSS_B, H2, H1, H1, vwsmacc8)
2473 RVVCALL(OPIVV3_RM, vwsmacc_vv_h, WOP_SSS_H, H4, H2, H2, vwsmacc16)
2474 RVVCALL(OPIVV3_RM, vwsmacc_vv_w, WOP_SSS_W, H8, H4, H4, vwsmacc32)
2475 GEN_VEXT_VV_RM(vwsmacc_vv_b, 1, 2)
2476 GEN_VEXT_VV_RM(vwsmacc_vv_h, 2, 4)
2477 GEN_VEXT_VV_RM(vwsmacc_vv_w, 4, 8)
2478 RVVCALL(OPIVX3_RM, vwsmacc_vx_b, WOP_SSS_B, H2, H1, vwsmacc8)
2479 RVVCALL(OPIVX3_RM, vwsmacc_vx_h, WOP_SSS_H, H4, H2, vwsmacc16)
2480 RVVCALL(OPIVX3_RM, vwsmacc_vx_w, WOP_SSS_W, H8, H4, vwsmacc32)
2481 GEN_VEXT_VX_RM(vwsmacc_vx_b, 1, 2)
2482 GEN_VEXT_VX_RM(vwsmacc_vx_h, 2, 4)
2483 GEN_VEXT_VX_RM(vwsmacc_vx_w, 4, 8)
2484 
2485 static inline int16_t
2486 vwsmaccsu8(CPURISCVState *env, int vxrm, uint8_t a, int8_t b, int16_t c)
2487 {
2488     uint8_t round;
2489     int16_t res = a * (int16_t)b;
2490 
2491     round = get_round(vxrm, res, 4);
2492     res   = (res >> 4) + round;
2493     return ssub16(env, vxrm, c, res);
2494 }
2495 
2496 static inline int32_t
2497 vwsmaccsu16(CPURISCVState *env, int vxrm, uint16_t a, int16_t b, int32_t c)
2498 {
2499     uint8_t round;
2500     int32_t res = a * (int32_t)b;
2501 
2502     round = get_round(vxrm, res, 8);
2503     res   = (res >> 8) + round;
2504     return ssub32(env, vxrm, c, res);
2505 }
2506 
2507 static inline int64_t
2508 vwsmaccsu32(CPURISCVState *env, int vxrm, uint32_t a, int32_t b, int64_t c)
2509 {
2510     uint8_t round;
2511     int64_t res = a * (int64_t)b;
2512 
2513     round = get_round(vxrm, res, 16);
2514     res   = (res >> 16) + round;
2515     return ssub64(env, vxrm, c, res);
2516 }
2517 
2518 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, vwsmaccsu8)
2519 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, vwsmaccsu16)
2520 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, vwsmaccsu32)
2521 GEN_VEXT_VV_RM(vwsmaccsu_vv_b, 1, 2)
2522 GEN_VEXT_VV_RM(vwsmaccsu_vv_h, 2, 4)
2523 GEN_VEXT_VV_RM(vwsmaccsu_vv_w, 4, 8)
2524 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_b, WOP_SSU_B, H2, H1, vwsmaccsu8)
2525 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_h, WOP_SSU_H, H4, H2, vwsmaccsu16)
2526 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_w, WOP_SSU_W, H8, H4, vwsmaccsu32)
2527 GEN_VEXT_VX_RM(vwsmaccsu_vx_b, 1, 2)
2528 GEN_VEXT_VX_RM(vwsmaccsu_vx_h, 2, 4)
2529 GEN_VEXT_VX_RM(vwsmaccsu_vx_w, 4, 8)
2530 
2531 static inline int16_t
2532 vwsmaccus8(CPURISCVState *env, int vxrm, int8_t a, uint8_t b, int16_t c)
2533 {
2534     uint8_t round;
2535     int16_t res = (int16_t)a * b;
2536 
2537     round = get_round(vxrm, res, 4);
2538     res   = (res >> 4) + round;
2539     return ssub16(env, vxrm, c, res);
2540 }
2541 
2542 static inline int32_t
2543 vwsmaccus16(CPURISCVState *env, int vxrm, int16_t a, uint16_t b, int32_t c)
2544 {
2545     uint8_t round;
2546     int32_t res = (int32_t)a * b;
2547 
2548     round = get_round(vxrm, res, 8);
2549     res   = (res >> 8) + round;
2550     return ssub32(env, vxrm, c, res);
2551 }
2552 
2553 static inline int64_t
2554 vwsmaccus32(CPURISCVState *env, int vxrm, int32_t a, uint32_t b, int64_t c)
2555 {
2556     uint8_t round;
2557     int64_t res = (int64_t)a * b;
2558 
2559     round = get_round(vxrm, res, 16);
2560     res   = (res >> 16) + round;
2561     return ssub64(env, vxrm, c, res);
2562 }
2563 
2564 RVVCALL(OPIVX3_RM, vwsmaccus_vx_b, WOP_SUS_B, H2, H1, vwsmaccus8)
2565 RVVCALL(OPIVX3_RM, vwsmaccus_vx_h, WOP_SUS_H, H4, H2, vwsmaccus16)
2566 RVVCALL(OPIVX3_RM, vwsmaccus_vx_w, WOP_SUS_W, H8, H4, vwsmaccus32)
2567 GEN_VEXT_VX_RM(vwsmaccus_vx_b, 1, 2)
2568 GEN_VEXT_VX_RM(vwsmaccus_vx_h, 2, 4)
2569 GEN_VEXT_VX_RM(vwsmaccus_vx_w, 4, 8)
2570 
2571 /* Vector Single-Width Scaling Shift Instructions */
2572 static inline uint8_t
2573 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2574 {
2575     uint8_t round, shift = b & 0x7;
2576     uint8_t res;
2577 
2578     round = get_round(vxrm, a, shift);
2579     res   = (a >> shift)  + round;
2580     return res;
2581 }
2582 static inline uint16_t
2583 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2584 {
2585     uint8_t round, shift = b & 0xf;
2586     uint16_t res;
2587 
2588     round = get_round(vxrm, a, shift);
2589     res   = (a >> shift)  + round;
2590     return res;
2591 }
2592 static inline uint32_t
2593 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2594 {
2595     uint8_t round, shift = b & 0x1f;
2596     uint32_t res;
2597 
2598     round = get_round(vxrm, a, shift);
2599     res   = (a >> shift)  + round;
2600     return res;
2601 }
2602 static inline uint64_t
2603 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2604 {
2605     uint8_t round, shift = b & 0x3f;
2606     uint64_t res;
2607 
2608     round = get_round(vxrm, a, shift);
2609     res   = (a >> shift)  + round;
2610     return res;
2611 }
2612 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2613 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2614 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2615 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
2616 GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1)
2617 GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2)
2618 GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4)
2619 GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8)
2620 
2621 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2622 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2623 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2624 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
2625 GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1)
2626 GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2)
2627 GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4)
2628 GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8)
2629 
2630 static inline int8_t
2631 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2632 {
2633     uint8_t round, shift = b & 0x7;
2634     int8_t res;
2635 
2636     round = get_round(vxrm, a, shift);
2637     res   = (a >> shift)  + round;
2638     return res;
2639 }
2640 static inline int16_t
2641 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2642 {
2643     uint8_t round, shift = b & 0xf;
2644     int16_t res;
2645 
2646     round = get_round(vxrm, a, shift);
2647     res   = (a >> shift)  + round;
2648     return res;
2649 }
2650 static inline int32_t
2651 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2652 {
2653     uint8_t round, shift = b & 0x1f;
2654     int32_t res;
2655 
2656     round = get_round(vxrm, a, shift);
2657     res   = (a >> shift)  + round;
2658     return res;
2659 }
2660 static inline int64_t
2661 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2662 {
2663     uint8_t round, shift = b & 0x3f;
2664     int64_t res;
2665 
2666     round = get_round(vxrm, a, shift);
2667     res   = (a >> shift)  + round;
2668     return res;
2669 }
2670 
2671 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2672 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2673 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2674 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
2675 GEN_VEXT_VV_RM(vssra_vv_b, 1, 1)
2676 GEN_VEXT_VV_RM(vssra_vv_h, 2, 2)
2677 GEN_VEXT_VV_RM(vssra_vv_w, 4, 4)
2678 GEN_VEXT_VV_RM(vssra_vv_d, 8, 8)
2679 
2680 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2681 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2682 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2683 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
2684 GEN_VEXT_VX_RM(vssra_vx_b, 1, 1)
2685 GEN_VEXT_VX_RM(vssra_vx_h, 2, 2)
2686 GEN_VEXT_VX_RM(vssra_vx_w, 4, 4)
2687 GEN_VEXT_VX_RM(vssra_vx_d, 8, 8)
2688 
2689 /* Vector Narrowing Fixed-Point Clip Instructions */
2690 static inline int8_t
2691 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2692 {
2693     uint8_t round, shift = b & 0xf;
2694     int16_t res;
2695 
2696     round = get_round(vxrm, a, shift);
2697     res   = (a >> shift)  + round;
2698     if (res > INT8_MAX) {
2699         env->vxsat = 0x1;
2700         return INT8_MAX;
2701     } else if (res < INT8_MIN) {
2702         env->vxsat = 0x1;
2703         return INT8_MIN;
2704     } else {
2705         return res;
2706     }
2707 }
2708 
2709 static inline int16_t
2710 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2711 {
2712     uint8_t round, shift = b & 0x1f;
2713     int32_t res;
2714 
2715     round = get_round(vxrm, a, shift);
2716     res   = (a >> shift)  + round;
2717     if (res > INT16_MAX) {
2718         env->vxsat = 0x1;
2719         return INT16_MAX;
2720     } else if (res < INT16_MIN) {
2721         env->vxsat = 0x1;
2722         return INT16_MIN;
2723     } else {
2724         return res;
2725     }
2726 }
2727 
2728 static inline int32_t
2729 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2730 {
2731     uint8_t round, shift = b & 0x3f;
2732     int64_t res;
2733 
2734     round = get_round(vxrm, a, shift);
2735     res   = (a >> shift)  + round;
2736     if (res > INT32_MAX) {
2737         env->vxsat = 0x1;
2738         return INT32_MAX;
2739     } else if (res < INT32_MIN) {
2740         env->vxsat = 0x1;
2741         return INT32_MIN;
2742     } else {
2743         return res;
2744     }
2745 }
2746 
2747 RVVCALL(OPIVV2_RM, vnclip_vv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
2748 RVVCALL(OPIVV2_RM, vnclip_vv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
2749 RVVCALL(OPIVV2_RM, vnclip_vv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
2750 GEN_VEXT_VV_RM(vnclip_vv_b, 1, 1)
2751 GEN_VEXT_VV_RM(vnclip_vv_h, 2, 2)
2752 GEN_VEXT_VV_RM(vnclip_vv_w, 4, 4)
2753 
2754 RVVCALL(OPIVX2_RM, vnclip_vx_b, NOP_SSS_B, H1, H2, vnclip8)
2755 RVVCALL(OPIVX2_RM, vnclip_vx_h, NOP_SSS_H, H2, H4, vnclip16)
2756 RVVCALL(OPIVX2_RM, vnclip_vx_w, NOP_SSS_W, H4, H8, vnclip32)
2757 GEN_VEXT_VX_RM(vnclip_vx_b, 1, 1)
2758 GEN_VEXT_VX_RM(vnclip_vx_h, 2, 2)
2759 GEN_VEXT_VX_RM(vnclip_vx_w, 4, 4)
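/*
 * The narrowing clips above take a double-width source element, shift it
 * right with vxrm rounding, then saturate into the single-width signed
 * range and set vxsat when clamping occurs.  For example (ignoring the
 * rounding increment), vnclip8 with a = 0x1234 and shift = 4 yields 0x123,
 * which exceeds INT8_MAX, so the result is 0x7f and vxsat is set.
 */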
2760 
2761 static inline uint8_t
2762 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2763 {
2764     uint8_t round, shift = b & 0xf;
2765     uint16_t res;
2766 
2767     round = get_round(vxrm, a, shift);
2768     res   = (a >> shift)  + round;
2769     if (res > UINT8_MAX) {
2770         env->vxsat = 0x1;
2771         return UINT8_MAX;
2772     } else {
2773         return res;
2774     }
2775 }
2776 
2777 static inline uint16_t
2778 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
2779 {
2780     uint8_t round, shift = b & 0x1f;
2781     uint32_t res;
2782 
2783     round = get_round(vxrm, a, shift);
2784     res   = (a >> shift)  + round;
2785     if (res > UINT16_MAX) {
2786         env->vxsat = 0x1;
2787         return UINT16_MAX;
2788     } else {
2789         return res;
2790     }
2791 }
2792 
2793 static inline uint32_t
2794 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
2795 {
2796     uint8_t round, shift = b & 0x3f;
2797     int64_t res;
2798 
2799     round = get_round(vxrm, a, shift);
2800     res   = (a >> shift)  + round;
2801     if (res > UINT32_MAX) {
2802         env->vxsat = 0x1;
2803         return UINT32_MAX;
2804     } else {
2805         return res;
2806     }
2807 }
2808 
2809 RVVCALL(OPIVV2_RM, vnclipu_vv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
2810 RVVCALL(OPIVV2_RM, vnclipu_vv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
2811 RVVCALL(OPIVV2_RM, vnclipu_vv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
2812 GEN_VEXT_VV_RM(vnclipu_vv_b, 1, 1)
2813 GEN_VEXT_VV_RM(vnclipu_vv_h, 2, 2)
2814 GEN_VEXT_VV_RM(vnclipu_vv_w, 4, 4)
2815 
2816 RVVCALL(OPIVX2_RM, vnclipu_vx_b, NOP_UUU_B, H1, H2, vnclipu8)
2817 RVVCALL(OPIVX2_RM, vnclipu_vx_h, NOP_UUU_H, H2, H4, vnclipu16)
2818 RVVCALL(OPIVX2_RM, vnclipu_vx_w, NOP_UUU_W, H4, H8, vnclipu32)
2819 GEN_VEXT_VX_RM(vnclipu_vx_b, 1, 1)
2820 GEN_VEXT_VX_RM(vnclipu_vx_h, 2, 2)
2821 GEN_VEXT_VX_RM(vnclipu_vx_w, 4, 4)
2822 
2823 /*
2824  *** Vector Floating-Point Arithmetic Instructions
2825  */
2826 /* Vector Single-Width Floating-Point Add/Subtract Instructions */
2827 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
2828 static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
2829                       CPURISCVState *env)                      \
2830 {                                                              \
2831     TX1 s1 = *((T1 *)vs1 + HS1(i));                            \
2832     TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
2833     *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status);         \
2834 }
2835 
2836 #define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ)                   \
2837 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
2838                   void *vs2, CPURISCVState *env,          \
2839                   uint32_t desc)                          \
2840 {                                                         \
2841     uint32_t vm = vext_vm(desc);                          \
2842     uint32_t vl = env->vl;                                \
2843     uint32_t i;                                           \
2844                                                           \
2845     for (i = 0; i < vl; i++) {                            \
2846         if (!vm && !vext_elem_mask(v0, i)) {              \
2847             continue;                                     \
2848         }                                                 \
2849         do_##NAME(vd, vs1, vs2, i, env);                  \
2850     }                                                     \
2851 }
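/*
 * GEN_VEXT_VV_ENV expands to the vector-vector helper body: it walks the
 * first vl elements, skips those masked off by v0 when vm == 0 (leaving
 * the destination element unchanged), and applies do_<NAME>() with
 * &env->fp_status so IEEE exception flags accumulate in the CPU state.
 */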
2852 
2853 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
2854 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
2855 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
2856 GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2)
2857 GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4)
2858 GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8)
2859 
2860 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)        \
2861 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
2862                       CPURISCVState *env)                      \
2863 {                                                              \
2864     TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
2865     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
2866 }
2867 
2868 #define GEN_VEXT_VF(NAME, ESZ, DSZ)                       \
2869 void HELPER(NAME)(void *vd, void *v0, uint64_t s1,        \
2870                   void *vs2, CPURISCVState *env,          \
2871                   uint32_t desc)                          \
2872 {                                                         \
2873     uint32_t vm = vext_vm(desc);                          \
2874     uint32_t vl = env->vl;                                \
2875     uint32_t i;                                           \
2876                                                           \
2877     for (i = 0; i < vl; i++) {                            \
2878         if (!vm && !vext_elem_mask(v0, i)) {              \
2879             continue;                                     \
2880         }                                                 \
2881         do_##NAME(vd, s1, vs2, i, env);                   \
2882     }                                                     \
2883 }
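/*
 * The vector-scalar form is identical except that the scalar arrives as a
 * uint64_t and is narrowed to the element type inside do_<NAME>() via the
 * (TX1)(T1)s1 cast, i.e. the same f-register value is reused for every
 * active element.
 */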
2884 
2885 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
2886 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
2887 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
2888 GEN_VEXT_VF(vfadd_vf_h, 2, 2)
2889 GEN_VEXT_VF(vfadd_vf_w, 4, 4)
2890 GEN_VEXT_VF(vfadd_vf_d, 8, 8)
2891 
2892 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
2893 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
2894 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
2895 GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2)
2896 GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4)
2897 GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8)
2898 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
2899 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
2900 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
2901 GEN_VEXT_VF(vfsub_vf_h, 2, 2)
2902 GEN_VEXT_VF(vfsub_vf_w, 4, 4)
2903 GEN_VEXT_VF(vfsub_vf_d, 8, 8)
2904 
2905 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
2906 {
2907     return float16_sub(b, a, s);
2908 }
2909 
2910 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
2911 {
2912     return float32_sub(b, a, s);
2913 }
2914 
2915 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
2916 {
2917     return float64_sub(b, a, s);
2918 }
2919 
2920 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
2921 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
2922 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
2923 GEN_VEXT_VF(vfrsub_vf_h, 2, 2)
2924 GEN_VEXT_VF(vfrsub_vf_w, 4, 4)
2925 GEN_VEXT_VF(vfrsub_vf_d, 8, 8)
2926 
2927 /* Vector Widening Floating-Point Add/Subtract Instructions */
2928 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
2929 {
2930     return float32_add(float16_to_float32(a, true, s),
2931             float16_to_float32(b, true, s), s);
2932 }
2933 
2934 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
2935 {
2936     return float64_add(float32_to_float64(a, s),
2937             float32_to_float64(b, s), s);
2938 
2939 }
2940 
2941 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
2942 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
2943 GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4)
2944 GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8)
2945 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
2946 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
2947 GEN_VEXT_VF(vfwadd_vf_h, 2, 4)
2948 GEN_VEXT_VF(vfwadd_vf_w, 4, 8)
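/*
 * For the widening forms the (ESZ, DSZ) arguments differ, e.g. (2, 4)
 * means 16-bit source elements producing 32-bit destination elements.
 * Both inputs are first converted up (float16_to_float32 with ieee = true,
 * or float32_to_float64) and the operation is then performed at the wider
 * precision.
 */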
2949 
2950 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
2951 {
2952     return float32_sub(float16_to_float32(a, true, s),
2953             float16_to_float32(b, true, s), s);
2954 }
2955 
2956 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
2957 {
2958     return float64_sub(float32_to_float64(a, s),
2959             float32_to_float64(b, s), s);
2960 
2961 }
2962 
2963 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
2964 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
2965 GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4)
2966 GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8)
2967 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
2968 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
2969 GEN_VEXT_VF(vfwsub_vf_h, 2, 4)
2970 GEN_VEXT_VF(vfwsub_vf_w, 4, 8)
2971 
2972 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
2973 {
2974     return float32_add(a, float16_to_float32(b, true, s), s);
2975 }
2976 
2977 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
2978 {
2979     return float64_add(a, float32_to_float64(b, s), s);
2980 }
2981 
2982 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
2983 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
2984 GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4)
2985 GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8)
2986 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
2987 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
2988 GEN_VEXT_VF(vfwadd_wf_h, 2, 4)
2989 GEN_VEXT_VF(vfwadd_wf_w, 4, 8)
2990 
2991 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
2992 {
2993     return float32_sub(a, float16_to_float32(b, true, s), s);
2994 }
2995 
2996 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
2997 {
2998     return float64_sub(a, float32_to_float64(b, s), s);
2999 }
3000 
3001 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
3002 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
3003 GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4)
3004 GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8)
3005 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
3006 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
3007 GEN_VEXT_VF(vfwsub_wf_h, 2, 4)
3008 GEN_VEXT_VF(vfwsub_wf_w, 4, 8)
3009 
3010 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */
3011 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
3012 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
3013 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
3014 GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2)
3015 GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4)
3016 GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8)
3017 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
3018 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
3019 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
3020 GEN_VEXT_VF(vfmul_vf_h, 2, 2)
3021 GEN_VEXT_VF(vfmul_vf_w, 4, 4)
3022 GEN_VEXT_VF(vfmul_vf_d, 8, 8)
3023 
3024 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
3025 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
3026 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
3027 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2)
3028 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4)
3029 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8)
3030 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
3031 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
3032 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
3033 GEN_VEXT_VF(vfdiv_vf_h, 2, 2)
3034 GEN_VEXT_VF(vfdiv_vf_w, 4, 4)
3035 GEN_VEXT_VF(vfdiv_vf_d, 8, 8)
3036 
3037 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3038 {
3039     return float16_div(b, a, s);
3040 }
3041 
3042 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
3043 {
3044     return float32_div(b, a, s);
3045 }
3046 
3047 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
3048 {
3049     return float64_div(b, a, s);
3050 }
3051 
3052 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
3053 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
3054 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
3055 GEN_VEXT_VF(vfrdiv_vf_h, 2, 2)
3056 GEN_VEXT_VF(vfrdiv_vf_w, 4, 4)
3057 GEN_VEXT_VF(vfrdiv_vf_d, 8, 8)
3058 
3059 /* Vector Widening Floating-Point Multiply */
3060 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3061 {
3062     return float32_mul(float16_to_float32(a, true, s),
3063             float16_to_float32(b, true, s), s);
3064 }
3065 
3066 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3067 {
3068     return float64_mul(float32_to_float64(a, s),
3069             float32_to_float64(b, s), s);
3070 
3071 }
3072 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3073 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
3074 GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4)
3075 GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8)
3076 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3077 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
3078 GEN_VEXT_VF(vfwmul_vf_h, 2, 4)
3079 GEN_VEXT_VF(vfwmul_vf_w, 4, 8)
3080 
3081 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
3082 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)       \
3083 static void do_##NAME(void *vd, void *vs1, void *vs2, int i,       \
3084         CPURISCVState *env)                                        \
3085 {                                                                  \
3086     TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
3087     TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
3088     TD d = *((TD *)vd + HD(i));                                    \
3089     *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status);          \
3090 }
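/*
 * OPFVV3 is the three-operand variant used by the fused multiply-add
 * group: besides the two source elements it reads the current destination
 * element d, so OP can accumulate into (or subtract from) vd with a single
 * softfloat muladd call.
 */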
3091 
3092 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3093 {
3094     return float16_muladd(a, b, d, 0, s);
3095 }
3096 
3097 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3098 {
3099     return float32_muladd(a, b, d, 0, s);
3100 }
3101 
3102 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3103 {
3104     return float64_muladd(a, b, d, 0, s);
3105 }
3106 
3107 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3108 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3109 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
3110 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2)
3111 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4)
3112 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8)
3113 
3114 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)           \
3115 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i,    \
3116         CPURISCVState *env)                                       \
3117 {                                                                 \
3118     TX2 s2 = *((T2 *)vs2 + HS2(i));                               \
3119     TD d = *((TD *)vd + HD(i));                                   \
3120     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
3121 }
3122 
3123 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3124 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3125 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
3126 GEN_VEXT_VF(vfmacc_vf_h, 2, 2)
3127 GEN_VEXT_VF(vfmacc_vf_w, 4, 4)
3128 GEN_VEXT_VF(vfmacc_vf_d, 8, 8)
3129 
3130 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3131 {
3132     return float16_muladd(a, b, d,
3133             float_muladd_negate_c | float_muladd_negate_product, s);
3134 }
3135 
3136 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3137 {
3138     return float32_muladd(a, b, d,
3139             float_muladd_negate_c | float_muladd_negate_product, s);
3140 }
3141 
3142 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3143 {
3144     return float64_muladd(a, b, d,
3145             float_muladd_negate_c | float_muladd_negate_product, s);
3146 }
3147 
3148 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3149 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3150 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
3151 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2)
3152 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4)
3153 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8)
3154 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3155 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3156 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
3157 GEN_VEXT_VF(vfnmacc_vf_h, 2, 2)
3158 GEN_VEXT_VF(vfnmacc_vf_w, 4, 4)
3159 GEN_VEXT_VF(vfnmacc_vf_d, 8, 8)
3160 
3161 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3162 {
3163     return float16_muladd(a, b, d, float_muladd_negate_c, s);
3164 }
3165 
3166 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3167 {
3168     return float32_muladd(a, b, d, float_muladd_negate_c, s);
3169 }
3170 
3171 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3172 {
3173     return float64_muladd(a, b, d, float_muladd_negate_c, s);
3174 }
3175 
3176 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3177 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3178 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
3179 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2)
3180 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4)
3181 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8)
3182 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3183 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3184 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
3185 GEN_VEXT_VF(vfmsac_vf_h, 2, 2)
3186 GEN_VEXT_VF(vfmsac_vf_w, 4, 4)
3187 GEN_VEXT_VF(vfmsac_vf_d, 8, 8)
3188 
3189 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3190 {
3191     return float16_muladd(a, b, d, float_muladd_negate_product, s);
3192 }
3193 
3194 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3195 {
3196     return float32_muladd(a, b, d, float_muladd_negate_product, s);
3197 }
3198 
3199 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3200 {
3201     return float64_muladd(a, b, d, float_muladd_negate_product, s);
3202 }
3203 
3204 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3205 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3206 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
3207 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2)
3208 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4)
3209 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8)
3210 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3211 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3212 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
3213 GEN_VEXT_VF(vfnmsac_vf_h, 2, 2)
3214 GEN_VEXT_VF(vfnmsac_vf_w, 4, 4)
3215 GEN_VEXT_VF(vfnmsac_vf_d, 8, 8)
3216 
3217 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3218 {
3219     return float16_muladd(d, b, a, 0, s);
3220 }
3221 
3222 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3223 {
3224     return float32_muladd(d, b, a, 0, s);
3225 }
3226 
3227 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3228 {
3229     return float64_muladd(d, b, a, 0, s);
3230 }
3231 
3232 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3233 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3234 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
3235 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2)
3236 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4)
3237 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8)
3238 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3239 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3240 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
3241 GEN_VEXT_VF(vfmadd_vf_h, 2, 2)
3242 GEN_VEXT_VF(vfmadd_vf_w, 4, 4)
3243 GEN_VEXT_VF(vfmadd_vf_d, 8, 8)
3244 
3245 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3246 {
3247     return float16_muladd(d, b, a,
3248             float_muladd_negate_c | float_muladd_negate_product, s);
3249 }
3250 
3251 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3252 {
3253     return float32_muladd(d, b, a,
3254             float_muladd_negate_c | float_muladd_negate_product, s);
3255 }
3256 
3257 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3258 {
3259     return float64_muladd(d, b, a,
3260             float_muladd_negate_c | float_muladd_negate_product, s);
3261 }
3262 
3263 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3264 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3265 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
3266 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2)
3267 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4)
3268 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8)
3269 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3270 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3271 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
3272 GEN_VEXT_VF(vfnmadd_vf_h, 2, 2)
3273 GEN_VEXT_VF(vfnmadd_vf_w, 4, 4)
3274 GEN_VEXT_VF(vfnmadd_vf_d, 8, 8)
3275 
3276 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3277 {
3278     return float16_muladd(d, b, a, float_muladd_negate_c, s);
3279 }
3280 
3281 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3282 {
3283     return float32_muladd(d, b, a, float_muladd_negate_c, s);
3284 }
3285 
3286 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3287 {
3288     return float64_muladd(d, b, a, float_muladd_negate_c, s);
3289 }
3290 
3291 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3292 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3293 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
3294 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2)
3295 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4)
3296 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8)
3297 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3298 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3299 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
3300 GEN_VEXT_VF(vfmsub_vf_h, 2, 2)
3301 GEN_VEXT_VF(vfmsub_vf_w, 4, 4)
3302 GEN_VEXT_VF(vfmsub_vf_d, 8, 8)
3303 
3304 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3305 {
3306     return float16_muladd(d, b, a, float_muladd_negate_product, s);
3307 }
3308 
3309 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3310 {
3311     return float32_muladd(d, b, a, float_muladd_negate_product, s);
3312 }
3313 
3314 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3315 {
3316     return float64_muladd(d, b, a, float_muladd_negate_product, s);
3317 }
3318 
3319 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3320 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3321 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
3322 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2)
3323 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4)
3324 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8)
3325 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3326 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3327 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
3328 GEN_VEXT_VF(vfnmsub_vf_h, 2, 2)
3329 GEN_VEXT_VF(vfnmsub_vf_w, 4, 4)
3330 GEN_VEXT_VF(vfnmsub_vf_d, 8, 8)
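/*
 * Note the operand order in the helpers above: the *macc/*msac family
 * multiplies the two sources (s2 * s1) and accumulates into the
 * destination element, while the *madd/*msub family multiplies the
 * destination element by vs1/rs1 (d * b) and adds or subtracts vs2 (a).
 * The negate flags passed to float*_muladd select the sign of the product
 * and of the addend.
 */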
3331 
3332 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3333 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3334 {
3335     return float32_muladd(float16_to_float32(a, true, s),
3336                         float16_to_float32(b, true, s), d, 0, s);
3337 }
3338 
3339 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3340 {
3341     return float64_muladd(float32_to_float64(a, s),
3342                         float32_to_float64(b, s), d, 0, s);
3343 }
3344 
3345 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3346 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
3347 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4)
3348 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8)
3349 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3350 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
3351 GEN_VEXT_VF(vfwmacc_vf_h, 2, 4)
3352 GEN_VEXT_VF(vfwmacc_vf_w, 4, 8)
3353 
3354 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3355 {
3356     return float32_muladd(float16_to_float32(a, true, s),
3357                         float16_to_float32(b, true, s), d,
3358                         float_muladd_negate_c | float_muladd_negate_product, s);
3359 }
3360 
3361 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3362 {
3363     return float64_muladd(float32_to_float64(a, s),
3364                         float32_to_float64(b, s), d,
3365                         float_muladd_negate_c | float_muladd_negate_product, s);
3366 }
3367 
3368 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3369 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
3370 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4)
3371 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8)
3372 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3373 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
3374 GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4)
3375 GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8)
3376 
3377 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3378 {
3379     return float32_muladd(float16_to_float32(a, true, s),
3380                         float16_to_float32(b, true, s), d,
3381                         float_muladd_negate_c, s);
3382 }
3383 
3384 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3385 {
3386     return float64_muladd(float32_to_float64(a, s),
3387                         float32_to_float64(b, s), d,
3388                         float_muladd_negate_c, s);
3389 }
3390 
3391 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3392 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
3393 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4)
3394 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8)
3395 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3396 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
3397 GEN_VEXT_VF(vfwmsac_vf_h, 2, 4)
3398 GEN_VEXT_VF(vfwmsac_vf_w, 4, 8)
3399 
3400 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3401 {
3402     return float32_muladd(float16_to_float32(a, true, s),
3403                         float16_to_float32(b, true, s), d,
3404                         float_muladd_negate_product, s);
3405 }
3406 
3407 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3408 {
3409     return float64_muladd(float32_to_float64(a, s),
3410                         float32_to_float64(b, s), d,
3411                         float_muladd_negate_product, s);
3412 }
3413 
3414 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3415 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
3416 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4)
3417 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8)
3418 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3419 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
3420 GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4)
3421 GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8)
3422 
3423 /* Vector Floating-Point Square-Root Instruction */
3424 /* (TD, T2, TX2) */
3425 #define OP_UU_H uint16_t, uint16_t, uint16_t
3426 #define OP_UU_W uint32_t, uint32_t, uint32_t
3427 #define OP_UU_D uint64_t, uint64_t, uint64_t
3428 
3429 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP)        \
3430 static void do_##NAME(void *vd, void *vs2, int i,      \
3431         CPURISCVState *env)                            \
3432 {                                                      \
3433     TX2 s2 = *((T2 *)vs2 + HS2(i));                    \
3434     *((TD *)vd + HD(i)) = OP(s2, &env->fp_status);     \
3435 }
3436 
3437 #define GEN_VEXT_V_ENV(NAME, ESZ, DSZ)                 \
3438 void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
3439         CPURISCVState *env, uint32_t desc)             \
3440 {                                                      \
3441     uint32_t vm = vext_vm(desc);                       \
3442     uint32_t vl = env->vl;                             \
3443     uint32_t i;                                        \
3444                                                        \
3445     if (vl == 0) {                                     \
3446         return;                                        \
3447     }                                                  \
3448     for (i = 0; i < vl; i++) {                         \
3449         if (!vm && !vext_elem_mask(v0, i)) {           \
3450             continue;                                  \
3451         }                                              \
3452         do_##NAME(vd, vs2, i, env);                    \
3453     }                                                  \
3454 }
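/*
 * GEN_VEXT_V_ENV is the unary counterpart used by vfsqrt.v and the
 * type-convert helpers later in this section: a single source vector,
 * per-element masking, and an early return when vl == 0.
 */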
3455 
3456 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3457 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3458 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
3459 GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2)
3460 GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4)
3461 GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8)
3462 
3463 /* Vector Floating-Point MIN/MAX Instructions */
3464 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minnum)
3465 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minnum)
3466 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minnum)
3467 GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2)
3468 GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4)
3469 GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8)
3470 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minnum)
3471 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minnum)
3472 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minnum)
3473 GEN_VEXT_VF(vfmin_vf_h, 2, 2)
3474 GEN_VEXT_VF(vfmin_vf_w, 4, 4)
3475 GEN_VEXT_VF(vfmin_vf_d, 8, 8)
3476 
3477 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maxnum)
3478 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maxnum)
3479 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maxnum)
3480 GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2)
3481 GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4)
3482 GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8)
3483 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maxnum)
3484 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maxnum)
3485 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maxnum)
3486 GEN_VEXT_VF(vfmax_vf_h, 2, 2)
3487 GEN_VEXT_VF(vfmax_vf_w, 4, 4)
3488 GEN_VEXT_VF(vfmax_vf_d, 8, 8)
3489 
3490 /* Vector Floating-Point Sign-Injection Instructions */
3491 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
3492 {
3493     return deposit64(b, 0, 15, a);
3494 }
3495 
3496 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
3497 {
3498     return deposit64(b, 0, 31, a);
3499 }
3500 
3501 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
3502 {
3503     return deposit64(b, 0, 63, a);
3504 }
3505 
3506 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
3507 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
3508 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
3509 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2)
3510 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4)
3511 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8)
3512 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
3513 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
3514 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
3515 GEN_VEXT_VF(vfsgnj_vf_h, 2, 2)
3516 GEN_VEXT_VF(vfsgnj_vf_w, 4, 4)
3517 GEN_VEXT_VF(vfsgnj_vf_d, 8, 8)
3518 
3519 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
3520 {
3521     return deposit64(~b, 0, 15, a);
3522 }
3523 
3524 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
3525 {
3526     return deposit64(~b, 0, 31, a);
3527 }
3528 
3529 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
3530 {
3531     return deposit64(~b, 0, 63, a);
3532 }
3533 
3534 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
3535 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
3536 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
3537 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2)
3538 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4)
3539 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8)
3540 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
3541 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
3542 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
3543 GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2)
3544 GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4)
3545 GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8)
3546 
3547 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
3548 {
3549     return deposit64(b ^ a, 0, 15, a);
3550 }
3551 
3552 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
3553 {
3554     return deposit64(b ^ a, 0, 31, a);
3555 }
3556 
3557 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
3558 {
3559     return deposit64(b ^ a, 0, 63, a);
3560 }
3561 
3562 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
3563 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
3564 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
3565 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2)
3566 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4)
3567 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8)
3568 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
3569 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
3570 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
3571 GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2)
3572 GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4)
3573 GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8)
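/*
 * Sign injection is implemented with deposit64: the result keeps the sign
 * bit of b (the vs1/rs1 operand after the RVVCALL argument swap) and takes
 * the exponent and mantissa bits [0, SEW-2] from a (vs2).  fsgnjn uses ~b
 * to negate that sign and fsgnjx uses b ^ a to xor the two signs; none of
 * these helpers raise floating-point exceptions.
 */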
3574 
3575 /* Vector Floating-Point Compare Instructions */
3576 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP)            \
3577 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
3578                   CPURISCVState *env, uint32_t desc)          \
3579 {                                                             \
3580     uint32_t vm = vext_vm(desc);                              \
3581     uint32_t vl = env->vl;                                    \
3582     uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);        \
3583     uint32_t i;                                               \
3584                                                               \
3585     for (i = 0; i < vl; i++) {                                \
3586         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
3587         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
3588         if (!vm && !vext_elem_mask(v0, i)) {                  \
3589             continue;                                         \
3590         }                                                     \
3591         vext_set_elem_mask(vd, i,                             \
3592                            DO_OP(s2, s1, &env->fp_status));   \
3593     }                                                         \
3594     for (; i < vlmax; i++) {                                  \
3595         vext_set_elem_mask(vd, i, 0);                         \
3596     }                                                         \
3597 }
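/*
 * The compare helpers produce a mask: for each active element the result
 * of DO_OP is written as a single mask bit in vd via vext_set_elem_mask(),
 * inactive elements keep their previous mask bit, and the tail from vl up
 * to vlmax is cleared to 0.
 */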
3598 
3599 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
3600 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
3601 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
3602 
3603 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP)                      \
3604 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,       \
3605                   CPURISCVState *env, uint32_t desc)                \
3606 {                                                                   \
3607     uint32_t vm = vext_vm(desc);                                    \
3608     uint32_t vl = env->vl;                                          \
3609     uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);              \
3610     uint32_t i;                                                     \
3611                                                                     \
3612     for (i = 0; i < vl; i++) {                                      \
3613         ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
3614         if (!vm && !vext_elem_mask(v0, i)) {                        \
3615             continue;                                               \
3616         }                                                           \
3617         vext_set_elem_mask(vd, i,                                   \
3618                            DO_OP(s2, (ETYPE)s1, &env->fp_status));  \
3619     }                                                               \
3620     for (; i < vlmax; i++) {                                        \
3621         vext_set_elem_mask(vd, i, 0);                               \
3622     }                                                               \
3623 }
3624 
3625 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
3626 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
3627 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
3628 
3629 static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
3630 {
3631     FloatRelation compare = float16_compare_quiet(a, b, s);
3632     return compare != float_relation_equal;
3633 }
3634 
3635 static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
3636 {
3637     FloatRelation compare = float32_compare_quiet(a, b, s);
3638     return compare != float_relation_equal;
3639 }
3640 
3641 static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
3642 {
3643     FloatRelation compare = float64_compare_quiet(a, b, s);
3644     return compare != float_relation_equal;
3645 }
3646 
3647 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
3648 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
3649 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
3650 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
3651 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
3652 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
3653 
3654 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
3655 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
3656 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
3657 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
3658 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
3659 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
3660 
3661 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
3662 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
3663 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
3664 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
3665 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
3666 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
3667 
3668 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
3669 {
3670     FloatRelation compare = float16_compare(a, b, s);
3671     return compare == float_relation_greater;
3672 }
3673 
3674 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
3675 {
3676     FloatRelation compare = float32_compare(a, b, s);
3677     return compare == float_relation_greater;
3678 }
3679 
3680 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
3681 {
3682     FloatRelation compare = float64_compare(a, b, s);
3683     return compare == float_relation_greater;
3684 }
3685 
3686 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
3687 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
3688 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
3689 
3690 static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
3691 {
3692     FloatRelation compare = float16_compare(a, b, s);
3693     return compare == float_relation_greater ||
3694            compare == float_relation_equal;
3695 }
3696 
3697 static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
3698 {
3699     FloatRelation compare = float32_compare(a, b, s);
3700     return compare == float_relation_greater ||
3701            compare == float_relation_equal;
3702 }
3703 
3704 static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
3705 {
3706     FloatRelation compare = float64_compare(a, b, s);
3707     return compare == float_relation_greater ||
3708            compare == float_relation_equal;
3709 }
3710 
3711 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
3712 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
3713 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
3714 
3715 GEN_VEXT_CMP_VV_ENV(vmford_vv_h, uint16_t, H2, !float16_unordered_quiet)
3716 GEN_VEXT_CMP_VV_ENV(vmford_vv_w, uint32_t, H4, !float32_unordered_quiet)
3717 GEN_VEXT_CMP_VV_ENV(vmford_vv_d, uint64_t, H8, !float64_unordered_quiet)
3718 GEN_VEXT_CMP_VF(vmford_vf_h, uint16_t, H2, !float16_unordered_quiet)
3719 GEN_VEXT_CMP_VF(vmford_vf_w, uint32_t, H4, !float32_unordered_quiet)
3720 GEN_VEXT_CMP_VF(vmford_vf_d, uint64_t, H8, !float64_unordered_quiet)
3721 
3722 /* Vector Floating-Point Classify Instruction */
3723 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP)         \
3724 static void do_##NAME(void *vd, void *vs2, int i)      \
3725 {                                                      \
3726     TX2 s2 = *((T2 *)vs2 + HS2(i));                    \
3727     *((TD *)vd + HD(i)) = OP(s2);                      \
3728 }
3729 
3730 #define GEN_VEXT_V(NAME, ESZ, DSZ)                     \
3731 void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
3732                   CPURISCVState *env, uint32_t desc)   \
3733 {                                                      \
3734     uint32_t vm = vext_vm(desc);                       \
3735     uint32_t vl = env->vl;                             \
3736     uint32_t i;                                        \
3737                                                        \
3738     for (i = 0; i < vl; i++) {                         \
3739         if (!vm && !vext_elem_mask(v0, i)) {           \
3740             continue;                                  \
3741         }                                              \
3742         do_##NAME(vd, vs2, i);                         \
3743     }                                                  \
3744 }
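/*
 * OPIVV1/GEN_VEXT_V are the variants without fp_status: classification
 * never raises floating-point exceptions, so the classify helpers below
 * only inspect the raw element value.
 */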
3745 
3746 target_ulong fclass_h(uint64_t frs1)
3747 {
3748     float16 f = frs1;
3749     bool sign = float16_is_neg(f);
3750 
3751     if (float16_is_infinity(f)) {
3752         return sign ? 1 << 0 : 1 << 7;
3753     } else if (float16_is_zero(f)) {
3754         return sign ? 1 << 3 : 1 << 4;
3755     } else if (float16_is_zero_or_denormal(f)) {
3756         return sign ? 1 << 2 : 1 << 5;
3757     } else if (float16_is_any_nan(f)) {
3758         float_status s = { }; /* for snan_bit_is_one */
3759         return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
3760     } else {
3761         return sign ? 1 << 1 : 1 << 6;
3762     }
3763 }
3764 
3765 target_ulong fclass_s(uint64_t frs1)
3766 {
3767     float32 f = frs1;
3768     bool sign = float32_is_neg(f);
3769 
3770     if (float32_is_infinity(f)) {
3771         return sign ? 1 << 0 : 1 << 7;
3772     } else if (float32_is_zero(f)) {
3773         return sign ? 1 << 3 : 1 << 4;
3774     } else if (float32_is_zero_or_denormal(f)) {
3775         return sign ? 1 << 2 : 1 << 5;
3776     } else if (float32_is_any_nan(f)) {
3777         float_status s = { }; /* for snan_bit_is_one */
3778         return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
3779     } else {
3780         return sign ? 1 << 1 : 1 << 6;
3781     }
3782 }
3783 
3784 target_ulong fclass_d(uint64_t frs1)
3785 {
3786     float64 f = frs1;
3787     bool sign = float64_is_neg(f);
3788 
3789     if (float64_is_infinity(f)) {
3790         return sign ? 1 << 0 : 1 << 7;
3791     } else if (float64_is_zero(f)) {
3792         return sign ? 1 << 3 : 1 << 4;
3793     } else if (float64_is_zero_or_denormal(f)) {
3794         return sign ? 1 << 2 : 1 << 5;
3795     } else if (float64_is_any_nan(f)) {
3796         float_status s = { }; /* for snan_bit_is_one */
3797         return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
3798     } else {
3799         return sign ? 1 << 1 : 1 << 6;
3800     }
3801 }
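/*
 * fclass_h/s/d return the same 10-bit one-hot classification as the scalar
 * fclass instructions: bit 0 = -inf, 1 = negative normal, 2 = negative
 * subnormal, 3 = -0, 4 = +0, 5 = positive subnormal, 6 = positive normal,
 * 7 = +inf, 8 = signaling NaN, 9 = quiet NaN.  A zeroed float_status is
 * sufficient here because it is only consulted for snan_bit_is_one.
 */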
3802 
3803 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
3804 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
3805 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
3806 GEN_VEXT_V(vfclass_v_h, 2, 2)
3807 GEN_VEXT_V(vfclass_v_w, 4, 4)
3808 GEN_VEXT_V(vfclass_v_d, 8, 8)
3809 
3810 /* Vector Floating-Point Merge Instruction */
3811 #define GEN_VFMERGE_VF(NAME, ETYPE, H)                        \
3812 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
3813                   CPURISCVState *env, uint32_t desc)          \
3814 {                                                             \
3815     uint32_t vm = vext_vm(desc);                              \
3816     uint32_t vl = env->vl;                                    \
3817     uint32_t i;                                               \
3818                                                               \
3819     for (i = 0; i < vl; i++) {                                \
3820         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
3821         *((ETYPE *)vd + H(i))                                 \
3822           = (!vm && !vext_elem_mask(v0, i) ? s2 : s1);        \
3823     }                                                         \
3824 }
3825 
3826 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
3827 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
3828 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
3829 
3830 /* Single-Width Floating-Point/Integer Type-Convert Instructions */
3831 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
3832 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
3833 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
3834 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
3835 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2)
3836 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4)
3837 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8)
3838 
3839 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
3840 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
3841 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
3842 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
3843 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2)
3844 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4)
3845 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8)
3846 
3847 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
3848 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
3849 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
3850 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
3851 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2)
3852 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4)
3853 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8)
3854 
3855 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
3856 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
3857 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
3858 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
3859 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2)
3860 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4)
3861 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8)
3862 
3863 /* Widening Floating-Point/Integer Type-Convert Instructions */
3864 /* (TD, T2, TX2) */
3865 #define WOP_UU_H uint32_t, uint16_t, uint16_t
3866 #define WOP_UU_W uint64_t, uint32_t, uint32_t
3867 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/
3868 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
3869 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
3870 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4)
3871 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8)
3872 
3873 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
3874 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
3875 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
3876 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4)
3877 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8)
3878 
3879 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */
3880 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
3881 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
3882 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4)
3883 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8)
3884 
3885 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
3886 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
3887 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
3888 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4)
3889 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8)
3890 
3891 /*
3892  * vfwcvt.f.f.v vd, vs2, vm #
3893  * Convert single-width float to double-width float.
3894  */
3895 static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
3896 {
3897     return float16_to_float32(a, true, s);
3898 }
3899 
3900 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
3901 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
3902 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4)
3903 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8)
3904 
3905 /* Narrowing Floating-Point/Integer Type-Convert Instructions */
3906 /* (TD, T2, TX2) */
3907 #define NOP_UU_H uint16_t, uint32_t, uint32_t
3908 #define NOP_UU_W uint32_t, uint64_t, uint64_t
3909 /* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
3910 RVVCALL(OPFVV1, vfncvt_xu_f_v_h, NOP_UU_H, H2, H4, float32_to_uint16)
3911 RVVCALL(OPFVV1, vfncvt_xu_f_v_w, NOP_UU_W, H4, H8, float64_to_uint32)
3912 GEN_VEXT_V_ENV(vfncvt_xu_f_v_h, 2, 2)
3913 GEN_VEXT_V_ENV(vfncvt_xu_f_v_w, 4, 4)
3914 
3915 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
3916 RVVCALL(OPFVV1, vfncvt_x_f_v_h, NOP_UU_H, H2, H4, float32_to_int16)
3917 RVVCALL(OPFVV1, vfncvt_x_f_v_w, NOP_UU_W, H4, H8, float64_to_int32)
3918 GEN_VEXT_V_ENV(vfncvt_x_f_v_h, 2, 2)
3919 GEN_VEXT_V_ENV(vfncvt_x_f_v_w, 4, 4)
3920 
3921 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */
3922 RVVCALL(OPFVV1, vfncvt_f_xu_v_h, NOP_UU_H, H2, H4, uint32_to_float16)
3923 RVVCALL(OPFVV1, vfncvt_f_xu_v_w, NOP_UU_W, H4, H8, uint64_to_float32)
3924 GEN_VEXT_V_ENV(vfncvt_f_xu_v_h, 2, 2)
3925 GEN_VEXT_V_ENV(vfncvt_f_xu_v_w, 4, 4)
3926 
3927 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
3928 RVVCALL(OPFVV1, vfncvt_f_x_v_h, NOP_UU_H, H2, H4, int32_to_float16)
3929 RVVCALL(OPFVV1, vfncvt_f_x_v_w, NOP_UU_W, H4, H8, int64_to_float32)
3930 GEN_VEXT_V_ENV(vfncvt_f_x_v_h, 2, 2)
3931 GEN_VEXT_V_ENV(vfncvt_f_x_v_w, 4, 4)
3932 
3933 /* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float. */
3934 static uint16_t vfncvtffv16(uint32_t a, float_status *s)
3935 {
3936     return float32_to_float16(a, true, s);
3937 }
3938 
3939 RVVCALL(OPFVV1, vfncvt_f_f_v_h, NOP_UU_H, H2, H4, vfncvtffv16)
3940 RVVCALL(OPFVV1, vfncvt_f_f_v_w, NOP_UU_W, H4, H8, float64_to_float32)
3941 GEN_VEXT_V_ENV(vfncvt_f_f_v_h, 2, 2)
3942 GEN_VEXT_V_ENV(vfncvt_f_f_v_w, 4, 4)
3943 
3944 /*
3945  *** Vector Reduction Operations
3946  */
3947 /* Vector Single-Width Integer Reduction Instructions */
3948 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP)          \
3949 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
3950         void *vs2, CPURISCVState *env, uint32_t desc)     \
3951 {                                                         \
3952     uint32_t vm = vext_vm(desc);                          \
3953     uint32_t vl = env->vl;                                \
3954     uint32_t i;                                           \
3955     TD s1 =  *((TD *)vs1 + HD(0));                        \
3956                                                           \
3957     for (i = 0; i < vl; i++) {                            \
3958         TS2 s2 = *((TS2 *)vs2 + HS2(i));                  \
3959         if (!vm && !vext_elem_mask(v0, i)) {              \
3960             continue;                                     \
3961         }                                                 \
3962         s1 = OP(s1, (TD)s2);                              \
3963     }                                                     \
3964     *((TD *)vd + HD(0)) = s1;                             \
3965 }
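/*
 * Reductions are scalar in nature: the accumulator s1 is seeded from
 * element 0 of vs1, combined with every active element of vs2 in element
 * order, and the final value is written back to element 0 of vd.  The tail
 * of vd is not touched by this macro.
 */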
3966 
3967 /* vd[0] = sum(vs1[0], vs2[*]) */
3968 GEN_VEXT_RED(vredsum_vs_b, int8_t,  int8_t,  H1, H1, DO_ADD)
3969 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
3970 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
3971 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)
3972 
3973 /* vd[0] = maxu(vs1[0], vs2[*]) */
3974 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t,  uint8_t,  H1, H1, DO_MAX)
3975 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
3976 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
3977 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)
3978 
3979 /* vd[0] = max(vs1[0], vs2[*]) */
3980 GEN_VEXT_RED(vredmax_vs_b, int8_t,  int8_t,  H1, H1, DO_MAX)
3981 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
3982 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
3983 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)
3984 
3985 /* vd[0] = minu(vs1[0], vs2[*]) */
3986 GEN_VEXT_RED(vredminu_vs_b, uint8_t,  uint8_t,  H1, H1, DO_MIN)
3987 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
3988 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
3989 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)
3990 
3991 /* vd[0] = min(vs1[0], vs2[*]) */
3992 GEN_VEXT_RED(vredmin_vs_b, int8_t,  int8_t,  H1, H1, DO_MIN)
3993 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
3994 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
3995 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)
3996 
3997 /* vd[0] = and(vs1[0], vs2[*]) */
3998 GEN_VEXT_RED(vredand_vs_b, int8_t,  int8_t,  H1, H1, DO_AND)
3999 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
4000 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
4001 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)
4002 
4003 /* vd[0] = or(vs1[0], vs2[*]) */
4004 GEN_VEXT_RED(vredor_vs_b, int8_t,  int8_t,  H1, H1, DO_OR)
4005 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
4006 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
4007 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)
4008 
4009 /* vd[0] = xor(vs1[0], vs2[*]) */
4010 GEN_VEXT_RED(vredxor_vs_b, int8_t,  int8_t,  H1, H1, DO_XOR)
4011 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
4012 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
4013 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)
4014 
4015 /* Vector Widening Integer Reduction Instructions */
4016 /* Signed sum reduction into double-width accumulator */
4017 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t,  H2, H1, DO_ADD)
4018 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
4019 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)
4020 
4021 /* Unsigned sum reduction into double-width accumulator */
4022 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t,  H2, H1, DO_ADD)
4023 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
4024 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
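/*
 * Note (editorial): the widening reductions reuse GEN_VEXT_RED with a
 * double-width accumulator type TD; the (TD)s2 cast in the macro performs
 * the sign or zero extension of each SEW-wide source element before it is
 * added.  E.g. vwredsum_vs_b accumulates int8_t elements into an int16_t
 * result in vd[0].
 */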
4025 
4026 /* Vector Single-Width Floating-Point Reduction Instructions */
4027 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP)          \
4028 void HELPER(NAME)(void *vd, void *v0, void *vs1,           \
4029                   void *vs2, CPURISCVState *env,           \
4030                   uint32_t desc)                           \
4031 {                                                          \
4032     uint32_t vm = vext_vm(desc);                           \
4033     uint32_t vl = env->vl;                                 \
4034     uint32_t i;                                            \
4035     TD s1 =  *((TD *)vs1 + HD(0));                         \
4036                                                            \
4037     for (i = 0; i < vl; i++) {                             \
4038         TS2 s2 = *((TS2 *)vs2 + HS2(i));                   \
4039         if (!vm && !vext_elem_mask(v0, i)) {               \
4040             continue;                                      \
4041         }                                                  \
4042         s1 = OP(s1, (TD)s2, &env->fp_status);              \
4043     }                                                      \
4044     *((TD *)vd + HD(0)) = s1;                              \
4045 }
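/*
 * Note (editorial): same structure as GEN_VEXT_RED, but OP is a softfloat
 * helper that also takes &env->fp_status, so the partial results follow the
 * current rounding mode and any FP exception flags they raise accumulate in
 * env->fp_status.  This helper always folds elements in ascending index
 * order, which is one valid ordering for the "unordered" sum reductions
 * instantiated below.
 */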
4046 
4047 /* Unordered sum */
4048 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4049 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4050 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
4051 
4052 /* Maximum value */
4053 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maxnum)
4054 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maxnum)
4055 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maxnum)
4056 
4057 /* Minimum value */
4058 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minnum)
4059 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minnum)
4060 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minnum)
4061 
4062 /* Vector Widening Floating-Point Reduction Instructions */
4063 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
4064 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
4065                             void *vs2, CPURISCVState *env, uint32_t desc)
4066 {
4067     uint32_t vm = vext_vm(desc);
4068     uint32_t vl = env->vl;
4069     uint32_t i;
4070     uint32_t s1 =  *((uint32_t *)vs1 + H4(0));
4071 
4072     for (i = 0; i < vl; i++) {
4073         uint16_t s2 = *((uint16_t *)vs2 + H2(i));
4074         if (!vm && !vext_elem_mask(v0, i)) {
4075             continue;
4076         }
4077         s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status),
4078                          &env->fp_status);
4079     }
4080     *((uint32_t *)vd + H4(0)) = s1;
4081 }
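/*
 * Note (editorial): in QEMU's softfloat API, float16_to_float32() takes a
 * bool "ieee" flag; passing true above selects the standard IEEE
 * half-precision interpretation (rather than the Arm alternative
 * half-precision format), which is what the vector extension expects.
 */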
4082 
4083 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
4084                             void *vs2, CPURISCVState *env, uint32_t desc)
4085 {
4086     uint32_t vm = vext_vm(desc);
4087     uint32_t vl = env->vl;
4088     uint32_t i;
4089     uint64_t s1 =  *((uint64_t *)vs1);
4090 
4091     for (i = 0; i < vl; i++) {
4092         uint32_t s2 = *((uint32_t *)vs2 + H4(i));
4093         if (!vm && !vext_elem_mask(v0, i)) {
4094             continue;
4095         }
4096         s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status),
4097                          &env->fp_status);
4098     }
4099     *((uint64_t *)vd) = s1;
4100 }
4101 
4102 /*
4103  *** Vector Mask Operations
4104  */
4105 /* Vector Mask-Register Logical Instructions */
4106 #define GEN_VEXT_MASK_VV(NAME, OP)                        \
4107 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
4108                   void *vs2, CPURISCVState *env,          \
4109                   uint32_t desc)                          \
4110 {                                                         \
4111     uint32_t vlmax = env_archcpu(env)->cfg.vlen;          \
4112     uint32_t vl = env->vl;                                \
4113     uint32_t i;                                           \
4114     int a, b;                                             \
4115                                                           \
4116     for (i = 0; i < vl; i++) {                            \
4117         a = vext_elem_mask(vs1, i);                       \
4118         b = vext_elem_mask(vs2, i);                       \
4119         vext_set_elem_mask(vd, i, OP(b, a));              \
4120     }                                                     \
4121     for (; i < vlmax; i++) {                              \
4122         vext_set_elem_mask(vd, i, 0);                     \
4123     }                                                     \
4124 }
4125 
4126 #define DO_NAND(N, M)  (!(N & M))
4127 #define DO_ANDNOT(N, M)  (N & !M)
4128 #define DO_NOR(N, M)  (!(N | M))
4129 #define DO_ORNOT(N, M)  (N | !M)
4130 #define DO_XNOR(N, M)  (!(N ^ M))
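/*
 * Note (editorial): the operands of these helpers are single mask bits
 * (0 or 1) read via vext_elem_mask(), so logical '!' behaves exactly like a
 * one-bit complement here; e.g. DO_ANDNOT(1, 0) == (1 & !0) == 1 and
 * DO_XNOR(1, 1) == 1.  The macro above passes OP(b, a) with the vs2 bit
 * first, so vmandnot/vmornot complement the vs1 operand.
 */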
4131 
4132 GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
4133 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
4134 GEN_VEXT_MASK_VV(vmandnot_mm, DO_ANDNOT)
4135 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
4136 GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
4137 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
4138 GEN_VEXT_MASK_VV(vmornot_mm, DO_ORNOT)
4139 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
4140 
4141 /* Vector mask population count vmpopc */
4142 target_ulong HELPER(vmpopc_m)(void *v0, void *vs2, CPURISCVState *env,
4143                               uint32_t desc)
4144 {
4145     target_ulong cnt = 0;
4146     uint32_t vm = vext_vm(desc);
4147     uint32_t vl = env->vl;
4148     int i;
4149 
4150     for (i = 0; i < vl; i++) {
4151         if (vm || vext_elem_mask(v0, i)) {
4152             if (vext_elem_mask(vs2, i)) {
4153                 cnt++;
4154             }
4155         }
4156     }
4157     return cnt;
4158 }
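/*
 * Note (editorial): vmpopc.m counts the active elements of vs2[0..vl-1]
 * whose mask bit is set.  Illustrative example with vl = 4, all elements
 * active and vs2 mask bits {1, 0, 1, 1}: the result is 3.
 */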
4159 
4160 /* vmfirst find-first-set mask bit */
4161 target_ulong HELPER(vmfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4162                                uint32_t desc)
4163 {
4164     uint32_t vm = vext_vm(desc);
4165     uint32_t vl = env->vl;
4166     int i;
4167 
4168     for (i = 0; i < vl; i++) {
4169         if (vm || vext_elem_mask(v0, i)) {
4170             if (vext_elem_mask(vs2, i)) {
4171                 return i;
4172             }
4173         }
4174     }
4175     return -1LL;
4176 }
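/*
 * Note (editorial): vmfirst.m returns the index of the lowest-numbered
 * active element whose mask bit is set in vs2, or -1 when no such element
 * exists (including the vl == 0 case, where the loop never runs).
 */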
4177 
4178 enum set_mask_type {
4179     ONLY_FIRST = 1,
4180     INCLUDE_FIRST,
4181     BEFORE_FIRST,
4182 };
4183 
4184 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
4185                    uint32_t desc, enum set_mask_type type)
4186 {
4187     uint32_t vlmax = env_archcpu(env)->cfg.vlen;
4188     uint32_t vm = vext_vm(desc);
4189     uint32_t vl = env->vl;
4190     int i;
4191     bool first_mask_bit = false;
4192 
4193     for (i = 0; i < vl; i++) {
4194         if (!vm && !vext_elem_mask(v0, i)) {
4195             continue;
4196         }
4197         /* write a zero to all following active elements */
4198         if (first_mask_bit) {
4199             vext_set_elem_mask(vd, i, 0);
4200             continue;
4201         }
4202         if (vext_elem_mask(vs2, i)) {
4203             first_mask_bit = true;
4204             if (type == BEFORE_FIRST) {
4205                 vext_set_elem_mask(vd, i, 0);
4206             } else {
4207                 vext_set_elem_mask(vd, i, 1);
4208             }
4209         } else {
4210             if (type == ONLY_FIRST) {
4211                 vext_set_elem_mask(vd, i, 0);
4212             } else {
4213                 vext_set_elem_mask(vd, i, 1);
4214             }
4215         }
4216     }
4217     for (; i < vlmax; i++) {
4218         vext_set_elem_mask(vd, i, 0);
4219     }
4220 }
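/*
 * Note (editorial): illustrative example of the three set-mask variants,
 * with vl = 5, all elements active and vs2 mask bits {0, 0, 1, 0, 1}
 * (first set bit at index 2):
 *     vmsbf (BEFORE_FIRST):  {1, 1, 0, 0, 0}
 *     vmsif (INCLUDE_FIRST): {1, 1, 1, 0, 0}
 *     vmsof (ONLY_FIRST):    {0, 0, 1, 0, 0}
 * Tail bits from vl up to vlmax are cleared in all three cases.
 */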
4221 
4222 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4223                      uint32_t desc)
4224 {
4225     vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
4226 }
4227 
4228 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4229                      uint32_t desc)
4230 {
4231     vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
4232 }
4233 
4234 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4235                      uint32_t desc)
4236 {
4237     vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
4238 }
4239 
4240 /* Vector Iota Instruction */
4241 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H)                                  \
4242 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env,      \
4243                   uint32_t desc)                                          \
4244 {                                                                         \
4245     uint32_t vm = vext_vm(desc);                                          \
4246     uint32_t vl = env->vl;                                                \
4247     uint32_t sum = 0;                                                     \
4248     int i;                                                                \
4249                                                                           \
4250     for (i = 0; i < vl; i++) {                                            \
4251         if (!vm && !vext_elem_mask(v0, i)) {                              \
4252             continue;                                                     \
4253         }                                                                 \
4254         *((ETYPE *)vd + H(i)) = sum;                                      \
4255         if (vext_elem_mask(vs2, i)) {                                     \
4256             sum++;                                                        \
4257         }                                                                 \
4258     }                                                                     \
4259 }
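/*
 * Note (editorial): viota.m writes, to each active destination element, the
 * number of set mask bits of vs2 at indices strictly below it (an exclusive
 * prefix sum).  Illustrative example with vl = 5, all elements active and
 * vs2 mask bits {1, 0, 1, 1, 0}:
 *     vd = {0, 1, 1, 2, 3}
 */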
4260 
4261 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t,  H1)
4262 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
4263 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
4264 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
4265 
4266 /* Vector Element Index Instruction */
4267 #define GEN_VEXT_VID_V(NAME, ETYPE, H)                                    \
4268 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc)  \
4269 {                                                                         \
4270     uint32_t vm = vext_vm(desc);                                          \
4271     uint32_t vl = env->vl;                                                \
4272     int i;                                                                \
4273                                                                           \
4274     for (i = 0; i < vl; i++) {                                            \
4275         if (!vm && !vext_elem_mask(v0, i)) {                              \
4276             continue;                                                     \
4277         }                                                                 \
4278         *((ETYPE *)vd + H(i)) = i;                                        \
4279     }                                                                     \
4280 }
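/*
 * Note (editorial): vid.v takes no vector source operand; it simply writes
 * the element index i to every active destination element, e.g. with
 * vl = 4 and no masking, vd = {0, 1, 2, 3}.
 */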
4281 
4282 GEN_VEXT_VID_V(vid_v_b, uint8_t,  H1)
4283 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
4284 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
4285 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
4286 
4287 /*
4288  *** Vector Permutation Instructions
4289  */
4290 
4291 /* Vector Slide Instructions */
4292 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                              \
4293 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4294                   CPURISCVState *env, uint32_t desc)                      \
4295 {                                                                         \
4296     uint32_t vm = vext_vm(desc);                                          \
4297     uint32_t vl = env->vl;                                                \
4298     target_ulong offset = s1, i;                                          \
4299                                                                           \
4300     for (i = offset; i < vl; i++) {                                       \
4301         if (!vm && !vext_elem_mask(v0, i)) {                              \
4302             continue;                                                     \
4303         }                                                                 \
4304         *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
4305     }                                                                     \
4306 }
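/*
 * Note (editorial): for vslideup, destination elements below the offset
 * keep their previous values, and each active element i >= offset receives
 * vs2[i - offset].  Illustrative example with offset = 2, vl = 5, no
 * masking and vs2 = {a, b, c, d, e}:
 *     vd = {<old vd[0]>, <old vd[1]>, a, b, c}
 */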
4307 
4308 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
4309 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t,  H1)
4310 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
4311 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
4312 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
4313 
4314 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                            \
4315 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4316                   CPURISCVState *env, uint32_t desc)                      \
4317 {                                                                         \
4318     uint32_t vlmax = env_archcpu(env)->cfg.vlen;                          \
4319     uint32_t vm = vext_vm(desc);                                          \
4320     uint32_t vl = env->vl;                                                \
4321     target_ulong offset = s1, i;                                          \
4322                                                                           \
4323     for (i = 0; i < vl; ++i) {                                            \
4324         target_ulong j = i + offset;                                      \
4325         if (!vm && !vext_elem_mask(v0, i)) {                              \
4326             continue;                                                     \
4327         }                                                                 \
4328         *((ETYPE *)vd + H(i)) = j >= vlmax ? 0 : *((ETYPE *)vs2 + H(j));  \
4329     }                                                                     \
4330 }
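/*
 * Note (editorial): for vslidedown, each active element i receives
 * vs2[i + offset]; source indices at or beyond vlmax (as computed above)
 * read as zero.  Illustrative example with offset = 1, vl = 4 and no
 * masking: vd = {vs2[1], vs2[2], vs2[3], vs2[4]}, with vs2[4] replaced by 0
 * if index 4 is not below vlmax.
 */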
4331 
4332 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
4333 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t,  H1)
4334 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
4335 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
4336 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
4337 
4338 #define GEN_VEXT_VSLIDE1UP_VX(NAME, ETYPE, H)                             \
4339 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4340                   CPURISCVState *env, uint32_t desc)                      \
4341 {                                                                         \
4342     uint32_t vm = vext_vm(desc);                                          \
4343     uint32_t vl = env->vl;                                                \
4344     uint32_t i;                                                           \
4345                                                                           \
4346     for (i = 0; i < vl; i++) {                                            \
4347         if (!vm && !vext_elem_mask(v0, i)) {                              \
4348             continue;                                                     \
4349         }                                                                 \
4350         if (i == 0) {                                                     \
4351             *((ETYPE *)vd + H(i)) = s1;                                   \
4352         } else {                                                          \
4353             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));           \
4354         }                                                                 \
4355     }                                                                     \
4356 }
4357 
4358 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
4359 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, uint8_t,  H1)
4360 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, uint16_t, H2)
4361 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, uint32_t, H4)
4362 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, uint64_t, H8)
4363 
4364 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ETYPE, H)                           \
4365 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4366                   CPURISCVState *env, uint32_t desc)                      \
4367 {                                                                         \
4368     uint32_t vm = vext_vm(desc);                                          \
4369     uint32_t vl = env->vl;                                                \
4370     uint32_t i;                                                           \
4371                                                                           \
4372     for (i = 0; i < vl; i++) {                                            \
4373         if (!vm && !vext_elem_mask(v0, i)) {                              \
4374             continue;                                                     \
4375         }                                                                 \
4376         if (i == vl - 1) {                                                \
4377             *((ETYPE *)vd + H(i)) = s1;                                   \
4378         } else {                                                          \
4379             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));           \
4380         }                                                                 \
4381     }                                                                     \
4382 }
4383 
4384 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
4385 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, uint8_t,  H1)
4386 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, uint16_t, H2)
4387 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, uint32_t, H4)
4388 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, uint64_t, H8)
4389 
4390 /* Vector Register Gather Instruction */
4391 #define GEN_VEXT_VRGATHER_VV(NAME, ETYPE, H)                              \
4392 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
4393                   CPURISCVState *env, uint32_t desc)                      \
4394 {                                                                         \
4395     uint32_t vlmax = env_archcpu(env)->cfg.vlen;                          \
4396     uint32_t vm = vext_vm(desc);                                          \
4397     uint32_t vl = env->vl;                                                \
4398     uint64_t index;                                                       \
4399     uint32_t i;                                                           \
4400                                                                           \
4401     for (i = 0; i < vl; i++) {                                            \
4402         if (!vm && !vext_elem_mask(v0, i)) {                              \
4403             continue;                                                     \
4404         }                                                                 \
4405         index = *((ETYPE *)vs1 + H(i));                                   \
4406         if (index >= vlmax) {                                             \
4407             *((ETYPE *)vd + H(i)) = 0;                                    \
4408         } else {                                                          \
4409             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
4410         }                                                                 \
4411     }                                                                     \
4412 }
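/*
 * Note (editorial): illustrative example for vrgather.vv with vl = 4, no
 * masking, vs2 = {a, b, c, d, ...} and index vector vs1 = {3, 3, 0, 2}:
 *     vd = {d, d, a, c}
 * Indices that are not below vlmax (as computed above) yield 0 instead.
 */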
4413 
4414 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
4415 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t,  H1)
4416 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, H2)
4417 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, H4)
4418 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, H8)
4419 
4420 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                              \
4421 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4422                   CPURISCVState *env, uint32_t desc)                      \
4423 {                                                                         \
4424     uint32_t vlmax = env_archcpu(env)->cfg.vlen;                          \
4425     uint32_t vm = vext_vm(desc);                                          \
4426     uint32_t vl = env->vl;                                                \
4427     uint64_t index = s1;                                                  \
4428     uint32_t i;                                                           \
4429                                                                           \
4430     for (i = 0; i < vl; i++) {                                            \
4431         if (!vm && !vext_elem_mask(v0, i)) {                              \
4432             continue;                                                     \
4433         }                                                                 \
4434         if (index >= vlmax) {                                             \
4435             *((ETYPE *)vd + H(i)) = 0;                                    \
4436         } else {                                                          \
4437             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
4438         }                                                                 \
4439     }                                                                     \
4440 }
4441 
4442 /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
4443 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t,  H1)
4444 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
4445 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
4446 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
4447 
4448 /* Vector Compress Instruction */
4449 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
4450 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
4451                   CPURISCVState *env, uint32_t desc)                      \
4452 {                                                                         \
4453     uint32_t vl = env->vl;                                                \
4454     uint32_t num = 0, i;                                                  \
4455                                                                           \
4456     for (i = 0; i < vl; i++) {                                            \
4457         if (!vext_elem_mask(vs1, i)) {                                    \
4458             continue;                                                     \
4459         }                                                                 \
4460         *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
4461         num++;                                                            \
4462     }                                                                     \
4463 }
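/*
 * Note (editorial): vcompress is never masked by v0; vs1 itself supplies
 * the selection mask.  Illustrative example with vl = 5,
 * vs2 = {a, b, c, d, e} and vs1 mask bits {0, 1, 1, 0, 1}:
 *     vd = {b, c, e, ...}   (elements past the packed ones are unchanged)
 */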
4464 
4465 /* Compress into vd elements of vs2 where vs1 is enabled */
4466 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t,  H1)
4467 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
4468 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
4469 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
4470