xref: /openbmc/qemu/target/riscv/vector_helper.c (revision 31961cfe)
1 /*
2  * RISC-V Vector Extension Helpers for QEMU.
3  *
4  * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2 or later, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License along with
16  * this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include "qemu/osdep.h"
20 #include "qemu/host-utils.h"
21 #include "qemu/bitops.h"
22 #include "cpu.h"
23 #include "exec/memop.h"
24 #include "exec/exec-all.h"
25 #include "exec/helper-proto.h"
26 #include "fpu/softfloat.h"
27 #include "tcg/tcg-gvec-desc.h"
28 #include "internals.h"
29 #include <math.h>
30 
31 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
32                             target_ulong s2)
33 {
34     int vlmax, vl;
35     RISCVCPU *cpu = env_archcpu(env);
36     uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
37     uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
38     uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
39     int xlen = riscv_cpu_xlen(env);
40     bool vill = (s2 >> (xlen - 1)) & 0x1;
41     target_ulong reserved = s2 &
42                             MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
43                                             xlen - 1 - R_VTYPE_RESERVED_SHIFT);
44 
45     if (lmul & 4) {
46         /* Fractional LMUL. */
47         if (lmul == 4 ||
48             cpu->cfg.elen >> (8 - lmul) < sew) {
49             vill = true;
50         }
51     }
52 
53     if ((sew > cpu->cfg.elen)
54         || vill
55         || (ediv != 0)
56         || (reserved != 0)) {
57         /* only set vill bit. */
58         env->vill = 1;
59         env->vtype = 0;
60         env->vl = 0;
61         env->vstart = 0;
62         return 0;
63     }
64 
65     vlmax = vext_get_vlmax(cpu, s2);
66     if (s1 <= vlmax) {
67         vl = s1;
68     } else {
69         vl = vlmax;
70     }
71     env->vl = vl;
72     env->vtype = s2;
73     env->vstart = 0;
74     return vl;
75 }
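/*
 * Illustrative example of the fractional-LMUL check above (values assumed
 * for illustration only): with ELEN = 64 and vlmul = 0b110 (LMUL = 1/4),
 * the check computes 64 >> (8 - 6) = 16, so any SEW > 16 sets vill.
 * This matches the rule that SEW must not exceed ELEN * LMUL for
 * fractional LMUL settings.
 */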
76 
77 /*
78  * Note that vector data is stored in host-endian 64-bit chunks,
79  * so addressing units smaller than that needs a host-endian fixup.
80  */
81 #ifdef HOST_WORDS_BIGENDIAN
82 #define H1(x)   ((x) ^ 7)
83 #define H1_2(x) ((x) ^ 6)
84 #define H1_4(x) ((x) ^ 4)
85 #define H2(x)   ((x) ^ 3)
86 #define H4(x)   ((x) ^ 1)
87 #define H8(x)   ((x))
88 #else
89 #define H1(x)   (x)
90 #define H1_2(x) (x)
91 #define H1_4(x) (x)
92 #define H2(x)   (x)
93 #define H4(x)   (x)
94 #define H8(x)   (x)
95 #endif
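/*
 * Worked example (illustrative): on a big-endian host, the uint16_t element
 * at guest logical index 0 lives in the least-significant bytes of the first
 * host 64-bit chunk, i.e. at byte offset 6 of that chunk as stored in memory.
 * H2(0) = 0 ^ 3 = 3, and indexing a uint16_t pointer with 3 selects exactly
 * that byte pair, so guest element order is preserved regardless of host
 * endianness.
 */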
96 
97 static inline uint32_t vext_nf(uint32_t desc)
98 {
99     return FIELD_EX32(simd_data(desc), VDATA, NF);
100 }
101 
102 static inline uint32_t vext_vm(uint32_t desc)
103 {
104     return FIELD_EX32(simd_data(desc), VDATA, VM);
105 }
106 
107 /*
108  * Encode LMUL to lmul as follows:
109  *     LMUL    vlmul    lmul
110  *      1       000       0
111  *      2       001       1
112  *      4       010       2
113  *      8       011       3
114  *      -       100       -
115  *     1/8      101      -3
116  *     1/4      110      -2
117  *     1/2      111      -1
118  */
119 static inline int32_t vext_lmul(uint32_t desc)
120 {
121     return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
122 }
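/*
 * Example (for illustration): a vtype VLMUL field of 0b111 sign-extends to
 * lmul = -1, i.e. LMUL = 2^-1 = 1/2; 0b010 stays 2, i.e. LMUL = 2^2 = 4.
 */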
123 
124 /*
125  * Get the maximum number of elements can be operated.
126  *
127  * esz: log2 of element size in bytes.
128  */
129 static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
130 {
131     /*
132      * As simd_desc supports at most 2048 bytes, the maximum VLEN is 1024 bits,
133      * so the vector length in bytes (vlenb) is encoded as maxsz.
134      */
135     uint32_t vlenb = simd_maxsz(desc);
136 
137     /* Return VLMAX */
138     int scale = vext_lmul(desc) - esz;
139     return scale < 0 ? vlenb >> -scale : vlenb << scale;
140 }
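/*
 * Worked example (values assumed for illustration): with VLEN = 128 bits,
 * vlenb = simd_maxsz(desc) = 16.  For SEW = 32 (esz = 2) and LMUL = 2
 * (lmul = 1), scale = 1 - 2 = -1, so VLMAX = 16 >> 1 = 8, which equals
 * VLEN * LMUL / SEW = 128 * 2 / 32.
 */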
141 
142 /*
143  * This function checks for watchpoints before the real load operation.
144  *
145  * In softmmu mode, the TLB API probe_access is sufficient for the watchpoint check.
146  * In user mode, there is currently no watchpoint support.
147  *
148  * It will trigger an exception if there is no mapping in the TLB and the
149  * page table walk cannot fill the TLB entry.  The guest software can then
150  * return here after processing the exception, or never return.
151  */
152 static void probe_pages(CPURISCVState *env, target_ulong addr,
153                         target_ulong len, uintptr_t ra,
154                         MMUAccessType access_type)
155 {
156     target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
157     target_ulong curlen = MIN(pagelen, len);
158 
159     probe_access(env, addr, curlen, access_type,
160                  cpu_mmu_index(env, false), ra);
161     if (len > curlen) {
162         addr += curlen;
163         curlen = len - curlen;
164         probe_access(env, addr, curlen, access_type,
165                      cpu_mmu_index(env, false), ra);
166     }
167 }
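/*
 * Worked example (assuming 4 KiB target pages, for illustration): for
 * addr = 0x1ff8 and len = 16, pagelen = 8, so the first probe_access()
 * covers the 8 bytes up to the page boundary and the second covers the
 * remaining 8 bytes on the following page.
 */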
168 
169 static inline void vext_set_elem_mask(void *v0, int index,
170                                       uint8_t value)
171 {
172     int idx = index / 64;
173     int pos = index % 64;
174     uint64_t old = ((uint64_t *)v0)[idx];
175     ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
176 }
177 
178 /*
179  * Earlier designs (pre-0.9) had a varying number of bits
180  * per mask value (MLEN). In the 0.9 design, MLEN=1.
181  * (Section 4.5)
182  */
183 static inline int vext_elem_mask(void *v0, int index)
184 {
185     int idx = index / 64;
186     int pos = index  % 64;
187     return (((uint64_t *)v0)[idx] >> pos) & 1;
188 }
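/*
 * Example (illustrative): with MLEN = 1 there is exactly one mask bit per
 * element, so element index 70 maps to idx = 70 / 64 = 1 and
 * pos = 70 % 64 = 6, i.e. bit 6 of the second 64-bit mask word.
 */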
189 
190 /* element operations for load and store */
191 typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
192                                uint32_t idx, void *vd, uintptr_t retaddr);
193 
194 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
195 static void NAME(CPURISCVState *env, abi_ptr addr,         \
196                  uint32_t idx, void *vd, uintptr_t retaddr)\
197 {                                                          \
198     ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
199     *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
200 }                                                          \
201 
202 GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
203 GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
204 GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
205 GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)
206 
207 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
208 static void NAME(CPURISCVState *env, abi_ptr addr,         \
209                  uint32_t idx, void *vd, uintptr_t retaddr)\
210 {                                                          \
211     ETYPE data = *((ETYPE *)vd + H(idx));                  \
212     cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
213 }
214 
215 GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
216 GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
217 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
218 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
219 
220 /*
221  *** stride: access vector elements from strided memory
222  */
223 static void
224 vext_ldst_stride(void *vd, void *v0, target_ulong base,
225                  target_ulong stride, CPURISCVState *env,
226                  uint32_t desc, uint32_t vm,
227                  vext_ldst_elem_fn *ldst_elem,
228                  uint32_t esz, uintptr_t ra, MMUAccessType access_type)
229 {
230     uint32_t i, k;
231     uint32_t nf = vext_nf(desc);
232     uint32_t max_elems = vext_max_elems(desc, esz);
233 
234     for (i = env->vstart; i < env->vl; i++, env->vstart++) {
235         if (!vm && !vext_elem_mask(v0, i)) {
236             continue;
237         }
238 
239         k = 0;
240         while (k < nf) {
241             target_ulong addr = base + stride * i + (k << esz);
242             ldst_elem(env, addr, i + k * max_elems, vd, ra);
243             k++;
244         }
245     }
246     env->vstart = 0;
247 }
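/*
 * Address computation example (numbers assumed for illustration): with
 * base = 0x1000, stride = 64, esz = 2 (32-bit elements) and nf = 2, the
 * segment field k = 1 of element i = 3 is accessed at
 * addr = 0x1000 + 64 * 3 + (1 << 2) = 0x10c4.
 */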
248 
249 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
250 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
251                   target_ulong stride, CPURISCVState *env,              \
252                   uint32_t desc)                                        \
253 {                                                                       \
254     uint32_t vm = vext_vm(desc);                                        \
255     vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
256                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);      \
257 }
258 
259 GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b)
260 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
261 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
262 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)
263 
264 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                       \
265 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
266                   target_ulong stride, CPURISCVState *env,              \
267                   uint32_t desc)                                        \
268 {                                                                       \
269     uint32_t vm = vext_vm(desc);                                        \
270     vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
271                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);     \
272 }
273 
274 GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b)
275 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
276 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
277 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
278 
279 /*
280  *** unit-stride: access elements stored contiguously in memory
281  */
282 
283 /* unmasked unit-stride load and store operation */
284 static void
285 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
286              vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl,
287              uintptr_t ra, MMUAccessType access_type)
288 {
289     uint32_t i, k;
290     uint32_t nf = vext_nf(desc);
291     uint32_t max_elems = vext_max_elems(desc, esz);
292 
293     /* load/store elements from/to guest memory */
294     for (i = env->vstart; i < evl; i++, env->vstart++) {
295         k = 0;
296         while (k < nf) {
297             target_ulong addr = base + ((i * nf + k) << esz);
298             ldst_elem(env, addr, i + k * max_elems, vd, ra);
299             k++;
300         }
301     }
302     env->vstart = 0;
303 }
304 
305 /*
306  * A masked unit-stride load or store is handled as a special case of the
307  * strided operations, with stride = NF * sizeof(ETYPE).
308  */
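/*
 * For example (illustrative): a masked segment load with NF = 3 and 16-bit
 * elements reuses vext_ldst_stride() with stride = 3 << 1 = 6 bytes, so
 * consecutive segments are laid out back to back in memory.
 */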
309 
310 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN)                            \
311 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
312                          CPURISCVState *env, uint32_t desc)             \
313 {                                                                       \
314     uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
315     vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
316                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);      \
317 }                                                                       \
318                                                                         \
319 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
320                   CPURISCVState *env, uint32_t desc)                    \
321 {                                                                       \
322     vext_ldst_us(vd, base, env, desc, LOAD_FN,                          \
323                  ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_LOAD); \
324 }
325 
326 GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b)
327 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
328 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
329 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)
330 
331 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN)                            \
332 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,          \
333                          CPURISCVState *env, uint32_t desc)              \
334 {                                                                        \
335     uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));              \
336     vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,   \
337                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);      \
338 }                                                                        \
339                                                                          \
340 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                 \
341                   CPURISCVState *env, uint32_t desc)                     \
342 {                                                                        \
343     vext_ldst_us(vd, base, env, desc, STORE_FN,                          \
344                  ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_STORE); \
345 }
346 
347 GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b)
348 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
349 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
350 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)
351 
352 /*
353  *** unit stride mask load and store, EEW = 1
354  */
355 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
356                     CPURISCVState *env, uint32_t desc)
357 {
358     /* evl = ceil(vl/8) */
359     uint8_t evl = (env->vl + 7) >> 3;
360     vext_ldst_us(vd, base, env, desc, lde_b,
361                  0, evl, GETPC(), MMU_DATA_LOAD);
362 }
363 
364 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
365                     CPURISCVState *env, uint32_t desc)
366 {
367     /* evl = ceil(vl/8) */
368     uint8_t evl = (env->vl + 7) >> 3;
369     vext_ldst_us(vd, base, env, desc, ste_b,
370                  0, evl, GETPC(), MMU_DATA_STORE);
371 }
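/*
 * Worked example (illustrative): for vl = 17, evl = (17 + 7) >> 3 = 3, so
 * vlm.v/vsm.v transfer 3 bytes, covering mask bits 0..16 plus the unused
 * high bits of the last byte.
 */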
372 
373 /*
374  *** index: access vector elements from indexed memory
375  */
376 typedef target_ulong vext_get_index_addr(target_ulong base,
377         uint32_t idx, void *vs2);
378 
379 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
380 static target_ulong NAME(target_ulong base,            \
381                          uint32_t idx, void *vs2)      \
382 {                                                      \
383     return (base + *((ETYPE *)vs2 + H(idx)));          \
384 }
385 
386 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t,  H1)
387 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
388 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
389 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
390 
391 static inline void
392 vext_ldst_index(void *vd, void *v0, target_ulong base,
393                 void *vs2, CPURISCVState *env, uint32_t desc,
394                 vext_get_index_addr get_index_addr,
395                 vext_ldst_elem_fn *ldst_elem,
396                 uint32_t esz, uintptr_t ra, MMUAccessType access_type)
397 {
398     uint32_t i, k;
399     uint32_t nf = vext_nf(desc);
400     uint32_t vm = vext_vm(desc);
401     uint32_t max_elems = vext_max_elems(desc, esz);
402 
403     /* load/store elements from/to guest memory */
404     for (i = env->vstart; i < env->vl; i++, env->vstart++) {
405         if (!vm && !vext_elem_mask(v0, i)) {
406             continue;
407         }
408 
409         k = 0;
410         while (k < nf) {
411             abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz);
412             ldst_elem(env, addr, i + k * max_elems, vd, ra);
413             k++;
414         }
415     }
416     env->vstart = 0;
417 }
418 
419 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)                  \
420 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                   \
421                   void *vs2, CPURISCVState *env, uint32_t desc)            \
422 {                                                                          \
423     vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,                \
424                     LOAD_FN, ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
425 }
426 
427 GEN_VEXT_LD_INDEX(vlxei8_8_v,   int8_t,  idx_b, lde_b)
428 GEN_VEXT_LD_INDEX(vlxei8_16_v,  int16_t, idx_b, lde_h)
429 GEN_VEXT_LD_INDEX(vlxei8_32_v,  int32_t, idx_b, lde_w)
430 GEN_VEXT_LD_INDEX(vlxei8_64_v,  int64_t, idx_b, lde_d)
431 GEN_VEXT_LD_INDEX(vlxei16_8_v,  int8_t,  idx_h, lde_b)
432 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
433 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
434 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
435 GEN_VEXT_LD_INDEX(vlxei32_8_v,  int8_t,  idx_w, lde_b)
436 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
437 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
438 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
439 GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b)
440 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
441 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
442 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)
443 
444 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)       \
445 void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
446                   void *vs2, CPURISCVState *env, uint32_t desc)  \
447 {                                                                \
448     vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
449                     STORE_FN, ctzl(sizeof(ETYPE)),               \
450                     GETPC(), MMU_DATA_STORE);                    \
451 }
452 
453 GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
454 GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
455 GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
456 GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
457 GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
458 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
459 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
460 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
461 GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
462 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
463 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
464 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
465 GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
466 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
467 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
468 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)
469 
470 /*
471  *** unit-stride fault-only-first load instructions
472  */
473 static inline void
474 vext_ldff(void *vd, void *v0, target_ulong base,
475           CPURISCVState *env, uint32_t desc,
476           vext_ldst_elem_fn *ldst_elem,
477           uint32_t esz, uintptr_t ra)
478 {
479     void *host;
480     uint32_t i, k, vl = 0;
481     uint32_t nf = vext_nf(desc);
482     uint32_t vm = vext_vm(desc);
483     uint32_t max_elems = vext_max_elems(desc, esz);
484     target_ulong addr, offset, remain;
485 
486     /* probe every access */
487     for (i = env->vstart; i < env->vl; i++) {
488         if (!vm && !vext_elem_mask(v0, i)) {
489             continue;
490         }
491         addr = base + i * (nf << esz);
492         if (i == 0) {
493             probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
494         } else {
495             /* if it triggers an exception, no need to check watchpoint */
496             remain = nf << esz;
497             while (remain > 0) {
498                 offset = -(addr | TARGET_PAGE_MASK);
499                 host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
500                                          cpu_mmu_index(env, false));
501                 if (host) {
502 #ifdef CONFIG_USER_ONLY
503                     if (page_check_range(addr, nf << esz, PAGE_READ) < 0) {
504                         vl = i;
505                         goto ProbeSuccess;
506                     }
507 #else
508                     probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
509 #endif
510                 } else {
511                     vl = i;
512                     goto ProbeSuccess;
513                 }
514                 if (remain <=  offset) {
515                     break;
516                 }
517                 remain -= offset;
518                 addr += offset;
519             }
520         }
521     }
522 ProbeSuccess:
523     /* load bytes from guest memory */
524     if (vl != 0) {
525         env->vl = vl;
526     }
527     for (i = env->vstart; i < env->vl; i++) {
528         k = 0;
529         if (!vm && !vext_elem_mask(v0, i)) {
530             continue;
531         }
532         while (k < nf) {
533             target_ulong addr = base + ((i * nf + k) << esz);
534             ldst_elem(env, addr, i + k * max_elems, vd, ra);
535             k++;
536         }
537     }
538     env->vstart = 0;
539 }
540 
541 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
542 void HELPER(NAME)(void *vd, void *v0, target_ulong base,  \
543                   CPURISCVState *env, uint32_t desc)      \
544 {                                                         \
545     vext_ldff(vd, v0, base, env, desc, LOAD_FN,           \
546               ctzl(sizeof(ETYPE)), GETPC());              \
547 }
548 
549 GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b)
550 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
551 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
552 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
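/*
 * Fault-only-first semantics, as implemented above (descriptive note):
 * element 0 is always probed with probe_pages(), so a fault there raises an
 * exception as usual.  If a later element i would fault
 * (tlb_vaddr_to_host() returns NULL or, in user mode, the page is not
 * readable), vl is trimmed to i and the load completes with only the
 * elements that precede the faulting one.
 */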
553 
554 #define DO_SWAP(N, M) (M)
555 #define DO_AND(N, M)  (N & M)
556 #define DO_XOR(N, M)  (N ^ M)
557 #define DO_OR(N, M)   (N | M)
558 #define DO_ADD(N, M)  (N + M)
559 
560 /* Signed min/max */
561 #define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
562 #define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))
563 
564 /* Unsigned min/max */
565 #define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
566 #define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)
567 
568 /*
569  *** load and store whole register instructions
570  */
571 static void
572 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
573                 vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra,
574                 MMUAccessType access_type)
575 {
576     uint32_t i, k, off, pos;
577     uint32_t nf = vext_nf(desc);
578     uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
579     uint32_t max_elems = vlenb >> esz;
580 
581     k = env->vstart / max_elems;
582     off = env->vstart % max_elems;
583 
584     if (off) {
585         /* load/store rest of elements of the current segment pointed to by vstart */
586         for (pos = off; pos < max_elems; pos++, env->vstart++) {
587             target_ulong addr = base + ((pos + k * max_elems) << esz);
588             ldst_elem(env, addr, pos + k * max_elems, vd, ra);
589         }
590         k++;
591     }
592 
593     /* load/store elements for rest of segments */
594     for (; k < nf; k++) {
595         for (i = 0; i < max_elems; i++, env->vstart++) {
596             target_ulong addr = base + ((i + k * max_elems) << esz);
597             ldst_elem(env, addr, i + k * max_elems, vd, ra);
598         }
599     }
600 
601     env->vstart = 0;
602 }
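/*
 * Worked example (values assumed for illustration): with VLEN = 128,
 * vlenb = 16; for vl8re32.v, nf = 8 and esz = 2, so max_elems = 4.
 * Resuming with vstart = 6 gives k = 1 and off = 2: the first loop finishes
 * elements 2..3 of register group segment 1, then the second loop handles
 * segments 2..7 in full.
 */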
603 
604 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN)      \
605 void HELPER(NAME)(void *vd, target_ulong base,       \
606                   CPURISCVState *env, uint32_t desc) \
607 {                                                    \
608     vext_ldst_whole(vd, base, env, desc, LOAD_FN,    \
609                     ctzl(sizeof(ETYPE)), GETPC(),    \
610                     MMU_DATA_LOAD);                  \
611 }
612 
613 GEN_VEXT_LD_WHOLE(vl1re8_v,  int8_t,  lde_b)
614 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
615 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
616 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
617 GEN_VEXT_LD_WHOLE(vl2re8_v,  int8_t,  lde_b)
618 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
619 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
620 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
621 GEN_VEXT_LD_WHOLE(vl4re8_v,  int8_t,  lde_b)
622 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
623 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
624 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
625 GEN_VEXT_LD_WHOLE(vl8re8_v,  int8_t,  lde_b)
626 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
627 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
628 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)
629 
630 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN)     \
631 void HELPER(NAME)(void *vd, target_ulong base,       \
632                   CPURISCVState *env, uint32_t desc) \
633 {                                                    \
634     vext_ldst_whole(vd, base, env, desc, STORE_FN,   \
635                     ctzl(sizeof(ETYPE)), GETPC(),    \
636                     MMU_DATA_STORE);                 \
637 }
638 
639 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
640 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
641 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
642 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
643 
644 /*
645  *** Vector Integer Arithmetic Instructions
646  */
647 
648 /* expand macro args before macro */
649 #define RVVCALL(macro, ...)  macro(__VA_ARGS__)
650 
651 /* (TD, T1, T2, TX1, TX2) */
652 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
653 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
654 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
655 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
656 #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
657 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
658 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
659 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
660 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
661 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
662 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
663 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
664 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
665 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
666 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
667 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
668 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
669 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
670 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
671 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
672 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
673 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
674 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
675 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
676 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
677 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
678 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
679 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
680 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
681 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
682 
683 /* operation of two vector elements */
684 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
685 
686 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
687 static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
688 {                                                               \
689     TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
690     TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
691     *((TD *)vd + HD(i)) = OP(s2, s1);                           \
692 }
693 #define DO_SUB(N, M) (N - M)
694 #define DO_RSUB(N, M) (M - N)
695 
696 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
697 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
698 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
699 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
700 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
701 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
702 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
703 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
704 
705 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
706                        CPURISCVState *env, uint32_t desc,
707                        uint32_t esz, uint32_t dsz,
708                        opivv2_fn *fn)
709 {
710     uint32_t vm = vext_vm(desc);
711     uint32_t vl = env->vl;
712     uint32_t i;
713 
714     for (i = env->vstart; i < vl; i++) {
715         if (!vm && !vext_elem_mask(v0, i)) {
716             continue;
717         }
718         fn(vd, vs1, vs2, i);
719     }
720     env->vstart = 0;
721 }
722 
723 /* generate the helpers for OPIVV */
724 #define GEN_VEXT_VV(NAME, ESZ, DSZ)                       \
725 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
726                   void *vs2, CPURISCVState *env,          \
727                   uint32_t desc)                          \
728 {                                                         \
729     do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,     \
730                do_##NAME);                                \
731 }
732 
733 GEN_VEXT_VV(vadd_vv_b, 1, 1)
734 GEN_VEXT_VV(vadd_vv_h, 2, 2)
735 GEN_VEXT_VV(vadd_vv_w, 4, 4)
736 GEN_VEXT_VV(vadd_vv_d, 8, 8)
737 GEN_VEXT_VV(vsub_vv_b, 1, 1)
738 GEN_VEXT_VV(vsub_vv_h, 2, 2)
739 GEN_VEXT_VV(vsub_vv_w, 4, 4)
740 GEN_VEXT_VV(vsub_vv_d, 8, 8)
741 
742 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
743 
744 /*
745  * (T1)s1 gives the real operand type.
746  * (TX1)(T1)s1 expands the operand type for widening or narrowing operations.
747  */
748 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
749 static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
750 {                                                                   \
751     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
752     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
753 }
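/*
 * For example (illustrative): vwadd_vx_b uses WOP_SSS_B, so T1 = int8_t and
 * TX1 = int16_t.  (T1)s1 truncates the target_long scalar to the 8-bit
 * source width, and the outer (TX1) cast then sign-extends it to the 16-bit
 * widened width before DO_ADD is applied.
 */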
754 
755 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
756 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
757 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
758 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
759 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
760 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
761 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
762 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
763 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
764 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
765 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
766 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
767 
768 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
769                        CPURISCVState *env, uint32_t desc,
770                        uint32_t esz, uint32_t dsz,
771                        opivx2_fn fn)
772 {
773     uint32_t vm = vext_vm(desc);
774     uint32_t vl = env->vl;
775     uint32_t i;
776 
777     for (i = env->vstart; i < vl; i++) {
778         if (!vm && !vext_elem_mask(v0, i)) {
779             continue;
780         }
781         fn(vd, s1, vs2, i);
782     }
783     env->vstart = 0;
784 }
785 
786 /* generate the helpers for OPIVX */
787 #define GEN_VEXT_VX(NAME, ESZ, DSZ)                       \
788 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
789                   void *vs2, CPURISCVState *env,          \
790                   uint32_t desc)                          \
791 {                                                         \
792     do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ,      \
793                do_##NAME);                                \
794 }
795 
796 GEN_VEXT_VX(vadd_vx_b, 1, 1)
797 GEN_VEXT_VX(vadd_vx_h, 2, 2)
798 GEN_VEXT_VX(vadd_vx_w, 4, 4)
799 GEN_VEXT_VX(vadd_vx_d, 8, 8)
800 GEN_VEXT_VX(vsub_vx_b, 1, 1)
801 GEN_VEXT_VX(vsub_vx_h, 2, 2)
802 GEN_VEXT_VX(vsub_vx_w, 4, 4)
803 GEN_VEXT_VX(vsub_vx_d, 8, 8)
804 GEN_VEXT_VX(vrsub_vx_b, 1, 1)
805 GEN_VEXT_VX(vrsub_vx_h, 2, 2)
806 GEN_VEXT_VX(vrsub_vx_w, 4, 4)
807 GEN_VEXT_VX(vrsub_vx_d, 8, 8)
808 
809 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
810 {
811     intptr_t oprsz = simd_oprsz(desc);
812     intptr_t i;
813 
814     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
815         *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
816     }
817 }
818 
819 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
820 {
821     intptr_t oprsz = simd_oprsz(desc);
822     intptr_t i;
823 
824     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
825         *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
826     }
827 }
828 
829 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
830 {
831     intptr_t oprsz = simd_oprsz(desc);
832     intptr_t i;
833 
834     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
835         *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
836     }
837 }
838 
839 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
840 {
841     intptr_t oprsz = simd_oprsz(desc);
842     intptr_t i;
843 
844     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
845         *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
846     }
847 }
848 
849 /* Vector Widening Integer Add/Subtract */
850 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
851 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
852 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
853 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
854 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
855 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
856 #define WOP_WUUU_B  uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
857 #define WOP_WUUU_H  uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
858 #define WOP_WUUU_W  uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
859 #define WOP_WSSS_B  int16_t, int8_t, int16_t, int16_t, int16_t
860 #define WOP_WSSS_H  int32_t, int16_t, int32_t, int32_t, int32_t
861 #define WOP_WSSS_W  int64_t, int32_t, int64_t, int64_t, int64_t
862 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
863 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
864 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
865 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
866 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
867 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
868 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
869 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
870 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
871 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
872 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
873 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
874 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
875 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
876 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
877 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
878 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
879 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
880 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
881 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
882 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
883 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
884 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
885 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
886 GEN_VEXT_VV(vwaddu_vv_b, 1, 2)
887 GEN_VEXT_VV(vwaddu_vv_h, 2, 4)
888 GEN_VEXT_VV(vwaddu_vv_w, 4, 8)
889 GEN_VEXT_VV(vwsubu_vv_b, 1, 2)
890 GEN_VEXT_VV(vwsubu_vv_h, 2, 4)
891 GEN_VEXT_VV(vwsubu_vv_w, 4, 8)
892 GEN_VEXT_VV(vwadd_vv_b, 1, 2)
893 GEN_VEXT_VV(vwadd_vv_h, 2, 4)
894 GEN_VEXT_VV(vwadd_vv_w, 4, 8)
895 GEN_VEXT_VV(vwsub_vv_b, 1, 2)
896 GEN_VEXT_VV(vwsub_vv_h, 2, 4)
897 GEN_VEXT_VV(vwsub_vv_w, 4, 8)
898 GEN_VEXT_VV(vwaddu_wv_b, 1, 2)
899 GEN_VEXT_VV(vwaddu_wv_h, 2, 4)
900 GEN_VEXT_VV(vwaddu_wv_w, 4, 8)
901 GEN_VEXT_VV(vwsubu_wv_b, 1, 2)
902 GEN_VEXT_VV(vwsubu_wv_h, 2, 4)
903 GEN_VEXT_VV(vwsubu_wv_w, 4, 8)
904 GEN_VEXT_VV(vwadd_wv_b, 1, 2)
905 GEN_VEXT_VV(vwadd_wv_h, 2, 4)
906 GEN_VEXT_VV(vwadd_wv_w, 4, 8)
907 GEN_VEXT_VV(vwsub_wv_b, 1, 2)
908 GEN_VEXT_VV(vwsub_wv_h, 2, 4)
909 GEN_VEXT_VV(vwsub_wv_w, 4, 8)
910 
911 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
912 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
913 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
914 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
915 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
916 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
917 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
918 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
919 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
920 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
921 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
922 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
923 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
924 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
925 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
926 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
927 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
928 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
929 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
930 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
931 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
932 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
933 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
934 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
935 GEN_VEXT_VX(vwaddu_vx_b, 1, 2)
936 GEN_VEXT_VX(vwaddu_vx_h, 2, 4)
937 GEN_VEXT_VX(vwaddu_vx_w, 4, 8)
938 GEN_VEXT_VX(vwsubu_vx_b, 1, 2)
939 GEN_VEXT_VX(vwsubu_vx_h, 2, 4)
940 GEN_VEXT_VX(vwsubu_vx_w, 4, 8)
941 GEN_VEXT_VX(vwadd_vx_b, 1, 2)
942 GEN_VEXT_VX(vwadd_vx_h, 2, 4)
943 GEN_VEXT_VX(vwadd_vx_w, 4, 8)
944 GEN_VEXT_VX(vwsub_vx_b, 1, 2)
945 GEN_VEXT_VX(vwsub_vx_h, 2, 4)
946 GEN_VEXT_VX(vwsub_vx_w, 4, 8)
947 GEN_VEXT_VX(vwaddu_wx_b, 1, 2)
948 GEN_VEXT_VX(vwaddu_wx_h, 2, 4)
949 GEN_VEXT_VX(vwaddu_wx_w, 4, 8)
950 GEN_VEXT_VX(vwsubu_wx_b, 1, 2)
951 GEN_VEXT_VX(vwsubu_wx_h, 2, 4)
952 GEN_VEXT_VX(vwsubu_wx_w, 4, 8)
953 GEN_VEXT_VX(vwadd_wx_b, 1, 2)
954 GEN_VEXT_VX(vwadd_wx_h, 2, 4)
955 GEN_VEXT_VX(vwadd_wx_w, 4, 8)
956 GEN_VEXT_VX(vwsub_wx_b, 1, 2)
957 GEN_VEXT_VX(vwsub_wx_h, 2, 4)
958 GEN_VEXT_VX(vwsub_wx_w, 4, 8)
959 
960 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
961 #define DO_VADC(N, M, C) (N + M + C)
962 #define DO_VSBC(N, M, C) (N - M - C)
963 
964 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP)              \
965 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
966                   CPURISCVState *env, uint32_t desc)          \
967 {                                                             \
968     uint32_t vl = env->vl;                                    \
969     uint32_t i;                                               \
970                                                               \
971     for (i = env->vstart; i < vl; i++) {                      \
972         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
973         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
974         ETYPE carry = vext_elem_mask(v0, i);                  \
975                                                               \
976         *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry);         \
977     }                                                         \
978     env->vstart = 0;                                          \
979 }
980 
981 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t,  H1, DO_VADC)
982 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
983 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
984 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)
985 
986 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t,  H1, DO_VSBC)
987 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
988 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
989 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
990 
991 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP)                         \
992 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
993                   CPURISCVState *env, uint32_t desc)                     \
994 {                                                                        \
995     uint32_t vl = env->vl;                                               \
996     uint32_t i;                                                          \
997                                                                          \
998     for (i = env->vstart; i < vl; i++) {                                 \
999         ETYPE s2 = *((ETYPE *)vs2 + H(i));                               \
1000         ETYPE carry = vext_elem_mask(v0, i);                             \
1001                                                                          \
1002         *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
1003     }                                                                    \
1004     env->vstart = 0;                                          \
1005 }
1006 
1007 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t,  H1, DO_VADC)
1008 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
1009 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
1010 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)
1011 
1012 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t,  H1, DO_VSBC)
1013 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
1014 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
1015 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
1016 
1017 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N :           \
1018                           (__typeof(N))(N + M) < N)
1019 #define DO_MSBC(N, M, C) (C ? N <= M : N < M)
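/*
 * Why DO_MADC works (worked example, uint8_t for illustration): without a
 * carry-in, an addition overflows iff the truncated sum is smaller than one
 * operand, e.g. 200 + 100 -> (uint8_t)44 < 200, so the carry-out is 1.
 * With a carry-in, the sum can wrap to exactly N (e.g. M = 255:
 * (uint8_t)(200 + 255 + 1) = 200), so equality must also count as a carry,
 * hence the <= test.
 */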
1020 
1021 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
1022 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
1023                   CPURISCVState *env, uint32_t desc)          \
1024 {                                                             \
1025     uint32_t vl = env->vl;                                    \
1026     uint32_t vm = vext_vm(desc);                              \
1027     uint32_t i;                                               \
1028                                                               \
1029     for (i = env->vstart; i < vl; i++) {                      \
1030         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
1031         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
1032         ETYPE carry = !vm && vext_elem_mask(v0, i);           \
1033         vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry));      \
1034     }                                                         \
1035     env->vstart = 0;                                          \
1036 }
1037 
1038 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t,  H1, DO_MADC)
1039 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
1040 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
1041 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)
1042 
1043 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t,  H1, DO_MSBC)
1044 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
1045 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
1046 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
1047 
1048 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)               \
1049 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
1050                   void *vs2, CPURISCVState *env, uint32_t desc) \
1051 {                                                               \
1052     uint32_t vl = env->vl;                                      \
1053     uint32_t vm = vext_vm(desc);                                \
1054     uint32_t i;                                                 \
1055                                                                 \
1056     for (i = env->vstart; i < vl; i++) {                        \
1057         ETYPE s2 = *((ETYPE *)vs2 + H(i));                      \
1058         ETYPE carry = !vm && vext_elem_mask(v0, i);             \
1059         vext_set_elem_mask(vd, i,                               \
1060                 DO_OP(s2, (ETYPE)(target_long)s1, carry));      \
1061     }                                                           \
1062     env->vstart = 0;                                            \
1063 }
1064 
1065 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t,  H1, DO_MADC)
1066 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
1067 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
1068 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)
1069 
1070 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t,  H1, DO_MSBC)
1071 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
1072 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
1073 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
1074 
1075 /* Vector Bitwise Logical Instructions */
1076 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
1077 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
1078 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
1079 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
1080 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
1081 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
1082 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
1083 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
1084 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
1085 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
1086 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
1087 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
1088 GEN_VEXT_VV(vand_vv_b, 1, 1)
1089 GEN_VEXT_VV(vand_vv_h, 2, 2)
1090 GEN_VEXT_VV(vand_vv_w, 4, 4)
1091 GEN_VEXT_VV(vand_vv_d, 8, 8)
1092 GEN_VEXT_VV(vor_vv_b, 1, 1)
1093 GEN_VEXT_VV(vor_vv_h, 2, 2)
1094 GEN_VEXT_VV(vor_vv_w, 4, 4)
1095 GEN_VEXT_VV(vor_vv_d, 8, 8)
1096 GEN_VEXT_VV(vxor_vv_b, 1, 1)
1097 GEN_VEXT_VV(vxor_vv_h, 2, 2)
1098 GEN_VEXT_VV(vxor_vv_w, 4, 4)
1099 GEN_VEXT_VV(vxor_vv_d, 8, 8)
1100 
1101 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
1102 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
1103 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
1104 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
1105 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
1106 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
1107 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
1108 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
1109 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
1110 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
1111 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
1112 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
1113 GEN_VEXT_VX(vand_vx_b, 1, 1)
1114 GEN_VEXT_VX(vand_vx_h, 2, 2)
1115 GEN_VEXT_VX(vand_vx_w, 4, 4)
1116 GEN_VEXT_VX(vand_vx_d, 8, 8)
1117 GEN_VEXT_VX(vor_vx_b, 1, 1)
1118 GEN_VEXT_VX(vor_vx_h, 2, 2)
1119 GEN_VEXT_VX(vor_vx_w, 4, 4)
1120 GEN_VEXT_VX(vor_vx_d, 8, 8)
1121 GEN_VEXT_VX(vxor_vx_b, 1, 1)
1122 GEN_VEXT_VX(vxor_vx_h, 2, 2)
1123 GEN_VEXT_VX(vxor_vx_w, 4, 4)
1124 GEN_VEXT_VX(vxor_vx_d, 8, 8)
1125 
1126 /* Vector Single-Width Bit Shift Instructions */
1127 #define DO_SLL(N, M)  (N << (M))
1128 #define DO_SRL(N, M)  (N >> (M))
1129 
1130 /* generate the helpers for shift instructions with two vector operands */
1131 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK)             \
1132 void HELPER(NAME)(void *vd, void *v0, void *vs1,                          \
1133                   void *vs2, CPURISCVState *env, uint32_t desc)           \
1134 {                                                                         \
1135     uint32_t vm = vext_vm(desc);                                          \
1136     uint32_t vl = env->vl;                                                \
1137     uint32_t i;                                                           \
1138                                                                           \
1139     for (i = env->vstart; i < vl; i++) {                                  \
1140         if (!vm && !vext_elem_mask(v0, i)) {                              \
1141             continue;                                                     \
1142         }                                                                 \
1143         TS1 s1 = *((TS1 *)vs1 + HS1(i));                                  \
1144         TS2 s2 = *((TS2 *)vs2 + HS2(i));                                  \
1145         *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);                        \
1146     }                                                                     \
1147     env->vstart = 0;                                                      \
1148 }
1149 
1150 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t,  uint8_t, H1, H1, DO_SLL, 0x7)
1151 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
1152 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
1153 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)
1154 
1155 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1156 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1157 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1158 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1159 
1160 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t,  int8_t, H1, H1, DO_SRL, 0x7)
1161 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
1162 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1163 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
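/*
 * Notes on the shift helpers above (illustrative): the MASK argument keeps
 * only the low log2(SEW) bits of the shift amount, e.g. for vsll.vv with
 * SEW = 8 a shift amount of 9 becomes 9 & 0x7 = 1.  vsra reuses DO_SRL but
 * instantiates it with a signed source type, so the C right shift of s2 is
 * an arithmetic shift on the hosts QEMU supports.
 */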
1164 
1165 /* generate the helpers for shift instructions with one vector and one scalar */
1166 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
1167 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,      \
1168         void *vs2, CPURISCVState *env, uint32_t desc)       \
1169 {                                                           \
1170     uint32_t vm = vext_vm(desc);                            \
1171     uint32_t vl = env->vl;                                  \
1172     uint32_t i;                                             \
1173                                                             \
1174     for (i = env->vstart; i < vl; i++) {                    \
1175         if (!vm && !vext_elem_mask(v0, i)) {                \
1176             continue;                                       \
1177         }                                                   \
1178         TS2 s2 = *((TS2 *)vs2 + HS2(i));                    \
1179         *((TD *)vd + HD(i)) = OP(s2, s1 & MASK);            \
1180     }                                                       \
1181     env->vstart = 0;                                        \
1182 }
1183 
1184 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
1185 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
1186 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
1187 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)
1188 
1189 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1190 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1191 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1192 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1193 
1194 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
1195 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
1196 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1197 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1198 
1199 /* Vector Narrowing Integer Right Shift Instructions */
1200 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t,  uint16_t, H1, H2, DO_SRL, 0xf)
1201 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1202 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1203 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t,  int16_t, H1, H2, DO_SRL, 0xf)
1204 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1205 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1206 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1207 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1208 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1209 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
1210 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1211 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1212 
1213 /* Vector Integer Comparison Instructions */
1214 #define DO_MSEQ(N, M) (N == M)
1215 #define DO_MSNE(N, M) (N != M)
1216 #define DO_MSLT(N, M) (N < M)
1217 #define DO_MSLE(N, M) (N <= M)
1218 #define DO_MSGT(N, M) (N > M)
1219 
1220 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                \
1221 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
1222                   CPURISCVState *env, uint32_t desc)          \
1223 {                                                             \
1224     uint32_t vm = vext_vm(desc);                              \
1225     uint32_t vl = env->vl;                                    \
1226     uint32_t i;                                               \
1227                                                               \
1228     for (i = env->vstart; i < vl; i++) {                      \
1229         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
1230         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
1231         if (!vm && !vext_elem_mask(v0, i)) {                  \
1232             continue;                                         \
1233         }                                                     \
1234         vext_set_elem_mask(vd, i, DO_OP(s2, s1));             \
1235     }                                                         \
1236     env->vstart = 0;                                          \
1237 }
1238 
1239 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
1240 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
1241 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
1242 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
1243 
1244 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t,  H1, DO_MSNE)
1245 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
1246 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
1247 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
1248 
1249 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t,  H1, DO_MSLT)
1250 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
1251 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
1252 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
1253 
1254 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t,  H1, DO_MSLT)
1255 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
1256 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
1257 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
1258 
1259 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t,  H1, DO_MSLE)
1260 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
1261 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
1262 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
1263 
1264 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t,  H1, DO_MSLE)
1265 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
1266 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
1267 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1268 
1269 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP)                      \
1270 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
1271                   CPURISCVState *env, uint32_t desc)                \
1272 {                                                                   \
1273     uint32_t vm = vext_vm(desc);                                    \
1274     uint32_t vl = env->vl;                                          \
1275     uint32_t i;                                                     \
1276                                                                     \
1277     for (i = env->vstart; i < vl; i++) {                            \
1278         ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
1279         if (!vm && !vext_elem_mask(v0, i)) {                        \
1280             continue;                                               \
1281         }                                                           \
1282         vext_set_elem_mask(vd, i,                                   \
1283                 DO_OP(s2, (ETYPE)(target_long)s1));                 \
1284     }                                                               \
1285     env->vstart = 0;                                                \
1286 }
1287 
1288 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t,  H1, DO_MSEQ)
1289 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
1290 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
1291 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
1292 
1293 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t,  H1, DO_MSNE)
1294 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
1295 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
1296 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
1297 
1298 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t,  H1, DO_MSLT)
1299 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
1300 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
1301 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
1302 
1303 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t,  H1, DO_MSLT)
1304 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
1305 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
1306 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
1307 
1308 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t,  H1, DO_MSLE)
1309 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
1310 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
1311 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
1312 
1313 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t,  H1, DO_MSLE)
1314 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
1315 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
1316 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
1317 
1318 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t,  H1, DO_MSGT)
1319 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
1320 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
1321 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
1322 
1323 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t,  H1, DO_MSGT)
1324 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
1325 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
1326 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
1327 
1328 /* Vector Integer Min/Max Instructions */
1329 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1330 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1331 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1332 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1333 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1334 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1335 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1336 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1337 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1338 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1339 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1340 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1341 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1342 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1343 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1344 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
1345 GEN_VEXT_VV(vminu_vv_b, 1, 1)
1346 GEN_VEXT_VV(vminu_vv_h, 2, 2)
1347 GEN_VEXT_VV(vminu_vv_w, 4, 4)
1348 GEN_VEXT_VV(vminu_vv_d, 8, 8)
1349 GEN_VEXT_VV(vmin_vv_b, 1, 1)
1350 GEN_VEXT_VV(vmin_vv_h, 2, 2)
1351 GEN_VEXT_VV(vmin_vv_w, 4, 4)
1352 GEN_VEXT_VV(vmin_vv_d, 8, 8)
1353 GEN_VEXT_VV(vmaxu_vv_b, 1, 1)
1354 GEN_VEXT_VV(vmaxu_vv_h, 2, 2)
1355 GEN_VEXT_VV(vmaxu_vv_w, 4, 4)
1356 GEN_VEXT_VV(vmaxu_vv_d, 8, 8)
1357 GEN_VEXT_VV(vmax_vv_b, 1, 1)
1358 GEN_VEXT_VV(vmax_vv_h, 2, 2)
1359 GEN_VEXT_VV(vmax_vv_w, 4, 4)
1360 GEN_VEXT_VV(vmax_vv_d, 8, 8)
1361 
1362 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1363 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1364 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1365 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1366 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1367 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1368 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1369 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1370 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1371 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1372 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1373 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1374 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1375 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1376 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1377 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
1378 GEN_VEXT_VX(vminu_vx_b, 1, 1)
1379 GEN_VEXT_VX(vminu_vx_h, 2, 2)
1380 GEN_VEXT_VX(vminu_vx_w, 4, 4)
1381 GEN_VEXT_VX(vminu_vx_d, 8, 8)
1382 GEN_VEXT_VX(vmin_vx_b, 1, 1)
1383 GEN_VEXT_VX(vmin_vx_h, 2, 2)
1384 GEN_VEXT_VX(vmin_vx_w, 4, 4)
1385 GEN_VEXT_VX(vmin_vx_d, 8, 8)
1386 GEN_VEXT_VX(vmaxu_vx_b, 1, 1)
1387 GEN_VEXT_VX(vmaxu_vx_h, 2, 2)
1388 GEN_VEXT_VX(vmaxu_vx_w, 4, 4)
1389 GEN_VEXT_VX(vmaxu_vx_d, 8, 8)
1390 GEN_VEXT_VX(vmax_vx_b, 1, 1)
1391 GEN_VEXT_VX(vmax_vx_h, 2, 2)
1392 GEN_VEXT_VX(vmax_vx_w, 4, 4)
1393 GEN_VEXT_VX(vmax_vx_d, 8, 8)
1394 
1395 /* Vector Single-Width Integer Multiply Instructions */
1396 #define DO_MUL(N, M) (N * M)
1397 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1398 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1399 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1400 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
1401 GEN_VEXT_VV(vmul_vv_b, 1, 1)
1402 GEN_VEXT_VV(vmul_vv_h, 2, 2)
1403 GEN_VEXT_VV(vmul_vv_w, 4, 4)
1404 GEN_VEXT_VV(vmul_vv_d, 8, 8)
1405 
1406 static int8_t do_mulh_b(int8_t s2, int8_t s1)
1407 {
1408     return (int16_t)s2 * (int16_t)s1 >> 8;
1409 }
1410 
1411 static int16_t do_mulh_h(int16_t s2, int16_t s1)
1412 {
1413     return (int32_t)s2 * (int32_t)s1 >> 16;
1414 }
1415 
1416 static int32_t do_mulh_w(int32_t s2, int32_t s1)
1417 {
1418     return (int64_t)s2 * (int64_t)s1 >> 32;
1419 }
1420 
1421 static int64_t do_mulh_d(int64_t s2, int64_t s1)
1422 {
1423     uint64_t hi_64, lo_64;
1424 
1425     muls64(&lo_64, &hi_64, s1, s2);
1426     return hi_64;
1427 }
1428 
1429 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1430 {
1431     return (uint16_t)s2 * (uint16_t)s1 >> 8;
1432 }
1433 
1434 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1435 {
1436     return (uint32_t)s2 * (uint32_t)s1 >> 16;
1437 }
1438 
1439 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1440 {
1441     return (uint64_t)s2 * (uint64_t)s1 >> 32;
1442 }
1443 
1444 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1445 {
1446     uint64_t hi_64, lo_64;
1447 
1448     mulu64(&lo_64, &hi_64, s2, s1);
1449     return hi_64;
1450 }
1451 
1452 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1453 {
1454     return (int16_t)s2 * (uint16_t)s1 >> 8;
1455 }
1456 
1457 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1458 {
1459     return (int32_t)s2 * (uint32_t)s1 >> 16;
1460 }
1461 
1462 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1463 {
1464     return (int64_t)s2 * (uint64_t)s1 >> 32;
1465 }
1466 
1467 /*
1468  * Signed x unsigned high-part multiply via mulu64():
1469  *
1470  * Let  A = signed operand,
1471  *      B = unsigned operand,
1472  *      P = mulu64(A, B), the 128-bit unsigned product of A's bit
1473  *          pattern with B.
1474  *
1475  * If A < 0, the unsigned value of A's bit pattern is A + 2 ** 64, so
1476  *      P  = (A + 2 ** 64) * B
1477  *         = A * B + 2 ** 64 * B
1478  * and the signed product is
1479  *      SP = A * B = P - 2 ** 64 * B
1480  * Otherwise
1481  *      SP = P
1482  * So the high half of the signed product is obtained with
1483  *      HI_P -= (A < 0 ? B : 0)
1484  */
1485 
1486 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1487 {
1488     uint64_t hi_64, lo_64;
1489 
1490     mulu64(&lo_64, &hi_64, s2, s1);
1491 
1492     hi_64 -= s2 < 0 ? s1 : 0;
1493     return hi_64;
1494 }
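/*
 * Worked example of the correction at 8-bit width: A = -1 (bits 0xff),
 * B = 3.  The unsigned product is 0xff * 3 = 0x02fd, so HI_P = 0x02.
 * Since A < 0, subtract B: 0x02 - 3 = -1 = 0xff, which is the high byte
 * of the true signed product -1 * 3 = -3 = 0xfffd.
 */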
1495 
1496 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1497 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1498 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1499 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1500 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1501 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1502 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1503 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1504 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1505 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1506 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1507 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
1508 GEN_VEXT_VV(vmulh_vv_b, 1, 1)
1509 GEN_VEXT_VV(vmulh_vv_h, 2, 2)
1510 GEN_VEXT_VV(vmulh_vv_w, 4, 4)
1511 GEN_VEXT_VV(vmulh_vv_d, 8, 8)
1512 GEN_VEXT_VV(vmulhu_vv_b, 1, 1)
1513 GEN_VEXT_VV(vmulhu_vv_h, 2, 2)
1514 GEN_VEXT_VV(vmulhu_vv_w, 4, 4)
1515 GEN_VEXT_VV(vmulhu_vv_d, 8, 8)
1516 GEN_VEXT_VV(vmulhsu_vv_b, 1, 1)
1517 GEN_VEXT_VV(vmulhsu_vv_h, 2, 2)
1518 GEN_VEXT_VV(vmulhsu_vv_w, 4, 4)
1519 GEN_VEXT_VV(vmulhsu_vv_d, 8, 8)
1520 
1521 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1522 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1523 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1524 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1525 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1526 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1527 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1528 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1529 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1530 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1531 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1532 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1533 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1534 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1535 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1536 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
1537 GEN_VEXT_VX(vmul_vx_b, 1, 1)
1538 GEN_VEXT_VX(vmul_vx_h, 2, 2)
1539 GEN_VEXT_VX(vmul_vx_w, 4, 4)
1540 GEN_VEXT_VX(vmul_vx_d, 8, 8)
1541 GEN_VEXT_VX(vmulh_vx_b, 1, 1)
1542 GEN_VEXT_VX(vmulh_vx_h, 2, 2)
1543 GEN_VEXT_VX(vmulh_vx_w, 4, 4)
1544 GEN_VEXT_VX(vmulh_vx_d, 8, 8)
1545 GEN_VEXT_VX(vmulhu_vx_b, 1, 1)
1546 GEN_VEXT_VX(vmulhu_vx_h, 2, 2)
1547 GEN_VEXT_VX(vmulhu_vx_w, 4, 4)
1548 GEN_VEXT_VX(vmulhu_vx_d, 8, 8)
1549 GEN_VEXT_VX(vmulhsu_vx_b, 1, 1)
1550 GEN_VEXT_VX(vmulhsu_vx_h, 2, 2)
1551 GEN_VEXT_VX(vmulhsu_vx_w, 4, 4)
1552 GEN_VEXT_VX(vmulhsu_vx_d, 8, 8)
1553 
1554 /* Vector Integer Divide Instructions */
1555 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1556 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
1557 #define DO_DIV(N, M)  (unlikely(M == 0) ? (__typeof(N))(-1) :\
1558         unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
1559 #define DO_REM(N, M)  (unlikely(M == 0) ? N :\
1560         unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
1561 
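/*
 * (N == -N) is true only for N == 0 and for the minimum value of N's
 * type, so together with (M == -1) it catches the INT_MIN / -1 overflow
 * case: the quotient is N (INT_MIN) and the remainder is 0, matching
 * RISC-V division overflow semantics.  (For N == 0 the same expressions
 * happen to yield the ordinary results 0 and 0.)
 */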
1562 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1563 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1564 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1565 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1566 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1567 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1568 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1569 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1570 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1571 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1572 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1573 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1574 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1575 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1576 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1577 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
1578 GEN_VEXT_VV(vdivu_vv_b, 1, 1)
1579 GEN_VEXT_VV(vdivu_vv_h, 2, 2)
1580 GEN_VEXT_VV(vdivu_vv_w, 4, 4)
1581 GEN_VEXT_VV(vdivu_vv_d, 8, 8)
1582 GEN_VEXT_VV(vdiv_vv_b, 1, 1)
1583 GEN_VEXT_VV(vdiv_vv_h, 2, 2)
1584 GEN_VEXT_VV(vdiv_vv_w, 4, 4)
1585 GEN_VEXT_VV(vdiv_vv_d, 8, 8)
1586 GEN_VEXT_VV(vremu_vv_b, 1, 1)
1587 GEN_VEXT_VV(vremu_vv_h, 2, 2)
1588 GEN_VEXT_VV(vremu_vv_w, 4, 4)
1589 GEN_VEXT_VV(vremu_vv_d, 8, 8)
1590 GEN_VEXT_VV(vrem_vv_b, 1, 1)
1591 GEN_VEXT_VV(vrem_vv_h, 2, 2)
1592 GEN_VEXT_VV(vrem_vv_w, 4, 4)
1593 GEN_VEXT_VV(vrem_vv_d, 8, 8)
1594 
1595 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1596 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1597 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1598 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1599 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1600 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1601 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1602 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1603 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1604 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1605 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1606 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1607 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1608 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1609 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1610 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
1611 GEN_VEXT_VX(vdivu_vx_b, 1, 1)
1612 GEN_VEXT_VX(vdivu_vx_h, 2, 2)
1613 GEN_VEXT_VX(vdivu_vx_w, 4, 4)
1614 GEN_VEXT_VX(vdivu_vx_d, 8, 8)
1615 GEN_VEXT_VX(vdiv_vx_b, 1, 1)
1616 GEN_VEXT_VX(vdiv_vx_h, 2, 2)
1617 GEN_VEXT_VX(vdiv_vx_w, 4, 4)
1618 GEN_VEXT_VX(vdiv_vx_d, 8, 8)
1619 GEN_VEXT_VX(vremu_vx_b, 1, 1)
1620 GEN_VEXT_VX(vremu_vx_h, 2, 2)
1621 GEN_VEXT_VX(vremu_vx_w, 4, 4)
1622 GEN_VEXT_VX(vremu_vx_d, 8, 8)
1623 GEN_VEXT_VX(vrem_vx_b, 1, 1)
1624 GEN_VEXT_VX(vrem_vx_h, 2, 2)
1625 GEN_VEXT_VX(vrem_vx_w, 4, 4)
1626 GEN_VEXT_VX(vrem_vx_d, 8, 8)
1627 
1628 /* Vector Widening Integer Multiply Instructions */
1629 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1630 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1631 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1632 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1633 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1634 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1635 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1636 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1637 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
1638 GEN_VEXT_VV(vwmul_vv_b, 1, 2)
1639 GEN_VEXT_VV(vwmul_vv_h, 2, 4)
1640 GEN_VEXT_VV(vwmul_vv_w, 4, 8)
1641 GEN_VEXT_VV(vwmulu_vv_b, 1, 2)
1642 GEN_VEXT_VV(vwmulu_vv_h, 2, 4)
1643 GEN_VEXT_VV(vwmulu_vv_w, 4, 8)
1644 GEN_VEXT_VV(vwmulsu_vv_b, 1, 2)
1645 GEN_VEXT_VV(vwmulsu_vv_h, 2, 4)
1646 GEN_VEXT_VV(vwmulsu_vv_w, 4, 8)
1647 
1648 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1649 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1650 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1651 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1652 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1653 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1654 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1655 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1656 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
1657 GEN_VEXT_VX(vwmul_vx_b, 1, 2)
1658 GEN_VEXT_VX(vwmul_vx_h, 2, 4)
1659 GEN_VEXT_VX(vwmul_vx_w, 4, 8)
1660 GEN_VEXT_VX(vwmulu_vx_b, 1, 2)
1661 GEN_VEXT_VX(vwmulu_vx_h, 2, 4)
1662 GEN_VEXT_VX(vwmulu_vx_w, 4, 8)
1663 GEN_VEXT_VX(vwmulsu_vx_b, 1, 2)
1664 GEN_VEXT_VX(vwmulsu_vx_h, 2, 4)
1665 GEN_VEXT_VX(vwmulsu_vx_w, 4, 8)
1666 
1667 /* Vector Single-Width Integer Multiply-Add Instructions */
1668 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
1669 static void do_##NAME(void *vd, void *vs1, void *vs2, int i)       \
1670 {                                                                  \
1671     TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
1672     TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
1673     TD d = *((TD *)vd + HD(i));                                    \
1674     *((TD *)vd + HD(i)) = OP(s2, s1, d);                           \
1675 }
1676 
1677 #define DO_MACC(N, M, D) (M * N + D)
1678 #define DO_NMSAC(N, M, D) (-(M * N) + D)
1679 #define DO_MADD(N, M, D) (M * D + N)
1680 #define DO_NMSUB(N, M, D) (-(M * D) + N)
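/*
 * In OP(s2, s1, d) the third argument d is the current destination
 * element, so DO_MACC/DO_NMSAC compute vd[i] = +/-(vs1[i] * vs2[i]) + vd[i],
 * while DO_MADD/DO_NMSUB use vd[i] as a multiplicand instead:
 * vd[i] = +/-(vs1[i] * vd[i]) + vs2[i].  The OPIVX3 forms below substitute
 * the scalar rs1 for vs1.
 */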
1681 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1682 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1683 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1684 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1685 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1686 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1687 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1688 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1689 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1690 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1691 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1692 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1693 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1694 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1695 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1696 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
1697 GEN_VEXT_VV(vmacc_vv_b, 1, 1)
1698 GEN_VEXT_VV(vmacc_vv_h, 2, 2)
1699 GEN_VEXT_VV(vmacc_vv_w, 4, 4)
1700 GEN_VEXT_VV(vmacc_vv_d, 8, 8)
1701 GEN_VEXT_VV(vnmsac_vv_b, 1, 1)
1702 GEN_VEXT_VV(vnmsac_vv_h, 2, 2)
1703 GEN_VEXT_VV(vnmsac_vv_w, 4, 4)
1704 GEN_VEXT_VV(vnmsac_vv_d, 8, 8)
1705 GEN_VEXT_VV(vmadd_vv_b, 1, 1)
1706 GEN_VEXT_VV(vmadd_vv_h, 2, 2)
1707 GEN_VEXT_VV(vmadd_vv_w, 4, 4)
1708 GEN_VEXT_VV(vmadd_vv_d, 8, 8)
1709 GEN_VEXT_VV(vnmsub_vv_b, 1, 1)
1710 GEN_VEXT_VV(vnmsub_vv_h, 2, 2)
1711 GEN_VEXT_VV(vnmsub_vv_w, 4, 4)
1712 GEN_VEXT_VV(vnmsub_vv_d, 8, 8)
1713 
1714 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
1715 static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
1716 {                                                                   \
1717     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
1718     TD d = *((TD *)vd + HD(i));                                     \
1719     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d);                   \
1720 }
1721 
1722 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1723 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1724 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1725 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1726 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1727 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1728 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1729 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1730 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1731 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1732 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1733 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1734 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1735 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1736 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1737 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
1738 GEN_VEXT_VX(vmacc_vx_b, 1, 1)
1739 GEN_VEXT_VX(vmacc_vx_h, 2, 2)
1740 GEN_VEXT_VX(vmacc_vx_w, 4, 4)
1741 GEN_VEXT_VX(vmacc_vx_d, 8, 8)
1742 GEN_VEXT_VX(vnmsac_vx_b, 1, 1)
1743 GEN_VEXT_VX(vnmsac_vx_h, 2, 2)
1744 GEN_VEXT_VX(vnmsac_vx_w, 4, 4)
1745 GEN_VEXT_VX(vnmsac_vx_d, 8, 8)
1746 GEN_VEXT_VX(vmadd_vx_b, 1, 1)
1747 GEN_VEXT_VX(vmadd_vx_h, 2, 2)
1748 GEN_VEXT_VX(vmadd_vx_w, 4, 4)
1749 GEN_VEXT_VX(vmadd_vx_d, 8, 8)
1750 GEN_VEXT_VX(vnmsub_vx_b, 1, 1)
1751 GEN_VEXT_VX(vnmsub_vx_h, 2, 2)
1752 GEN_VEXT_VX(vnmsub_vx_w, 4, 4)
1753 GEN_VEXT_VX(vnmsub_vx_d, 8, 8)
1754 
1755 /* Vector Widening Integer Multiply-Add Instructions */
1756 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1757 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1758 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1759 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1760 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1761 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1762 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1763 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1764 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
1765 GEN_VEXT_VV(vwmaccu_vv_b, 1, 2)
1766 GEN_VEXT_VV(vwmaccu_vv_h, 2, 4)
1767 GEN_VEXT_VV(vwmaccu_vv_w, 4, 8)
1768 GEN_VEXT_VV(vwmacc_vv_b, 1, 2)
1769 GEN_VEXT_VV(vwmacc_vv_h, 2, 4)
1770 GEN_VEXT_VV(vwmacc_vv_w, 4, 8)
1771 GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2)
1772 GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4)
1773 GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8)
1774 
1775 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
1776 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
1777 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
1778 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
1779 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
1780 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
1781 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
1782 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
1783 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
1784 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
1785 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
1786 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
1787 GEN_VEXT_VX(vwmaccu_vx_b, 1, 2)
1788 GEN_VEXT_VX(vwmaccu_vx_h, 2, 4)
1789 GEN_VEXT_VX(vwmaccu_vx_w, 4, 8)
1790 GEN_VEXT_VX(vwmacc_vx_b, 1, 2)
1791 GEN_VEXT_VX(vwmacc_vx_h, 2, 4)
1792 GEN_VEXT_VX(vwmacc_vx_w, 4, 8)
1793 GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2)
1794 GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4)
1795 GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8)
1796 GEN_VEXT_VX(vwmaccus_vx_b, 1, 2)
1797 GEN_VEXT_VX(vwmaccus_vx_h, 2, 4)
1798 GEN_VEXT_VX(vwmaccus_vx_w, 4, 8)
1799 
1800 /* Vector Integer Merge and Move Instructions */
1801 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H)                              \
1802 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env,           \
1803                   uint32_t desc)                                     \
1804 {                                                                    \
1805     uint32_t vl = env->vl;                                           \
1806     uint32_t i;                                                      \
1807                                                                      \
1808     for (i = env->vstart; i < vl; i++) {                             \
1809         ETYPE s1 = *((ETYPE *)vs1 + H(i));                           \
1810         *((ETYPE *)vd + H(i)) = s1;                                  \
1811     }                                                                \
1812     env->vstart = 0;                                                 \
1813 }
1814 
1815 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t,  H1)
1816 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
1817 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
1818 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
1819 
1820 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H)                              \
1821 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env,         \
1822                   uint32_t desc)                                     \
1823 {                                                                    \
1824     uint32_t vl = env->vl;                                           \
1825     uint32_t i;                                                      \
1826                                                                      \
1827     for (i = env->vstart; i < vl; i++) {                             \
1828         *((ETYPE *)vd + H(i)) = (ETYPE)s1;                           \
1829     }                                                                \
1830     env->vstart = 0;                                                 \
1831 }
1832 
1833 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t,  H1)
1834 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
1835 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
1836 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
1837 
1838 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H)                           \
1839 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,          \
1840                   CPURISCVState *env, uint32_t desc)                 \
1841 {                                                                    \
1842     uint32_t vl = env->vl;                                           \
1843     uint32_t i;                                                      \
1844                                                                      \
1845     for (i = env->vstart; i < vl; i++) {                             \
1846         ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1);            \
1847         *((ETYPE *)vd + H(i)) = *(vt + H(i));                        \
1848     }                                                                \
1849     env->vstart = 0;                                                 \
1850 }
1851 
1852 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t,  H1)
1853 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
1854 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
1855 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
1856 
1857 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H)                           \
1858 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,               \
1859                   void *vs2, CPURISCVState *env, uint32_t desc)      \
1860 {                                                                    \
1861     uint32_t vl = env->vl;                                           \
1862     uint32_t i;                                                      \
1863                                                                      \
1864     for (i = env->vstart; i < vl; i++) {                             \
1865         ETYPE s2 = *((ETYPE *)vs2 + H(i));                           \
1866         ETYPE d = (!vext_elem_mask(v0, i) ? s2 :                     \
1867                    (ETYPE)(target_long)s1);                          \
1868         *((ETYPE *)vd + H(i)) = d;                                   \
1869     }                                                                \
1870     env->vstart = 0;                                                 \
1871 }
1872 
1873 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t,  H1)
1874 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
1875 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
1876 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
1877 
1878 /*
1879  *** Vector Fixed-Point Arithmetic Instructions
1880  */
1881 
1882 /* Vector Single-Width Saturating Add and Subtract */
1883 
1884 /*
1885  * Fixed-point instructions carry a rounding mode and may saturate, so
1886  * define the common macros for fixed-point arithmetic here.
1887  */
1888 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
1889                           CPURISCVState *env, int vxrm);
1890 
1891 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)     \
1892 static inline void                                                  \
1893 do_##NAME(void *vd, void *vs1, void *vs2, int i,                    \
1894           CPURISCVState *env, int vxrm)                             \
1895 {                                                                   \
1896     TX1 s1 = *((T1 *)vs1 + HS1(i));                                 \
1897     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
1898     *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1);                    \
1899 }
1900 
1901 static inline void
1902 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
1903              CPURISCVState *env,
1904              uint32_t vl, uint32_t vm, int vxrm,
1905              opivv2_rm_fn *fn)
1906 {
1907     for (uint32_t i = env->vstart; i < vl; i++) {
1908         if (!vm && !vext_elem_mask(v0, i)) {
1909             continue;
1910         }
1911         fn(vd, vs1, vs2, i, env, vxrm);
1912     }
1913     env->vstart = 0;
1914 }
1915 
1916 static inline void
1917 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
1918              CPURISCVState *env,
1919              uint32_t desc, uint32_t esz, uint32_t dsz,
1920              opivv2_rm_fn *fn)
1921 {
1922     uint32_t vm = vext_vm(desc);
1923     uint32_t vl = env->vl;
1924 
1925     switch (env->vxrm) {
1926     case 0: /* rnu */
1927         vext_vv_rm_1(vd, v0, vs1, vs2,
1928                      env, vl, vm, 0, fn);
1929         break;
1930     case 1: /* rne */
1931         vext_vv_rm_1(vd, v0, vs1, vs2,
1932                      env, vl, vm, 1, fn);
1933         break;
1934     case 2: /* rdn */
1935         vext_vv_rm_1(vd, v0, vs1, vs2,
1936                      env, vl, vm, 2, fn);
1937         break;
1938     default: /* rod */
1939         vext_vv_rm_1(vd, v0, vs1, vs2,
1940                      env, vl, vm, 3, fn);
1941         break;
1942     }
1943 }
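/*
 * env->vxrm is read once per helper call and passed to vext_vv_rm_1() as
 * a literal constant in each switch arm, which lets the compiler inline
 * the per-element callback with the rounding mode folded to a constant.
 */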
1944 
1945 /* generate helpers for fixed point instructions with OPIVV format */
1946 #define GEN_VEXT_VV_RM(NAME, ESZ, DSZ)                          \
1947 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,     \
1948                   CPURISCVState *env, uint32_t desc)            \
1949 {                                                               \
1950     vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,         \
1951                  do_##NAME);                                    \
1952 }
1953 
1954 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
1955 {
1956     uint8_t res = a + b;
1957     if (res < a) {
1958         res = UINT8_MAX;
1959         env->vxsat = 0x1;
1960     }
1961     return res;
1962 }
1963 
1964 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
1965                                uint16_t b)
1966 {
1967     uint16_t res = a + b;
1968     if (res < a) {
1969         res = UINT16_MAX;
1970         env->vxsat = 0x1;
1971     }
1972     return res;
1973 }
1974 
1975 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
1976                                uint32_t b)
1977 {
1978     uint32_t res = a + b;
1979     if (res < a) {
1980         res = UINT32_MAX;
1981         env->vxsat = 0x1;
1982     }
1983     return res;
1984 }
1985 
1986 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
1987                                uint64_t b)
1988 {
1989     uint64_t res = a + b;
1990     if (res < a) {
1991         res = UINT64_MAX;
1992         env->vxsat = 0x1;
1993     }
1994     return res;
1995 }
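/*
 * Unsigned saturating add: the sum wrapped iff res < a.  For example, at
 * 8 bits 200 + 100 wraps to 44; 44 < 200, so the result saturates to
 * UINT8_MAX and vxsat is set.
 */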
1996 
1997 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
1998 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
1999 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
2000 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
2001 GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1)
2002 GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2)
2003 GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4)
2004 GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8)
2005 
2006 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
2007                           CPURISCVState *env, int vxrm);
2008 
2009 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)          \
2010 static inline void                                                  \
2011 do_##NAME(void *vd, target_long s1, void *vs2, int i,               \
2012           CPURISCVState *env, int vxrm)                             \
2013 {                                                                   \
2014     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
2015     *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1);           \
2016 }
2017 
2018 static inline void
2019 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
2020              CPURISCVState *env,
2021              uint32_t vl, uint32_t vm, int vxrm,
2022              opivx2_rm_fn *fn)
2023 {
2024     for (uint32_t i = env->vstart; i < vl; i++) {
2025         if (!vm && !vext_elem_mask(v0, i)) {
2026             continue;
2027         }
2028         fn(vd, s1, vs2, i, env, vxrm);
2029     }
2030     env->vstart = 0;
2031 }
2032 
2033 static inline void
2034 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2035              CPURISCVState *env,
2036              uint32_t desc, uint32_t esz, uint32_t dsz,
2037              opivx2_rm_fn *fn)
2038 {
2039     uint32_t vm = vext_vm(desc);
2040     uint32_t vl = env->vl;
2041 
2042     switch (env->vxrm) {
2043     case 0: /* rnu */
2044         vext_vx_rm_1(vd, v0, s1, vs2,
2045                      env, vl, vm, 0, fn);
2046         break;
2047     case 1: /* rne */
2048         vext_vx_rm_1(vd, v0, s1, vs2,
2049                      env, vl, vm, 1, fn);
2050         break;
2051     case 2: /* rdn */
2052         vext_vx_rm_1(vd, v0, s1, vs2,
2053                      env, vl, vm, 2, fn);
2054         break;
2055     default: /* rod */
2056         vext_vx_rm_1(vd, v0, s1, vs2,
2057                      env, vl, vm, 3, fn);
2058         break;
2059     }
2060 }
2061 
2062 /* generate helpers for fixed point instructions with OPIVX format */
2063 #define GEN_VEXT_VX_RM(NAME, ESZ, DSZ)                    \
2064 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
2065         void *vs2, CPURISCVState *env, uint32_t desc)     \
2066 {                                                         \
2067     vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ,    \
2068                  do_##NAME);                              \
2069 }
2070 
2071 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2072 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2073 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2074 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
2075 GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1)
2076 GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2)
2077 GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4)
2078 GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8)
2079 
2080 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2081 {
2082     int8_t res = a + b;
2083     if ((res ^ a) & (res ^ b) & INT8_MIN) {
2084         res = a > 0 ? INT8_MAX : INT8_MIN;
2085         env->vxsat = 0x1;
2086     }
2087     return res;
2088 }
2089 
2090 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2091 {
2092     int16_t res = a + b;
2093     if ((res ^ a) & (res ^ b) & INT16_MIN) {
2094         res = a > 0 ? INT16_MAX : INT16_MIN;
2095         env->vxsat = 0x1;
2096     }
2097     return res;
2098 }
2099 
2100 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2101 {
2102     int32_t res = a + b;
2103     if ((res ^ a) & (res ^ b) & INT32_MIN) {
2104         res = a > 0 ? INT32_MAX : INT32_MIN;
2105         env->vxsat = 0x1;
2106     }
2107     return res;
2108 }
2109 
2110 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2111 {
2112     int64_t res = a + b;
2113     if ((res ^ a) & (res ^ b) & INT64_MIN) {
2114         res = a > 0 ? INT64_MAX : INT64_MIN;
2115         env->vxsat = 0x1;
2116     }
2117     return res;
2118 }
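/*
 * Signed saturating add: (res ^ a) & (res ^ b) & INT_MIN is nonzero
 * exactly when a and b have the same sign but res has the opposite sign,
 * i.e. when the addition overflowed.  The result then saturates towards
 * the operands' common sign, e.g. at 8 bits 100 + 100 wraps to -56 and is
 * clamped to INT8_MAX.
 */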
2119 
2120 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2121 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2122 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2123 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
2124 GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1)
2125 GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2)
2126 GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4)
2127 GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8)
2128 
2129 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2130 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2131 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2132 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
2133 GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1)
2134 GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2)
2135 GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4)
2136 GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8)
2137 
2138 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2139 {
2140     uint8_t res = a - b;
2141     if (res > a) {
2142         res = 0;
2143         env->vxsat = 0x1;
2144     }
2145     return res;
2146 }
2147 
2148 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2149                                uint16_t b)
2150 {
2151     uint16_t res = a - b;
2152     if (res > a) {
2153         res = 0;
2154         env->vxsat = 0x1;
2155     }
2156     return res;
2157 }
2158 
2159 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2160                                uint32_t b)
2161 {
2162     uint32_t res = a - b;
2163     if (res > a) {
2164         res = 0;
2165         env->vxsat = 0x1;
2166     }
2167     return res;
2168 }
2169 
2170 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2171                                uint64_t b)
2172 {
2173     uint64_t res = a - b;
2174     if (res > a) {
2175         res = 0;
2176         env->vxsat = 0x1;
2177     }
2178     return res;
2179 }
2180 
2181 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2182 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2183 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2184 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
2185 GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1)
2186 GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2)
2187 GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4)
2188 GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8)
2189 
2190 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2191 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2192 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2193 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
2194 GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1)
2195 GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2)
2196 GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4)
2197 GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8)
2198 
2199 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2200 {
2201     int8_t res = a - b;
2202     if ((res ^ a) & (a ^ b) & INT8_MIN) {
2203         res = a >= 0 ? INT8_MAX : INT8_MIN;
2204         env->vxsat = 0x1;
2205     }
2206     return res;
2207 }
2208 
2209 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2210 {
2211     int16_t res = a - b;
2212     if ((res ^ a) & (a ^ b) & INT16_MIN) {
2213         res = a >= 0 ? INT16_MAX : INT16_MIN;
2214         env->vxsat = 0x1;
2215     }
2216     return res;
2217 }
2218 
2219 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2220 {
2221     int32_t res = a - b;
2222     if ((res ^ a) & (a ^ b) & INT32_MIN) {
2223         res = a >= 0 ? INT32_MAX : INT32_MIN;
2224         env->vxsat = 0x1;
2225     }
2226     return res;
2227 }
2228 
2229 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2230 {
2231     int64_t res = a - b;
2232     if ((res ^ a) & (a ^ b) & INT64_MIN) {
2233         res = a >= 0 ? INT64_MAX : INT64_MIN;
2234         env->vxsat = 0x1;
2235     }
2236     return res;
2237 }
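/*
 * Signed saturating subtract: a - b can only overflow when a and b have
 * different signs, and then res has the wrong sign, so
 * (res ^ a) & (a ^ b) & INT_MIN is nonzero exactly in the overflow case.
 * The result saturates towards a's sign (a >= 0 ? MAX : MIN).
 */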
2238 
2239 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2240 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2241 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2242 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
2243 GEN_VEXT_VV_RM(vssub_vv_b, 1, 1)
2244 GEN_VEXT_VV_RM(vssub_vv_h, 2, 2)
2245 GEN_VEXT_VV_RM(vssub_vv_w, 4, 4)
2246 GEN_VEXT_VV_RM(vssub_vv_d, 8, 8)
2247 
2248 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2249 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2250 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2251 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
2252 GEN_VEXT_VX_RM(vssub_vx_b, 1, 1)
2253 GEN_VEXT_VX_RM(vssub_vx_h, 2, 2)
2254 GEN_VEXT_VX_RM(vssub_vx_w, 4, 4)
2255 GEN_VEXT_VX_RM(vssub_vx_d, 8, 8)
2256 
2257 /* Vector Single-Width Averaging Add and Subtract */
2258 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2259 {
2260     uint8_t d = extract64(v, shift, 1);
2261     uint8_t d1;
2262     uint64_t D1, D2;
2263 
2264     if (shift == 0 || shift > 64) {
2265         return 0;
2266     }
2267 
2268     d1 = extract64(v, shift - 1, 1);
2269     D1 = extract64(v, 0, shift);
2270     if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2271         return d1;
2272     } else if (vxrm == 1) { /* round-to-nearest-even */
2273         if (shift > 1) {
2274             D2 = extract64(v, 0, shift - 1);
2275             return d1 & ((D2 != 0) | d);
2276         } else {
2277             return d1 & d;
2278         }
2279     } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2280         return !d & (D1 != 0);
2281     }
2282     return 0; /* round-down (truncate) */
2283 }
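/*
 * Example for the averaging add of 3 and 4 below (v = 7, shift = 1, so
 * d = bit 1 = 1, d1 = bit 0 = 1, D1 = 1):
 *   rnu: round = d1 = 1             -> (7 >> 1) + 1 = 4
 *   rne: round = d1 & d = 1         -> 4
 *   rdn: round = 0                  -> 3
 *   rod: round = !d & (D1 != 0) = 0 -> 3 (7 >> 1 = 3 is already odd)
 */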
2284 
2285 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2286 {
2287     int64_t res = (int64_t)a + b;
2288     uint8_t round = get_round(vxrm, res, 1);
2289 
2290     return (res >> 1) + round;
2291 }
2292 
2293 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2294 {
2295     int64_t res = a + b;
2296     uint8_t round = get_round(vxrm, res, 1);
2297     int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2298 
2299     /* With signed overflow, bit 64 is inverse of bit 63. */
2300     return ((res >> 1) ^ over) + round;
2301 }
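/*
 * 'over' is nonzero (INT64_MIN) exactly when the signed addition wrapped.
 * After the arithmetic shift, bit 63 of (res >> 1) is a copy of res's
 * sign bit, but the true average needs bit 64 of the unwrapped sum there,
 * which on overflow is the inverse; XOR-ing in 'over' flips it.  At 8-bit
 * width: 100 + 100 wraps to -56 (0xc8); (-56 >> 1) = -28 (0xe4); flipping
 * bit 7 gives 0x64 = 100 = (100 + 100) / 2.
 */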
2302 
2303 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2304 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2305 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2306 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
2307 GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1)
2308 GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2)
2309 GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4)
2310 GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8)
2311 
2312 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2313 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2314 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2315 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
2316 GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1)
2317 GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2)
2318 GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4)
2319 GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8)
2320 
2321 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2322                                uint32_t a, uint32_t b)
2323 {
2324     uint64_t res = (uint64_t)a + b;
2325     uint8_t round = get_round(vxrm, res, 1);
2326 
2327     return (res >> 1) + round;
2328 }
2329 
2330 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
2331                                uint64_t a, uint64_t b)
2332 {
2333     uint64_t res = a + b;
2334     uint8_t round = get_round(vxrm, res, 1);
2335     uint64_t over = (uint64_t)(res < a) << 63;
2336 
2337     return ((res >> 1) | over) + round;
2338 }
2339 
2340 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
2341 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
2342 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
2343 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
2344 GEN_VEXT_VV_RM(vaaddu_vv_b, 1, 1)
2345 GEN_VEXT_VV_RM(vaaddu_vv_h, 2, 2)
2346 GEN_VEXT_VV_RM(vaaddu_vv_w, 4, 4)
2347 GEN_VEXT_VV_RM(vaaddu_vv_d, 8, 8)
2348 
2349 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
2350 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
2351 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
2352 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
2353 GEN_VEXT_VX_RM(vaaddu_vx_b, 1, 1)
2354 GEN_VEXT_VX_RM(vaaddu_vx_h, 2, 2)
2355 GEN_VEXT_VX_RM(vaaddu_vx_w, 4, 4)
2356 GEN_VEXT_VX_RM(vaaddu_vx_d, 8, 8)
2357 
2358 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2359 {
2360     int64_t res = (int64_t)a - b;
2361     uint8_t round = get_round(vxrm, res, 1);
2362 
2363     return (res >> 1) + round;
2364 }
2365 
2366 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2367 {
2368     int64_t res = (int64_t)a - b;
2369     uint8_t round = get_round(vxrm, res, 1);
2370     int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2371 
2372     /* With signed overflow, bit 64 is inverse of bit 63. */
2373     return ((res >> 1) ^ over) + round;
2374 }
2375 
2376 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2377 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2378 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2379 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
2380 GEN_VEXT_VV_RM(vasub_vv_b, 1, 1)
2381 GEN_VEXT_VV_RM(vasub_vv_h, 2, 2)
2382 GEN_VEXT_VV_RM(vasub_vv_w, 4, 4)
2383 GEN_VEXT_VV_RM(vasub_vv_d, 8, 8)
2384 
2385 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2386 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2387 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2388 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
2389 GEN_VEXT_VX_RM(vasub_vx_b, 1, 1)
2390 GEN_VEXT_VX_RM(vasub_vx_h, 2, 2)
2391 GEN_VEXT_VX_RM(vasub_vx_w, 4, 4)
2392 GEN_VEXT_VX_RM(vasub_vx_d, 8, 8)
2393 
2394 static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2395                                uint32_t a, uint32_t b)
2396 {
2397     int64_t res = (int64_t)a - b;
2398     uint8_t round = get_round(vxrm, res, 1);
2399 
2400     return (res >> 1) + round;
2401 }
2402 
2403 static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
2404                                uint64_t a, uint64_t b)
2405 {
2406     uint64_t res = (uint64_t)a - b;
2407     uint8_t round = get_round(vxrm, res, 1);
2408     uint64_t over = (uint64_t)(res > a) << 63;
2409 
2410     return ((res >> 1) | over) + round;
2411 }
2412 
2413 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
2414 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
2415 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
2416 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
2417 GEN_VEXT_VV_RM(vasubu_vv_b, 1, 1)
2418 GEN_VEXT_VV_RM(vasubu_vv_h, 2, 2)
2419 GEN_VEXT_VV_RM(vasubu_vv_w, 4, 4)
2420 GEN_VEXT_VV_RM(vasubu_vv_d, 8, 8)
2421 
2422 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
2423 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
2424 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
2425 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
2426 GEN_VEXT_VX_RM(vasubu_vx_b, 1, 1)
2427 GEN_VEXT_VX_RM(vasubu_vx_h, 2, 2)
2428 GEN_VEXT_VX_RM(vasubu_vx_w, 4, 4)
2429 GEN_VEXT_VX_RM(vasubu_vx_d, 8, 8)
2430 
2431 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2432 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2433 {
2434     uint8_t round;
2435     int16_t res;
2436 
2437     res = (int16_t)a * (int16_t)b;
2438     round = get_round(vxrm, res, 7);
2439     res   = (res >> 7) + round;
2440 
2441     if (res > INT8_MAX) {
2442         env->vxsat = 0x1;
2443         return INT8_MAX;
2444     } else if (res < INT8_MIN) {
2445         env->vxsat = 0x1;
2446         return INT8_MIN;
2447     } else {
2448         return res;
2449     }
2450 }
2451 
2452 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2453 {
2454     uint8_t round;
2455     int32_t res;
2456 
2457     res = (int32_t)a * (int32_t)b;
2458     round = get_round(vxrm, res, 15);
2459     res   = (res >> 15) + round;
2460 
2461     if (res > INT16_MAX) {
2462         env->vxsat = 0x1;
2463         return INT16_MAX;
2464     } else if (res < INT16_MIN) {
2465         env->vxsat = 0x1;
2466         return INT16_MIN;
2467     } else {
2468         return res;
2469     }
2470 }
2471 
2472 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2473 {
2474     uint8_t round;
2475     int64_t res;
2476 
2477     res = (int64_t)a * (int64_t)b;
2478     round = get_round(vxrm, res, 31);
2479     res   = (res >> 31) + round;
2480 
2481     if (res > INT32_MAX) {
2482         env->vxsat = 0x1;
2483         return INT32_MAX;
2484     } else if (res < INT32_MIN) {
2485         env->vxsat = 0x1;
2486         return INT32_MIN;
2487     } else {
2488         return res;
2489     }
2490 }
2491 
2492 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2493 {
2494     uint8_t round;
2495     uint64_t hi_64, lo_64;
2496     int64_t res;
2497 
2498     if (a == INT64_MIN && b == INT64_MIN) {
2499         env->vxsat = 1;
2500         return INT64_MAX;
2501     }
2502 
2503     muls64(&lo_64, &hi_64, a, b);
2504     round = get_round(vxrm, lo_64, 63);
2505     /*
2506      * Cannot overflow, as there are always
2507      * 2 sign bits after multiply.
2508      */
2509     res = (hi_64 << 1) | (lo_64 >> 63);
2510     if (round) {
2511         if (res == INT64_MAX) {
2512             env->vxsat = 1;
2513         } else {
2514             res += 1;
2515         }
2516     }
2517     return res;
2518 }
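/*
 * Apart from INT64_MIN * INT64_MIN (handled above), |a * b| is at most
 * 2^63 * (2^63 - 1) < 2^126, so bits 127 and 126 of the 128-bit product
 * are both sign copies and (hi_64 << 1) | (lo_64 >> 63) still fits in
 * int64_t.  The rounding increment is then the only way left to overflow,
 * hence the explicit res == INT64_MAX check.
 */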
2519 
2520 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2521 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2522 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2523 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
2524 GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1)
2525 GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2)
2526 GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4)
2527 GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8)
2528 
2529 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2530 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2531 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2532 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
2533 GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1)
2534 GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2)
2535 GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4)
2536 GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8)
2537 
2538 /* Vector Single-Width Scaling Shift Instructions */
2539 static inline uint8_t
2540 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2541 {
2542     uint8_t round, shift = b & 0x7;
2543     uint8_t res;
2544 
2545     round = get_round(vxrm, a, shift);
2546     res   = (a >> shift)  + round;
2547     return res;
2548 }
2549 static inline uint16_t
2550 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2551 {
2552     uint8_t round, shift = b & 0xf;
2553     uint16_t res;
2554 
2555     round = get_round(vxrm, a, shift);
2556     res   = (a >> shift)  + round;
2557     return res;
2558 }
2559 static inline uint32_t
2560 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2561 {
2562     uint8_t round, shift = b & 0x1f;
2563     uint32_t res;
2564 
2565     round = get_round(vxrm, a, shift);
2566     res   = (a >> shift)  + round;
2567     return res;
2568 }
2569 static inline uint64_t
2570 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2571 {
2572     uint8_t round, shift = b & 0x3f;
2573     uint64_t res;
2574 
2575     round = get_round(vxrm, a, shift);
2576     res   = (a >> shift)  + round;
2577     return res;
2578 }
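/*
 * The scaling shifts round the bits shifted out according to vxrm (via
 * get_round()) and need no saturation, since a right shift of an element
 * never exceeds the element's range; the shift amount uses only the low
 * log2(SEW) bits, as for the ordinary shifts.
 */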
2579 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2580 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2581 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2582 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
2583 GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1)
2584 GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2)
2585 GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4)
2586 GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8)
2587 
2588 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2589 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2590 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2591 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
2592 GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1)
2593 GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2)
2594 GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4)
2595 GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8)
2596 
2597 static inline int8_t
2598 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2599 {
2600     uint8_t round, shift = b & 0x7;
2601     int8_t res;
2602 
2603     round = get_round(vxrm, a, shift);
2604     res   = (a >> shift)  + round;
2605     return res;
2606 }
2607 static inline int16_t
2608 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2609 {
2610     uint8_t round, shift = b & 0xf;
2611     int16_t res;
2612 
2613     round = get_round(vxrm, a, shift);
2614     res   = (a >> shift)  + round;
2615     return res;
2616 }
2617 static inline int32_t
2618 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2619 {
2620     uint8_t round, shift = b & 0x1f;
2621     int32_t res;
2622 
2623     round = get_round(vxrm, a, shift);
2624     res   = (a >> shift)  + round;
2625     return res;
2626 }
2627 static inline int64_t
2628 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2629 {
2630     uint8_t round, shift = b & 0x3f;
2631     int64_t res;
2632 
2633     round = get_round(vxrm, a, shift);
2634     res   = (a >> shift)  + round;
2635     return res;
2636 }
2637 
2638 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2639 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2640 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2641 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
2642 GEN_VEXT_VV_RM(vssra_vv_b, 1, 1)
2643 GEN_VEXT_VV_RM(vssra_vv_h, 2, 2)
2644 GEN_VEXT_VV_RM(vssra_vv_w, 4, 4)
2645 GEN_VEXT_VV_RM(vssra_vv_d, 8, 8)
2646 
2647 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2648 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2649 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2650 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
2651 GEN_VEXT_VX_RM(vssra_vx_b, 1, 1)
2652 GEN_VEXT_VX_RM(vssra_vx_h, 2, 2)
2653 GEN_VEXT_VX_RM(vssra_vx_w, 4, 4)
2654 GEN_VEXT_VX_RM(vssra_vx_d, 8, 8)
2655 
2656 /* Vector Narrowing Fixed-Point Clip Instructions */
2657 static inline int8_t
2658 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2659 {
2660     uint8_t round, shift = b & 0xf;
2661     int16_t res;
2662 
2663     round = get_round(vxrm, a, shift);
2664     res   = (a >> shift)  + round;
2665     if (res > INT8_MAX) {
2666         env->vxsat = 0x1;
2667         return INT8_MAX;
2668     } else if (res < INT8_MIN) {
2669         env->vxsat = 0x1;
2670         return INT8_MIN;
2671     } else {
2672         return res;
2673     }
2674 }
2675 
2676 static inline int16_t
2677 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2678 {
2679     uint8_t round, shift = b & 0x1f;
2680     int32_t res;
2681 
2682     round = get_round(vxrm, a, shift);
2683     res   = (a >> shift)  + round;
2684     if (res > INT16_MAX) {
2685         env->vxsat = 0x1;
2686         return INT16_MAX;
2687     } else if (res < INT16_MIN) {
2688         env->vxsat = 0x1;
2689         return INT16_MIN;
2690     } else {
2691         return res;
2692     }
2693 }
2694 
2695 static inline int32_t
2696 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2697 {
2698     uint8_t round, shift = b & 0x3f;
2699     int64_t res;
2700 
2701     round = get_round(vxrm, a, shift);
2702     res   = (a >> shift)  + round;
2703     if (res > INT32_MAX) {
2704         env->vxsat = 0x1;
2705         return INT32_MAX;
2706     } else if (res < INT32_MIN) {
2707         env->vxsat = 0x1;
2708         return INT32_MIN;
2709     } else {
2710         return res;
2711     }
2712 }
2713 
2714 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
2715 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
2716 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
2717 GEN_VEXT_VV_RM(vnclip_wv_b, 1, 1)
2718 GEN_VEXT_VV_RM(vnclip_wv_h, 2, 2)
2719 GEN_VEXT_VV_RM(vnclip_wv_w, 4, 4)
2720 
2721 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
2722 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
2723 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
2724 GEN_VEXT_VX_RM(vnclip_wx_b, 1, 1)
2725 GEN_VEXT_VX_RM(vnclip_wx_h, 2, 2)
2726 GEN_VEXT_VX_RM(vnclip_wx_w, 4, 4)
2727 
2728 static inline uint8_t
2729 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2730 {
2731     uint8_t round, shift = b & 0xf;
2732     uint16_t res;
2733 
2734     round = get_round(vxrm, a, shift);
2735     res   = (a >> shift)  + round;
2736     if (res > UINT8_MAX) {
2737         env->vxsat = 0x1;
2738         return UINT8_MAX;
2739     } else {
2740         return res;
2741     }
2742 }
2743 
2744 static inline uint16_t
2745 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
2746 {
2747     uint8_t round, shift = b & 0x1f;
2748     uint32_t res;
2749 
2750     round = get_round(vxrm, a, shift);
2751     res   = (a >> shift)  + round;
2752     if (res > UINT16_MAX) {
2753         env->vxsat = 0x1;
2754         return UINT16_MAX;
2755     } else {
2756         return res;
2757     }
2758 }
2759 
2760 static inline uint32_t
2761 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
2762 {
2763     uint8_t round, shift = b & 0x3f;
2764     uint64_t res;
2765 
2766     round = get_round(vxrm, a, shift);
2767     res   = (a >> shift)  + round;
2768     if (res > UINT32_MAX) {
2769         env->vxsat = 0x1;
2770         return UINT32_MAX;
2771     } else {
2772         return res;
2773     }
2774 }
2775 
2776 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
2777 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
2778 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
2779 GEN_VEXT_VV_RM(vnclipu_wv_b, 1, 1)
2780 GEN_VEXT_VV_RM(vnclipu_wv_h, 2, 2)
2781 GEN_VEXT_VV_RM(vnclipu_wv_w, 4, 4)
2782 
2783 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
2784 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
2785 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
2786 GEN_VEXT_VX_RM(vnclipu_wx_b, 1, 1)
2787 GEN_VEXT_VX_RM(vnclipu_wx_h, 2, 2)
2788 GEN_VEXT_VX_RM(vnclipu_wx_w, 4, 4)
2789 
2790 /*
2791  *** Vector Floating-Point Arithmetic Instructions
2792  */
2793 /* Vector Single-Width Floating-Point Add/Subtract Instructions */
2794 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
2795 static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
2796                       CPURISCVState *env)                      \
2797 {                                                              \
2798     TX1 s1 = *((T1 *)vs1 + HS1(i));                            \
2799     TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
2800     *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status);         \
2801 }
2802 
2803 #define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ)                   \
2804 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
2805                   void *vs2, CPURISCVState *env,          \
2806                   uint32_t desc)                          \
2807 {                                                         \
2808     uint32_t vm = vext_vm(desc);                          \
2809     uint32_t vl = env->vl;                                \
2810     uint32_t i;                                           \
2811                                                           \
2812     for (i = env->vstart; i < vl; i++) {                  \
2813         if (!vm && !vext_elem_mask(v0, i)) {              \
2814             continue;                                     \
2815         }                                                 \
2816         do_##NAME(vd, vs1, vs2, i, env);                  \
2817     }                                                     \
2818     env->vstart = 0;                                      \
2819 }
2820 
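/*
 * RVVCALL() (defined earlier in this file) simply forwards its arguments
 * to the named OP* macro, so with OP_UUU_H supplying uint16_t for every
 * element type, RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2,
 * float16_add) expands to roughly:
 *
 *   static void do_vfadd_vv_h(void *vd, void *vs1, void *vs2, int i,
 *                             CPURISCVState *env)
 *   {
 *       uint16_t s1 = *((uint16_t *)vs1 + H2(i));
 *       uint16_t s2 = *((uint16_t *)vs2 + H2(i));
 *       *((uint16_t *)vd + H2(i)) = float16_add(s2, s1, &env->fp_status);
 *   }
 *
 * and GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2) then emits the HELPER() entry
 * point that loops from vstart to vl, skipping masked-off elements, and
 * calls it for each active element.
 */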
2821 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
2822 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
2823 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
2824 GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2)
2825 GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4)
2826 GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8)
2827 
2828 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)        \
2829 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
2830                       CPURISCVState *env)                      \
2831 {                                                              \
2832     TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
2833     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
2834 }
2835 
2836 #define GEN_VEXT_VF(NAME, ESZ, DSZ)                       \
2837 void HELPER(NAME)(void *vd, void *v0, uint64_t s1,        \
2838                   void *vs2, CPURISCVState *env,          \
2839                   uint32_t desc)                          \
2840 {                                                         \
2841     uint32_t vm = vext_vm(desc);                          \
2842     uint32_t vl = env->vl;                                \
2843     uint32_t i;                                           \
2844                                                           \
2845     for (i = env->vstart; i < vl; i++) {                  \
2846         if (!vm && !vext_elem_mask(v0, i)) {              \
2847             continue;                                     \
2848         }                                                 \
2849         do_##NAME(vd, s1, vs2, i, env);                   \
2850     }                                                     \
2851     env->vstart = 0;                                      \
2852 }
2853 
2854 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
2855 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
2856 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
2857 GEN_VEXT_VF(vfadd_vf_h, 2, 2)
2858 GEN_VEXT_VF(vfadd_vf_w, 4, 4)
2859 GEN_VEXT_VF(vfadd_vf_d, 8, 8)
2860 
2861 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
2862 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
2863 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
2864 GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2)
2865 GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4)
2866 GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8)
2867 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
2868 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
2869 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
2870 GEN_VEXT_VF(vfsub_vf_h, 2, 2)
2871 GEN_VEXT_VF(vfsub_vf_w, 4, 4)
2872 GEN_VEXT_VF(vfsub_vf_d, 8, 8)
2873 
2874 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
2875 {
2876     return float16_sub(b, a, s);
2877 }
2878 
2879 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
2880 {
2881     return float32_sub(b, a, s);
2882 }
2883 
2884 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
2885 {
2886     return float64_sub(b, a, s);
2887 }
2888 
2889 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
2890 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
2891 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
2892 GEN_VEXT_VF(vfrsub_vf_h, 2, 2)
2893 GEN_VEXT_VF(vfrsub_vf_w, 4, 4)
2894 GEN_VEXT_VF(vfrsub_vf_d, 8, 8)
2895 
2896 /* Vector Widening Floating-Point Add/Subtract Instructions */
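/*
 * The .vv/.vf forms widen both SEW operands to 2*SEW before the add or
 * subtract; the .wv/.wf forms below (vfwadd.w*, vfwsub.w*) take vs2
 * already at 2*SEW and only widen the vs1/rs1 operand.
 */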
2897 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
2898 {
2899     return float32_add(float16_to_float32(a, true, s),
2900             float16_to_float32(b, true, s), s);
2901 }
2902 
2903 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
2904 {
2905     return float64_add(float32_to_float64(a, s),
2906             float32_to_float64(b, s), s);
2908 }
2909 
2910 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
2911 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
2912 GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4)
2913 GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8)
2914 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
2915 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
2916 GEN_VEXT_VF(vfwadd_vf_h, 2, 4)
2917 GEN_VEXT_VF(vfwadd_vf_w, 4, 8)
2918 
2919 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
2920 {
2921     return float32_sub(float16_to_float32(a, true, s),
2922             float16_to_float32(b, true, s), s);
2923 }
2924 
2925 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
2926 {
2927     return float64_sub(float32_to_float64(a, s),
2928             float32_to_float64(b, s), s);
2930 }
2931 
2932 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
2933 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
2934 GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4)
2935 GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8)
2936 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
2937 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
2938 GEN_VEXT_VF(vfwsub_vf_h, 2, 4)
2939 GEN_VEXT_VF(vfwsub_vf_w, 4, 8)
2940 
2941 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
2942 {
2943     return float32_add(a, float16_to_float32(b, true, s), s);
2944 }
2945 
2946 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
2947 {
2948     return float64_add(a, float32_to_float64(b, s), s);
2949 }
2950 
2951 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
2952 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
2953 GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4)
2954 GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8)
2955 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
2956 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
2957 GEN_VEXT_VF(vfwadd_wf_h, 2, 4)
2958 GEN_VEXT_VF(vfwadd_wf_w, 4, 8)
2959 
2960 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
2961 {
2962     return float32_sub(a, float16_to_float32(b, true, s), s);
2963 }
2964 
2965 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
2966 {
2967     return float64_sub(a, float32_to_float64(b, s), s);
2968 }
2969 
2970 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
2971 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
2972 GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4)
2973 GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8)
2974 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
2975 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
2976 GEN_VEXT_VF(vfwsub_wf_h, 2, 4)
2977 GEN_VEXT_VF(vfwsub_wf_w, 4, 8)
2978 
2979 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */
2980 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
2981 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
2982 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
2983 GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2)
2984 GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4)
2985 GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8)
2986 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
2987 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
2988 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
2989 GEN_VEXT_VF(vfmul_vf_h, 2, 2)
2990 GEN_VEXT_VF(vfmul_vf_w, 4, 4)
2991 GEN_VEXT_VF(vfmul_vf_d, 8, 8)
2992 
2993 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
2994 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
2995 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
2996 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2)
2997 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4)
2998 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8)
2999 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
3000 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
3001 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
3002 GEN_VEXT_VF(vfdiv_vf_h, 2, 2)
3003 GEN_VEXT_VF(vfdiv_vf_w, 4, 4)
3004 GEN_VEXT_VF(vfdiv_vf_d, 8, 8)
3005 
3006 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3007 {
3008     return float16_div(b, a, s);
3009 }
3010 
3011 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
3012 {
3013     return float32_div(b, a, s);
3014 }
3015 
3016 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
3017 {
3018     return float64_div(b, a, s);
3019 }
3020 
3021 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
3022 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
3023 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
3024 GEN_VEXT_VF(vfrdiv_vf_h, 2, 2)
3025 GEN_VEXT_VF(vfrdiv_vf_w, 4, 4)
3026 GEN_VEXT_VF(vfrdiv_vf_d, 8, 8)
3027 
3028 /* Vector Widening Floating-Point Multiply */
3029 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3030 {
3031     return float32_mul(float16_to_float32(a, true, s),
3032             float16_to_float32(b, true, s), s);
3033 }
3034 
3035 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3036 {
3037     return float64_mul(float32_to_float64(a, s),
3038             float32_to_float64(b, s), s);
3040 }

3041 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3042 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
3043 GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4)
3044 GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8)
3045 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3046 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
3047 GEN_VEXT_VF(vfwmul_vf_h, 2, 4)
3048 GEN_VEXT_VF(vfwmul_vf_w, 4, 8)
3049 
3050 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
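/*
 * Operand mapping for the fused multiply-add callbacks below
 * (a = vs2 element, b = vs1 element or rs1, d = vd element):
 *   vfmacc : vd = +(a * b) + d      vfnmacc: vd = -(a * b) - d
 *   vfmsac : vd = +(a * b) - d      vfnmsac: vd = -(a * b) + d
 *   vfmadd : vd = +(b * d) + a      vfnmadd: vd = -(b * d) - a
 *   vfmsub : vd = +(b * d) - a      vfnmsub: vd = -(b * d) + a
 * The negations are applied through the float_muladd_negate_* flags, so
 * each element is computed with a single fused rounding.
 */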
3051 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)       \
3052 static void do_##NAME(void *vd, void *vs1, void *vs2, int i,       \
3053         CPURISCVState *env)                                        \
3054 {                                                                  \
3055     TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
3056     TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
3057     TD d = *((TD *)vd + HD(i));                                    \
3058     *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status);          \
3059 }
3060 
3061 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3062 {
3063     return float16_muladd(a, b, d, 0, s);
3064 }
3065 
3066 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3067 {
3068     return float32_muladd(a, b, d, 0, s);
3069 }
3070 
3071 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3072 {
3073     return float64_muladd(a, b, d, 0, s);
3074 }
3075 
3076 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3077 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3078 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
3079 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2)
3080 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4)
3081 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8)
3082 
3083 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)           \
3084 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i,    \
3085         CPURISCVState *env)                                       \
3086 {                                                                 \
3087     TX2 s2 = *((T2 *)vs2 + HS2(i));                               \
3088     TD d = *((TD *)vd + HD(i));                                   \
3089     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
3090 }
3091 
3092 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3093 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3094 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
3095 GEN_VEXT_VF(vfmacc_vf_h, 2, 2)
3096 GEN_VEXT_VF(vfmacc_vf_w, 4, 4)
3097 GEN_VEXT_VF(vfmacc_vf_d, 8, 8)
3098 
3099 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3100 {
3101     return float16_muladd(a, b, d,
3102             float_muladd_negate_c | float_muladd_negate_product, s);
3103 }
3104 
3105 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3106 {
3107     return float32_muladd(a, b, d,
3108             float_muladd_negate_c | float_muladd_negate_product, s);
3109 }
3110 
3111 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3112 {
3113     return float64_muladd(a, b, d,
3114             float_muladd_negate_c | float_muladd_negate_product, s);
3115 }
3116 
3117 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3118 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3119 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
3120 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2)
3121 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4)
3122 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8)
3123 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3124 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3125 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
3126 GEN_VEXT_VF(vfnmacc_vf_h, 2, 2)
3127 GEN_VEXT_VF(vfnmacc_vf_w, 4, 4)
3128 GEN_VEXT_VF(vfnmacc_vf_d, 8, 8)
3129 
3130 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3131 {
3132     return float16_muladd(a, b, d, float_muladd_negate_c, s);
3133 }
3134 
3135 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3136 {
3137     return float32_muladd(a, b, d, float_muladd_negate_c, s);
3138 }
3139 
3140 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3141 {
3142     return float64_muladd(a, b, d, float_muladd_negate_c, s);
3143 }
3144 
3145 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3146 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3147 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
3148 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2)
3149 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4)
3150 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8)
3151 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3152 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3153 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
3154 GEN_VEXT_VF(vfmsac_vf_h, 2, 2)
3155 GEN_VEXT_VF(vfmsac_vf_w, 4, 4)
3156 GEN_VEXT_VF(vfmsac_vf_d, 8, 8)
3157 
3158 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3159 {
3160     return float16_muladd(a, b, d, float_muladd_negate_product, s);
3161 }
3162 
3163 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3164 {
3165     return float32_muladd(a, b, d, float_muladd_negate_product, s);
3166 }
3167 
3168 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3169 {
3170     return float64_muladd(a, b, d, float_muladd_negate_product, s);
3171 }
3172 
3173 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3174 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3175 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
3176 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2)
3177 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4)
3178 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8)
3179 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3180 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3181 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
3182 GEN_VEXT_VF(vfnmsac_vf_h, 2, 2)
3183 GEN_VEXT_VF(vfnmsac_vf_w, 4, 4)
3184 GEN_VEXT_VF(vfnmsac_vf_d, 8, 8)
3185 
3186 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3187 {
3188     return float16_muladd(d, b, a, 0, s);
3189 }
3190 
3191 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3192 {
3193     return float32_muladd(d, b, a, 0, s);
3194 }
3195 
3196 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3197 {
3198     return float64_muladd(d, b, a, 0, s);
3199 }
3200 
3201 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3202 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3203 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
3204 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2)
3205 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4)
3206 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8)
3207 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3208 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3209 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
3210 GEN_VEXT_VF(vfmadd_vf_h, 2, 2)
3211 GEN_VEXT_VF(vfmadd_vf_w, 4, 4)
3212 GEN_VEXT_VF(vfmadd_vf_d, 8, 8)
3213 
3214 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3215 {
3216     return float16_muladd(d, b, a,
3217             float_muladd_negate_c | float_muladd_negate_product, s);
3218 }
3219 
3220 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3221 {
3222     return float32_muladd(d, b, a,
3223             float_muladd_negate_c | float_muladd_negate_product, s);
3224 }
3225 
3226 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3227 {
3228     return float64_muladd(d, b, a,
3229             float_muladd_negate_c | float_muladd_negate_product, s);
3230 }
3231 
3232 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3233 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3234 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
3235 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2)
3236 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4)
3237 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8)
3238 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3239 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3240 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
3241 GEN_VEXT_VF(vfnmadd_vf_h, 2, 2)
3242 GEN_VEXT_VF(vfnmadd_vf_w, 4, 4)
3243 GEN_VEXT_VF(vfnmadd_vf_d, 8, 8)
3244 
3245 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3246 {
3247     return float16_muladd(d, b, a, float_muladd_negate_c, s);
3248 }
3249 
3250 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3251 {
3252     return float32_muladd(d, b, a, float_muladd_negate_c, s);
3253 }
3254 
3255 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3256 {
3257     return float64_muladd(d, b, a, float_muladd_negate_c, s);
3258 }
3259 
3260 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3261 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3262 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
3263 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2)
3264 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4)
3265 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8)
3266 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3267 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3268 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
3269 GEN_VEXT_VF(vfmsub_vf_h, 2, 2)
3270 GEN_VEXT_VF(vfmsub_vf_w, 4, 4)
3271 GEN_VEXT_VF(vfmsub_vf_d, 8, 8)
3272 
3273 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3274 {
3275     return float16_muladd(d, b, a, float_muladd_negate_product, s);
3276 }
3277 
3278 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3279 {
3280     return float32_muladd(d, b, a, float_muladd_negate_product, s);
3281 }
3282 
3283 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3284 {
3285     return float64_muladd(d, b, a, float_muladd_negate_product, s);
3286 }
3287 
3288 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3289 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3290 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
3291 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2)
3292 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4)
3293 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8)
3294 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3295 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3296 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
3297 GEN_VEXT_VF(vfnmsub_vf_h, 2, 2)
3298 GEN_VEXT_VF(vfnmsub_vf_w, 4, 4)
3299 GEN_VEXT_VF(vfnmsub_vf_d, 8, 8)
3300 
3301 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */
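/*
 * Both multiplicands are widened from SEW to 2*SEW before the fused
 * multiply-add; the addend d is the destination element and is already
 * 2*SEW wide, so the whole operation rounds once at the wider type.
 */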
3302 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3303 {
3304     return float32_muladd(float16_to_float32(a, true, s),
3305                         float16_to_float32(b, true, s), d, 0, s);
3306 }
3307 
3308 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3309 {
3310     return float64_muladd(float32_to_float64(a, s),
3311                         float32_to_float64(b, s), d, 0, s);
3312 }
3313 
3314 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3315 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
3316 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4)
3317 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8)
3318 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3319 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
3320 GEN_VEXT_VF(vfwmacc_vf_h, 2, 4)
3321 GEN_VEXT_VF(vfwmacc_vf_w, 4, 8)
3322 
3323 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3324 {
3325     return float32_muladd(float16_to_float32(a, true, s),
3326                         float16_to_float32(b, true, s), d,
3327                         float_muladd_negate_c | float_muladd_negate_product, s);
3328 }
3329 
3330 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3331 {
3332     return float64_muladd(float32_to_float64(a, s),
3333                         float32_to_float64(b, s), d,
3334                         float_muladd_negate_c | float_muladd_negate_product, s);
3335 }
3336 
3337 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3338 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
3339 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4)
3340 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8)
3341 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3342 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
3343 GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4)
3344 GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8)
3345 
3346 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3347 {
3348     return float32_muladd(float16_to_float32(a, true, s),
3349                         float16_to_float32(b, true, s), d,
3350                         float_muladd_negate_c, s);
3351 }
3352 
3353 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3354 {
3355     return float64_muladd(float32_to_float64(a, s),
3356                         float32_to_float64(b, s), d,
3357                         float_muladd_negate_c, s);
3358 }
3359 
3360 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3361 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
3362 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4)
3363 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8)
3364 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3365 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
3366 GEN_VEXT_VF(vfwmsac_vf_h, 2, 4)
3367 GEN_VEXT_VF(vfwmsac_vf_w, 4, 8)
3368 
3369 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3370 {
3371     return float32_muladd(float16_to_float32(a, true, s),
3372                         float16_to_float32(b, true, s), d,
3373                         float_muladd_negate_product, s);
3374 }
3375 
3376 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3377 {
3378     return float64_muladd(float32_to_float64(a, s),
3379                         float32_to_float64(b, s), d,
3380                         float_muladd_negate_product, s);
3381 }
3382 
3383 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3384 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
3385 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4)
3386 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8)
3387 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3388 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
3389 GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4)
3390 GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8)
3391 
3392 /* Vector Floating-Point Square-Root Instruction */
3393 /* (TD, T2, TX2) */
3394 #define OP_UU_H uint16_t, uint16_t, uint16_t
3395 #define OP_UU_W uint32_t, uint32_t, uint32_t
3396 #define OP_UU_D uint64_t, uint64_t, uint64_t
3397 
3398 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP)        \
3399 static void do_##NAME(void *vd, void *vs2, int i,      \
3400         CPURISCVState *env)                            \
3401 {                                                      \
3402     TX2 s2 = *((T2 *)vs2 + HS2(i));                    \
3403     *((TD *)vd + HD(i)) = OP(s2, &env->fp_status);     \
3404 }
3405 
3406 #define GEN_VEXT_V_ENV(NAME, ESZ, DSZ)                 \
3407 void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
3408         CPURISCVState *env, uint32_t desc)             \
3409 {                                                      \
3410     uint32_t vm = vext_vm(desc);                       \
3411     uint32_t vl = env->vl;                             \
3412     uint32_t i;                                        \
3413                                                        \
3414     if (vl == 0) {                                     \
3415         return;                                        \
3416     }                                                  \
3417     for (i = env->vstart; i < vl; i++) {               \
3418         if (!vm && !vext_elem_mask(v0, i)) {           \
3419             continue;                                  \
3420         }                                              \
3421         do_##NAME(vd, vs2, i, env);                    \
3422     }                                                  \
3423     env->vstart = 0;                                   \
3424 }
3425 
3426 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3427 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3428 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
3429 GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2)
3430 GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4)
3431 GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8)
3432 
3433 /*
3434  * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
3435  *
3436  * Adapted from riscv-v-spec recip.c:
3437  * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3438  */
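/*
 * The estimate is produced as in recip.c: a subnormal input is first
 * normalized, then a 128-entry table is indexed with the low bit of the
 * exponent concatenated with the top six fraction bits.  The 7-bit table
 * value becomes the most significant fraction bits of the result, and
 * the output exponent is (3 * bias - 1 - exp) / 2, computed below as
 * (3 * bias + ~exp) / 2.
 */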
3439 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
3440 {
3441     uint64_t sign = extract64(f, frac_size + exp_size, 1);
3442     uint64_t exp = extract64(f, frac_size, exp_size);
3443     uint64_t frac = extract64(f, 0, frac_size);
3444 
3445     const uint8_t lookup_table[] = {
3446         52, 51, 50, 48, 47, 46, 44, 43,
3447         42, 41, 40, 39, 38, 36, 35, 34,
3448         33, 32, 31, 30, 30, 29, 28, 27,
3449         26, 25, 24, 23, 23, 22, 21, 20,
3450         19, 19, 18, 17, 16, 16, 15, 14,
3451         14, 13, 12, 12, 11, 10, 10, 9,
3452         9, 8, 7, 7, 6, 6, 5, 4,
3453         4, 3, 3, 2, 2, 1, 1, 0,
3454         127, 125, 123, 121, 119, 118, 116, 114,
3455         113, 111, 109, 108, 106, 105, 103, 102,
3456         100, 99, 97, 96, 95, 93, 92, 91,
3457         90, 88, 87, 86, 85, 84, 83, 82,
3458         80, 79, 78, 77, 76, 75, 74, 73,
3459         72, 71, 70, 70, 69, 68, 67, 66,
3460         65, 64, 63, 63, 62, 61, 60, 59,
3461         59, 58, 57, 56, 56, 55, 54, 53
3462     };
3463     const int precision = 7;
3464 
3465     if (exp == 0 && frac != 0) { /* subnormal */
3466         /* Normalize the subnormal. */
3467         while (extract64(frac, frac_size - 1, 1) == 0) {
3468             exp--;
3469             frac <<= 1;
3470         }
3471 
3472         frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3473     }
3474 
3475     int idx = ((exp & 1) << (precision - 1)) |
3476                 (frac >> (frac_size - precision + 1));
3477     uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3478                             (frac_size - precision);
3479     uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;
3480 
3481     uint64_t val = 0;
3482     val = deposit64(val, 0, frac_size, out_frac);
3483     val = deposit64(val, frac_size, exp_size, out_exp);
3484     val = deposit64(val, frac_size + exp_size, 1, sign);
3485     return val;
3486 }
3487 
3488 static float16 frsqrt7_h(float16 f, float_status *s)
3489 {
3490     int exp_size = 5, frac_size = 10;
3491     bool sign = float16_is_neg(f);
3492 
3493     /*
3494      * frsqrt7(sNaN) = canonical NaN
3495      * frsqrt7(-inf) = canonical NaN
3496      * frsqrt7(-normal) = canonical NaN
3497      * frsqrt7(-subnormal) = canonical NaN
3498      */
3499     if (float16_is_signaling_nan(f, s) ||
3500             (float16_is_infinity(f) && sign) ||
3501             (float16_is_normal(f) && sign) ||
3502             (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
3503         s->float_exception_flags |= float_flag_invalid;
3504         return float16_default_nan(s);
3505     }
3506 
3507     /* frsqrt7(qNaN) = canonical NaN */
3508     if (float16_is_quiet_nan(f, s)) {
3509         return float16_default_nan(s);
3510     }
3511 
3512     /* frsqrt7(+-0) = +-inf */
3513     if (float16_is_zero(f)) {
3514         s->float_exception_flags |= float_flag_divbyzero;
3515         return float16_set_sign(float16_infinity, sign);
3516     }
3517 
3518     /* frsqrt7(+inf) = +0 */
3519     if (float16_is_infinity(f) && !sign) {
3520         return float16_set_sign(float16_zero, sign);
3521     }
3522 
3523     /* +normal, +subnormal */
3524     uint64_t val = frsqrt7(f, exp_size, frac_size);
3525     return make_float16(val);
3526 }
3527 
3528 static float32 frsqrt7_s(float32 f, float_status *s)
3529 {
3530     int exp_size = 8, frac_size = 23;
3531     bool sign = float32_is_neg(f);
3532 
3533     /*
3534      * frsqrt7(sNaN) = canonical NaN
3535      * frsqrt7(-inf) = canonical NaN
3536      * frsqrt7(-normal) = canonical NaN
3537      * frsqrt7(-subnormal) = canonical NaN
3538      */
3539     if (float32_is_signaling_nan(f, s) ||
3540             (float32_is_infinity(f) && sign) ||
3541             (float32_is_normal(f) && sign) ||
3542             (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
3543         s->float_exception_flags |= float_flag_invalid;
3544         return float32_default_nan(s);
3545     }
3546 
3547     /* frsqrt7(qNaN) = canonical NaN */
3548     if (float32_is_quiet_nan(f, s)) {
3549         return float32_default_nan(s);
3550     }
3551 
3552     /* frsqrt7(+-0) = +-inf */
3553     if (float32_is_zero(f)) {
3554         s->float_exception_flags |= float_flag_divbyzero;
3555         return float32_set_sign(float32_infinity, sign);
3556     }
3557 
3558     /* frsqrt7(+inf) = +0 */
3559     if (float32_is_infinity(f) && !sign) {
3560         return float32_set_sign(float32_zero, sign);
3561     }
3562 
3563     /* +normal, +subnormal */
3564     uint64_t val = frsqrt7(f, exp_size, frac_size);
3565     return make_float32(val);
3566 }
3567 
3568 static float64 frsqrt7_d(float64 f, float_status *s)
3569 {
3570     int exp_size = 11, frac_size = 52;
3571     bool sign = float64_is_neg(f);
3572 
3573     /*
3574      * frsqrt7(sNaN) = canonical NaN
3575      * frsqrt7(-inf) = canonical NaN
3576      * frsqrt7(-normal) = canonical NaN
3577      * frsqrt7(-subnormal) = canonical NaN
3578      */
3579     if (float64_is_signaling_nan(f, s) ||
3580             (float64_is_infinity(f) && sign) ||
3581             (float64_is_normal(f) && sign) ||
3582             (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
3583         s->float_exception_flags |= float_flag_invalid;
3584         return float64_default_nan(s);
3585     }
3586 
3587     /* frsqrt7(qNaN) = canonical NaN */
3588     if (float64_is_quiet_nan(f, s)) {
3589         return float64_default_nan(s);
3590     }
3591 
3592     /* frsqrt7(+-0) = +-inf */
3593     if (float64_is_zero(f)) {
3594         s->float_exception_flags |= float_flag_divbyzero;
3595         return float64_set_sign(float64_infinity, sign);
3596     }
3597 
3598     /* frsqrt7(+inf) = +0 */
3599     if (float64_is_infinity(f) && !sign) {
3600         return float64_set_sign(float64_zero, sign);
3601     }
3602 
3603     /* +normal, +subnormal */
3604     uint64_t val = frsqrt7(f, exp_size, frac_size);
3605     return make_float64(val);
3606 }
3607 
3608 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
3609 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
3610 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
3611 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2, 2)
3612 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4, 4)
3613 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8, 8)
3614 
3615 /*
3616  * Vector Floating-Point Reciprocal Estimate Instruction
3617  *
3618  * Adapted from riscv-v-spec recip.c:
3619  * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3620  */
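/*
 * The estimate again follows recip.c: the 128-entry table is indexed
 * with the top seven fraction bits and the output exponent is
 * 2 * bias - 1 - exp (computed below as 2 * bias + ~exp).  A subnormal
 * input whose reciprocal overflows the format is turned into +-inf or
 * the largest finite value, depending on the rounding mode, with
 * overflow and inexact raised; a result that falls outside the normal
 * exponent range is denormalized without raising underflow.
 */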
3621 static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
3622                       float_status *s)
3623 {
3624     uint64_t sign = extract64(f, frac_size + exp_size, 1);
3625     uint64_t exp = extract64(f, frac_size, exp_size);
3626     uint64_t frac = extract64(f, 0, frac_size);
3627 
3628     const uint8_t lookup_table[] = {
3629         127, 125, 123, 121, 119, 117, 116, 114,
3630         112, 110, 109, 107, 105, 104, 102, 100,
3631         99, 97, 96, 94, 93, 91, 90, 88,
3632         87, 85, 84, 83, 81, 80, 79, 77,
3633         76, 75, 74, 72, 71, 70, 69, 68,
3634         66, 65, 64, 63, 62, 61, 60, 59,
3635         58, 57, 56, 55, 54, 53, 52, 51,
3636         50, 49, 48, 47, 46, 45, 44, 43,
3637         42, 41, 40, 40, 39, 38, 37, 36,
3638         35, 35, 34, 33, 32, 31, 31, 30,
3639         29, 28, 28, 27, 26, 25, 25, 24,
3640         23, 23, 22, 21, 21, 20, 19, 19,
3641         18, 17, 17, 16, 15, 15, 14, 14,
3642         13, 12, 12, 11, 11, 10, 9, 9,
3643         8, 8, 7, 7, 6, 5, 5, 4,
3644         4, 3, 3, 2, 2, 1, 1, 0
3645     };
3646     const int precision = 7;
3647 
3648     if (exp == 0 && frac != 0) { /* subnormal */
3649         /* Normalize the subnormal. */
3650         while (extract64(frac, frac_size - 1, 1) == 0) {
3651             exp--;
3652             frac <<= 1;
3653         }
3654 
3655         frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3656 
3657         if (exp != 0 && exp != UINT64_MAX) {
3658             /*
3659              * Overflow to inf or max value of same sign,
3660              * depending on sign and rounding mode.
3661              */
3662             s->float_exception_flags |= (float_flag_inexact |
3663                                          float_flag_overflow);
3664 
3665             if ((s->float_rounding_mode == float_round_to_zero) ||
3666                 ((s->float_rounding_mode == float_round_down) && !sign) ||
3667                 ((s->float_rounding_mode == float_round_up) && sign)) {
3668                 /* Return greatest/negative finite value. */
3669                 return (sign << (exp_size + frac_size)) |
3670                     (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
3671             } else {
3672                 /* Return +-inf. */
3673                 return (sign << (exp_size + frac_size)) |
3674                     MAKE_64BIT_MASK(frac_size, exp_size);
3675             }
3676         }
3677     }
3678 
3679     int idx = frac >> (frac_size - precision);
3680     uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3681                             (frac_size - precision);
3682     uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;
3683 
3684     if (out_exp == 0 || out_exp == UINT64_MAX) {
3685         /*
3686          * The result is subnormal, but don't raise the underflow exception,
3687          * because there's no additional loss of precision.
3688          */
3689         out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
3690         if (out_exp == UINT64_MAX) {
3691             out_frac >>= 1;
3692             out_exp = 0;
3693         }
3694     }
3695 
3696     uint64_t val = 0;
3697     val = deposit64(val, 0, frac_size, out_frac);
3698     val = deposit64(val, frac_size, exp_size, out_exp);
3699     val = deposit64(val, frac_size + exp_size, 1, sign);
3700     return val;
3701 }
3702 
3703 static float16 frec7_h(float16 f, float_status *s)
3704 {
3705     int exp_size = 5, frac_size = 10;
3706     bool sign = float16_is_neg(f);
3707 
3708     /* frec7(+-inf) = +-0 */
3709     if (float16_is_infinity(f)) {
3710         return float16_set_sign(float16_zero, sign);
3711     }
3712 
3713     /* frec7(+-0) = +-inf */
3714     if (float16_is_zero(f)) {
3715         s->float_exception_flags |= float_flag_divbyzero;
3716         return float16_set_sign(float16_infinity, sign);
3717     }
3718 
3719     /* frec7(sNaN) = canonical NaN */
3720     if (float16_is_signaling_nan(f, s)) {
3721         s->float_exception_flags |= float_flag_invalid;
3722         return float16_default_nan(s);
3723     }
3724 
3725     /* frec7(qNaN) = canonical NaN */
3726     if (float16_is_quiet_nan(f, s)) {
3727         return float16_default_nan(s);
3728     }
3729 
3730     /* +-normal, +-subnormal */
3731     uint64_t val = frec7(f, exp_size, frac_size, s);
3732     return make_float16(val);
3733 }
3734 
3735 static float32 frec7_s(float32 f, float_status *s)
3736 {
3737     int exp_size = 8, frac_size = 23;
3738     bool sign = float32_is_neg(f);
3739 
3740     /* frec7(+-inf) = +-0 */
3741     if (float32_is_infinity(f)) {
3742         return float32_set_sign(float32_zero, sign);
3743     }
3744 
3745     /* frec7(+-0) = +-inf */
3746     if (float32_is_zero(f)) {
3747         s->float_exception_flags |= float_flag_divbyzero;
3748         return float32_set_sign(float32_infinity, sign);
3749     }
3750 
3751     /* frec7(sNaN) = canonical NaN */
3752     if (float32_is_signaling_nan(f, s)) {
3753         s->float_exception_flags |= float_flag_invalid;
3754         return float32_default_nan(s);
3755     }
3756 
3757     /* frec7(qNaN) = canonical NaN */
3758     if (float32_is_quiet_nan(f, s)) {
3759         return float32_default_nan(s);
3760     }
3761 
3762     /* +-normal, +-subnormal */
3763     uint64_t val = frec7(f, exp_size, frac_size, s);
3764     return make_float32(val);
3765 }
3766 
3767 static float64 frec7_d(float64 f, float_status *s)
3768 {
3769     int exp_size = 11, frac_size = 52;
3770     bool sign = float64_is_neg(f);
3771 
3772     /* frec7(+-inf) = +-0 */
3773     if (float64_is_infinity(f)) {
3774         return float64_set_sign(float64_zero, sign);
3775     }
3776 
3777     /* frec7(+-0) = +-inf */
3778     if (float64_is_zero(f)) {
3779         s->float_exception_flags |= float_flag_divbyzero;
3780         return float64_set_sign(float64_infinity, sign);
3781     }
3782 
3783     /* frec7(sNaN) = canonical NaN */
3784     if (float64_is_signaling_nan(f, s)) {
3785         s->float_exception_flags |= float_flag_invalid;
3786         return float64_default_nan(s);
3787     }
3788 
3789     /* frec7(qNaN) = canonical NaN */
3790     if (float64_is_quiet_nan(f, s)) {
3791         return float64_default_nan(s);
3792     }
3793 
3794     /* +-normal, +-subnormal */
3795     uint64_t val = frec7(f, exp_size, frac_size, s);
3796     return make_float64(val);
3797 }
3798 
3799 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
3800 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
3801 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
3802 GEN_VEXT_V_ENV(vfrec7_v_h, 2, 2)
3803 GEN_VEXT_V_ENV(vfrec7_v_w, 4, 4)
3804 GEN_VEXT_V_ENV(vfrec7_v_d, 8, 8)
3805 
3806 /* Vector Floating-Point MIN/MAX Instructions */
3807 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
3808 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
3809 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
3810 GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2)
3811 GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4)
3812 GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8)
3813 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
3814 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
3815 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
3816 GEN_VEXT_VF(vfmin_vf_h, 2, 2)
3817 GEN_VEXT_VF(vfmin_vf_w, 4, 4)
3818 GEN_VEXT_VF(vfmin_vf_d, 8, 8)
3819 
3820 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
3821 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
3822 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
3823 GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2)
3824 GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4)
3825 GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8)
3826 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
3827 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
3828 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
3829 GEN_VEXT_VF(vfmax_vf_h, 2, 2)
3830 GEN_VEXT_VF(vfmax_vf_w, 4, 4)
3831 GEN_VEXT_VF(vfmax_vf_d, 8, 8)
3832 
3833 /* Vector Floating-Point Sign-Injection Instructions */
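/*
 * In the helpers below, a carries the magnitude (the vs2 element) and b
 * carries the sign source (the vs1 element or rs1):
 *   fsgnj : result = sign(b) with magnitude(a)
 *   fsgnjn: result = sign(~b) with magnitude(a)
 *   fsgnjx: result = sign(a ^ b) with magnitude(a)
 * deposit64(x, 0, width - 1, a) keeps only bit width-1 (the sign) of x
 * and takes the remaining bits from a.
 */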
3834 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
3835 {
3836     return deposit64(b, 0, 15, a);
3837 }
3838 
3839 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
3840 {
3841     return deposit64(b, 0, 31, a);
3842 }
3843 
3844 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
3845 {
3846     return deposit64(b, 0, 63, a);
3847 }
3848 
3849 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
3850 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
3851 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
3852 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2)
3853 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4)
3854 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8)
3855 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
3856 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
3857 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
3858 GEN_VEXT_VF(vfsgnj_vf_h, 2, 2)
3859 GEN_VEXT_VF(vfsgnj_vf_w, 4, 4)
3860 GEN_VEXT_VF(vfsgnj_vf_d, 8, 8)
3861 
3862 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
3863 {
3864     return deposit64(~b, 0, 15, a);
3865 }
3866 
3867 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
3868 {
3869     return deposit64(~b, 0, 31, a);
3870 }
3871 
3872 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
3873 {
3874     return deposit64(~b, 0, 63, a);
3875 }
3876 
3877 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
3878 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
3879 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
3880 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2)
3881 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4)
3882 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8)
3883 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
3884 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
3885 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
3886 GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2)
3887 GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4)
3888 GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8)
3889 
3890 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
3891 {
3892     return deposit64(b ^ a, 0, 15, a);
3893 }
3894 
3895 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
3896 {
3897     return deposit64(b ^ a, 0, 31, a);
3898 }
3899 
3900 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
3901 {
3902     return deposit64(b ^ a, 0, 63, a);
3903 }
3904 
3905 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
3906 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
3907 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
3908 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2)
3909 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4)
3910 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8)
3911 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
3912 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
3913 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
3914 GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2)
3915 GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4)
3916 GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8)
3917 
3918 /* Vector Floating-Point Compare Instructions */
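/*
 * The compare helpers write one mask bit per active element via
 * vext_set_elem_mask(); inactive elements are skipped and their mask
 * bits are left unchanged.  vmfeq/vmfne use the quiet softfloat
 * comparisons, which only raise invalid for signaling NaNs, while
 * vmflt/vmfle/vmfgt/vmfge use the signaling variants (float*_lt,
 * float*_le, float*_compare), which raise invalid for any NaN operand.
 */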
3919 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP)            \
3920 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
3921                   CPURISCVState *env, uint32_t desc)          \
3922 {                                                             \
3923     uint32_t vm = vext_vm(desc);                              \
3924     uint32_t vl = env->vl;                                    \
3925     uint32_t i;                                               \
3926                                                               \
3927     for (i = env->vstart; i < vl; i++) {                      \
3928         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
3929         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
3930         if (!vm && !vext_elem_mask(v0, i)) {                  \
3931             continue;                                         \
3932         }                                                     \
3933         vext_set_elem_mask(vd, i,                             \
3934                            DO_OP(s2, s1, &env->fp_status));   \
3935     }                                                         \
3936     env->vstart = 0;                                          \
3937 }
3938 
3939 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
3940 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
3941 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
3942 
3943 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP)                      \
3944 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,       \
3945                   CPURISCVState *env, uint32_t desc)                \
3946 {                                                                   \
3947     uint32_t vm = vext_vm(desc);                                    \
3948     uint32_t vl = env->vl;                                          \
3949     uint32_t i;                                                     \
3950                                                                     \
3951     for (i = env->vstart; i < vl; i++) {                            \
3952         ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
3953         if (!vm && !vext_elem_mask(v0, i)) {                        \
3954             continue;                                               \
3955         }                                                           \
3956         vext_set_elem_mask(vd, i,                                   \
3957                            DO_OP(s2, (ETYPE)s1, &env->fp_status));  \
3958     }                                                               \
3959     env->vstart = 0;                                                \
3960 }
3961 
3962 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
3963 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
3964 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
3965 
3966 static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
3967 {
3968     FloatRelation compare = float16_compare_quiet(a, b, s);
3969     return compare != float_relation_equal;
3970 }
3971 
3972 static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
3973 {
3974     FloatRelation compare = float32_compare_quiet(a, b, s);
3975     return compare != float_relation_equal;
3976 }
3977 
3978 static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
3979 {
3980     FloatRelation compare = float64_compare_quiet(a, b, s);
3981     return compare != float_relation_equal;
3982 }
3983 
3984 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
3985 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
3986 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
3987 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
3988 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
3989 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
3990 
3991 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
3992 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
3993 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
3994 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
3995 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
3996 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
3997 
3998 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
3999 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
4000 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
4001 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
4002 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
4003 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
4004 
4005 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
4006 {
4007     FloatRelation compare = float16_compare(a, b, s);
4008     return compare == float_relation_greater;
4009 }
4010 
4011 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
4012 {
4013     FloatRelation compare = float32_compare(a, b, s);
4014     return compare == float_relation_greater;
4015 }
4016 
4017 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
4018 {
4019     FloatRelation compare = float64_compare(a, b, s);
4020     return compare == float_relation_greater;
4021 }
4022 
4023 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
4024 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
4025 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4026 
4027 static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4028 {
4029     FloatRelation compare = float16_compare(a, b, s);
4030     return compare == float_relation_greater ||
4031            compare == float_relation_equal;
4032 }
4033 
4034 static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
4035 {
4036     FloatRelation compare = float32_compare(a, b, s);
4037     return compare == float_relation_greater ||
4038            compare == float_relation_equal;
4039 }
4040 
4041 static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
4042 {
4043     FloatRelation compare = float64_compare(a, b, s);
4044     return compare == float_relation_greater ||
4045            compare == float_relation_equal;
4046 }
4047 
4048 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
4049 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
4050 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
4051 
4052 /* Vector Floating-Point Classify Instruction */
4053 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP)         \
4054 static void do_##NAME(void *vd, void *vs2, int i)      \
4055 {                                                      \
4056     TX2 s2 = *((T2 *)vs2 + HS2(i));                    \
4057     *((TD *)vd + HD(i)) = OP(s2);                      \
4058 }
4059 
4060 #define GEN_VEXT_V(NAME, ESZ, DSZ)                     \
4061 void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
4062                   CPURISCVState *env, uint32_t desc)   \
4063 {                                                      \
4064     uint32_t vm = vext_vm(desc);                       \
4065     uint32_t vl = env->vl;                             \
4066     uint32_t i;                                        \
4067                                                        \
4068     for (i = env->vstart; i < vl; i++) {               \
4069         if (!vm && !vext_elem_mask(v0, i)) {           \
4070             continue;                                  \
4071         }                                              \
4072         do_##NAME(vd, vs2, i);                         \
4073     }                                                  \
4074     env->vstart = 0;                                   \
4075 }
4076 
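/*
 * fclass returns a 10-bit mask with exactly one bit set for the class
 * of the input operand:
 *   bit 0: -inf        bit 1: -normal    bit 2: -subnormal  bit 3: -0
 *   bit 4: +0          bit 5: +subnormal bit 6: +normal     bit 7: +inf
 *   bit 8: signaling NaN                 bit 9: quiet NaN
 */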
4077 target_ulong fclass_h(uint64_t frs1)
4078 {
4079     float16 f = frs1;
4080     bool sign = float16_is_neg(f);
4081 
4082     if (float16_is_infinity(f)) {
4083         return sign ? 1 << 0 : 1 << 7;
4084     } else if (float16_is_zero(f)) {
4085         return sign ? 1 << 3 : 1 << 4;
4086     } else if (float16_is_zero_or_denormal(f)) {
4087         return sign ? 1 << 2 : 1 << 5;
4088     } else if (float16_is_any_nan(f)) {
4089         float_status s = { }; /* for snan_bit_is_one */
4090         return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4091     } else {
4092         return sign ? 1 << 1 : 1 << 6;
4093     }
4094 }
4095 
4096 target_ulong fclass_s(uint64_t frs1)
4097 {
4098     float32 f = frs1;
4099     bool sign = float32_is_neg(f);
4100 
4101     if (float32_is_infinity(f)) {
4102         return sign ? 1 << 0 : 1 << 7;
4103     } else if (float32_is_zero(f)) {
4104         return sign ? 1 << 3 : 1 << 4;
4105     } else if (float32_is_zero_or_denormal(f)) {
4106         return sign ? 1 << 2 : 1 << 5;
4107     } else if (float32_is_any_nan(f)) {
4108         float_status s = { }; /* for snan_bit_is_one */
4109         return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4110     } else {
4111         return sign ? 1 << 1 : 1 << 6;
4112     }
4113 }
4114 
4115 target_ulong fclass_d(uint64_t frs1)
4116 {
4117     float64 f = frs1;
4118     bool sign = float64_is_neg(f);
4119 
4120     if (float64_is_infinity(f)) {
4121         return sign ? 1 << 0 : 1 << 7;
4122     } else if (float64_is_zero(f)) {
4123         return sign ? 1 << 3 : 1 << 4;
4124     } else if (float64_is_zero_or_denormal(f)) {
4125         return sign ? 1 << 2 : 1 << 5;
4126     } else if (float64_is_any_nan(f)) {
4127         float_status s = { }; /* for snan_bit_is_one */
4128         return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4129     } else {
4130         return sign ? 1 << 1 : 1 << 6;
4131     }
4132 }
4133 
4134 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
4135 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
4136 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
4137 GEN_VEXT_V(vfclass_v_h, 2, 2)
4138 GEN_VEXT_V(vfclass_v_w, 4, 4)
4139 GEN_VEXT_V(vfclass_v_d, 8, 8)
4140 
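/*
 * For reference, some classifications produced by fclass_s() above,
 * assuming the zero-initialized float_status (snan_bit_is_one clear):
 *   fclass_s(0xff800000)  (-inf)           -> 1 << 0  (0x001)
 *   fclass_s(0xbf800000)  (-1.0f, normal)  -> 1 << 1  (0x002)
 *   fclass_s(0x80000000)  (-0.0f)          -> 1 << 3  (0x008)
 *   fclass_s(0x3f800000)  (+1.0f, normal)  -> 1 << 6  (0x040)
 *   fclass_s(0x7f800000)  (+inf)           -> 1 << 7  (0x080)
 *   fclass_s(0x7fc00000)  (quiet NaN)      -> 1 << 9  (0x200)
 */
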
4141 /* Vector Floating-Point Merge Instruction */
4142 #define GEN_VFMERGE_VF(NAME, ETYPE, H)                        \
4143 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4144                   CPURISCVState *env, uint32_t desc)          \
4145 {                                                             \
4146     uint32_t vm = vext_vm(desc);                              \
4147     uint32_t vl = env->vl;                                    \
4148     uint32_t i;                                               \
4149                                                               \
4150     for (i = env->vstart; i < vl; i++) {                      \
4151         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
4152         *((ETYPE *)vd + H(i))                                 \
4153           = (!vm && !vext_elem_mask(v0, i) ? s2 : s1);        \
4154     }                                                         \
4155     env->vstart = 0;                                          \
4156 }
4157 
4158 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
4159 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
4160 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
4161 
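/*
 * Example of the merge semantics above: with vm == 0, vl == 4,
 * mask v0 = {1, 0, 1, 0}, vs2 = {a, b, c, d} and scalar operand f,
 * the result is vd = {f, b, f, d}: active elements (mask bit set)
 * take the scalar, inactive elements keep the vs2 value.
 */
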
4162 /* Single-Width Floating-Point/Integer Type-Convert Instructions */
4163 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
4164 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
4165 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
4166 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
4167 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2)
4168 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4)
4169 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8)
4170 
4171 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
4172 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
4173 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
4174 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
4175 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2)
4176 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4)
4177 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8)
4178 
4179 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
4180 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
4181 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
4182 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
4183 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2)
4184 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4)
4185 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8)
4186 
4187 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
4188 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
4189 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
4190 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
4191 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2)
4192 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4)
4193 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8)
4194 
4195 /* Widening Floating-Point/Integer Type-Convert Instructions */
4196 /* (TD, T2, TX2) */
4197 #define WOP_UU_B uint16_t, uint8_t,  uint8_t
4198 #define WOP_UU_H uint32_t, uint16_t, uint16_t
4199 #define WOP_UU_W uint64_t, uint32_t, uint32_t
4200 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/
4201 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
4202 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
4203 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4)
4204 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8)
4205 
4206 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
4207 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
4208 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
4209 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4)
4210 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8)
4211 
4212 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */
4213 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
4214 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
4215 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
4216 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 1, 2)
4217 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4)
4218 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8)
4219 
4220 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
4221 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
4222 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
4223 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
4224 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 1, 2)
4225 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4)
4226 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8)
4227 
4228 /*
4229  * vfwcvt.f.f.v vd, vs2, vm
4230  * Convert single-width float to double-width float.
4231  */
4232 static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4233 {
4234     return float16_to_float32(a, true, s);
4235 }
4236 
4237 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
4238 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
4239 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4)
4240 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8)
4241 
4242 /* Narrowing Floating-Point/Integer Type-Convert Instructions */
4243 /* (TD, T2, TX2) */
4244 #define NOP_UU_B uint8_t,  uint16_t, uint32_t
4245 #define NOP_UU_H uint16_t, uint32_t, uint32_t
4246 #define NOP_UU_W uint32_t, uint64_t, uint64_t
4247 /* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer */
4248 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
4249 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
4250 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
4251 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1, 1)
4252 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2, 2)
4253 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4, 4)
4254 
4255 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
4256 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
4257 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
4258 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
4259 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1, 1)
4260 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2, 2)
4261 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4, 4)
4262 
4263 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */
4264 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
4265 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
4266 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2, 2)
4267 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4, 4)
4268 
4269 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
4270 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
4271 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
4272 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2, 2)
4273 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4, 4)
4274 
4275 /* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float. */
4276 static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4277 {
4278     return float32_to_float16(a, true, s);
4279 }
4280 
4281 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
4282 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
4283 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2, 2)
4284 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4, 4)
4285 
4286 /*
4287  *** Vector Reduction Operations
4288  */
4289 /* Vector Single-Width Integer Reduction Instructions */
4290 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP)          \
4291 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
4292         void *vs2, CPURISCVState *env, uint32_t desc)     \
4293 {                                                         \
4294     uint32_t vm = vext_vm(desc);                          \
4295     uint32_t vl = env->vl;                                \
4296     uint32_t i;                                           \
4297     TD s1 =  *((TD *)vs1 + HD(0));                        \
4298                                                           \
4299     for (i = env->vstart; i < vl; i++) {                  \
4300         TS2 s2 = *((TS2 *)vs2 + HS2(i));                  \
4301         if (!vm && !vext_elem_mask(v0, i)) {              \
4302             continue;                                     \
4303         }                                                 \
4304         s1 = OP(s1, (TD)s2);                              \
4305     }                                                     \
4306     *((TD *)vd + HD(0)) = s1;                             \
4307     env->vstart = 0;                                      \
4308 }
4309 
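/*
 * Reduction example: for vredsum.vs with vl = 4, vs1[0] = 10 and
 * vs2 = {1, 2, 3, 4}, an all-ones mask yields vd[0] = 20, while a
 * mask of {1, 0, 1, 0} yields vd[0] = 10 + 1 + 3 = 14.  Only element 0
 * of vd is written; inactive vs2 elements are skipped.
 */
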
4310 /* vd[0] = sum(vs1[0], vs2[*]) */
4311 GEN_VEXT_RED(vredsum_vs_b, int8_t,  int8_t,  H1, H1, DO_ADD)
4312 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
4313 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
4314 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)
4315 
4316 /* vd[0] = maxu(vs1[0], vs2[*]) */
4317 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t,  uint8_t,  H1, H1, DO_MAX)
4318 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
4319 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
4320 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)
4321 
4322 /* vd[0] = max(vs1[0], vs2[*]) */
4323 GEN_VEXT_RED(vredmax_vs_b, int8_t,  int8_t,  H1, H1, DO_MAX)
4324 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
4325 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
4326 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)
4327 
4328 /* vd[0] = minu(vs1[0], vs2[*]) */
4329 GEN_VEXT_RED(vredminu_vs_b, uint8_t,  uint8_t,  H1, H1, DO_MIN)
4330 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
4331 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
4332 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)
4333 
4334 /* vd[0] = min(vs1[0], vs2[*]) */
4335 GEN_VEXT_RED(vredmin_vs_b, int8_t,  int8_t,  H1, H1, DO_MIN)
4336 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
4337 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
4338 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)
4339 
4340 /* vd[0] = and(vs1[0], vs2[*]) */
4341 GEN_VEXT_RED(vredand_vs_b, int8_t,  int8_t,  H1, H1, DO_AND)
4342 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
4343 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
4344 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)
4345 
4346 /* vd[0] = or(vs1[0], vs2[*]) */
4347 GEN_VEXT_RED(vredor_vs_b, int8_t,  int8_t,  H1, H1, DO_OR)
4348 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
4349 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
4350 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)
4351 
4352 /* vd[0] = xor(vs1[0], vs2[*]) */
4353 GEN_VEXT_RED(vredxor_vs_b, int8_t,  int8_t,  H1, H1, DO_XOR)
4354 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
4355 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
4356 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)
4357 
4358 /* Vector Widening Integer Reduction Instructions */
4359 /* Signed sum reduction into double-width accumulator */
4360 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t,  H2, H1, DO_ADD)
4361 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
4362 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)
4363 
4364 /* Unsigned sum reduction into double-width accumulator */
4365 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t,  H2, H1, DO_ADD)
4366 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
4367 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
4368 
4369 /* Vector Single-Width Floating-Point Reduction Instructions */
4370 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP)          \
4371 void HELPER(NAME)(void *vd, void *v0, void *vs1,           \
4372                   void *vs2, CPURISCVState *env,           \
4373                   uint32_t desc)                           \
4374 {                                                          \
4375     uint32_t vm = vext_vm(desc);                           \
4376     uint32_t vl = env->vl;                                 \
4377     uint32_t i;                                            \
4378     TD s1 =  *((TD *)vs1 + HD(0));                         \
4379                                                            \
4380     for (i = env->vstart; i < vl; i++) {                   \
4381         TS2 s2 = *((TS2 *)vs2 + HS2(i));                   \
4382         if (!vm && !vext_elem_mask(v0, i)) {               \
4383             continue;                                      \
4384         }                                                  \
4385         s1 = OP(s1, (TD)s2, &env->fp_status);              \
4386     }                                                      \
4387     *((TD *)vd + HD(0)) = s1;                              \
4388     env->vstart = 0;                                       \
4389 }
4390 
4391 /* Unordered sum */
4392 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4393 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4394 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
4395 
4396 /* Maximum value */
4397 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number)
4398 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number)
4399 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number)
4400 
4401 /* Minimum value */
4402 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number)
4403 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number)
4404 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number)
4405 
4406 /* Vector Widening Floating-Point Reduction Instructions */
4407 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
4408 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
4409                             void *vs2, CPURISCVState *env, uint32_t desc)
4410 {
4411     uint32_t vm = vext_vm(desc);
4412     uint32_t vl = env->vl;
4413     uint32_t i;
4414     uint32_t s1 =  *((uint32_t *)vs1 + H4(0));
4415 
4416     for (i = env->vstart; i < vl; i++) {
4417         uint16_t s2 = *((uint16_t *)vs2 + H2(i));
4418         if (!vm && !vext_elem_mask(v0, i)) {
4419             continue;
4420         }
4421         s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status),
4422                          &env->fp_status);
4423     }
4424     *((uint32_t *)vd + H4(0)) = s1;
4425     env->vstart = 0;
4426 }
4427 
4428 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
4429                             void *vs2, CPURISCVState *env, uint32_t desc)
4430 {
4431     uint32_t vm = vext_vm(desc);
4432     uint32_t vl = env->vl;
4433     uint32_t i;
4434     uint64_t s1 =  *((uint64_t *)vs1);
4435 
4436     for (i = env->vstart; i < vl; i++) {
4437         uint32_t s2 = *((uint32_t *)vs2 + H4(i));
4438         if (!vm && !vext_elem_mask(v0, i)) {
4439             continue;
4440         }
4441         s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status),
4442                          &env->fp_status);
4443     }
4444     *((uint64_t *)vd) = s1;
4445     env->vstart = 0;
4446 }
4447 
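/*
 * Example for the widening sum above: with vl = 2, the float32 seed
 * vs1[0] = 1.0 (0x3f800000) and vs2 = {1.0, 2.0} in float16, both
 * elements active, vd[0] receives the float32 value 4.0 (0x40800000);
 * each float16 element is promoted to float32 before the add.
 */
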
4448 /*
4449  *** Vector Mask Operations
4450  */
4451 /* Vector Mask-Register Logical Instructions */
4452 #define GEN_VEXT_MASK_VV(NAME, OP)                        \
4453 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
4454                   void *vs2, CPURISCVState *env,          \
4455                   uint32_t desc)                          \
4456 {                                                         \
4457     uint32_t vl = env->vl;                                \
4458     uint32_t i;                                           \
4459     int a, b;                                             \
4460                                                           \
4461     for (i = env->vstart; i < vl; i++) {                  \
4462         a = vext_elem_mask(vs1, i);                       \
4463         b = vext_elem_mask(vs2, i);                       \
4464         vext_set_elem_mask(vd, i, OP(b, a));              \
4465     }                                                     \
4466     env->vstart = 0;                                      \
4467 }
4468 
4469 #define DO_NAND(N, M)  (!(N & M))
4470 #define DO_ANDNOT(N, M)  (N & !M)
4471 #define DO_NOR(N, M)  (!(N | M))
4472 #define DO_ORNOT(N, M)  (N | !M)
4473 #define DO_XNOR(N, M)  (!(N ^ M))
4474 
4475 GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
4476 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
4477 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
4478 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
4479 GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
4480 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
4481 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
4482 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
4483 
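/*
 * These macros operate on single mask bits (0 or 1), so logical
 * negation is sufficient.  Note the operand order: OP(b, a) is called
 * with b = vs2 bit and a = vs1 bit, so e.g. vmandn computes
 * vs2 & ~vs1 and vmorn computes vs2 | ~vs1.  Example with
 * vs2 = {1, 1, 0, 0} and vs1 = {1, 0, 1, 0}:
 *   vmand  -> {1, 0, 0, 0}
 *   vmandn -> {0, 1, 0, 0}
 *   vmxnor -> {1, 0, 0, 1}
 */
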
4484 /* Vector count population in mask (vcpop.m) */
4485 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
4486                              uint32_t desc)
4487 {
4488     target_ulong cnt = 0;
4489     uint32_t vm = vext_vm(desc);
4490     uint32_t vl = env->vl;
4491     int i;
4492 
4493     for (i = env->vstart; i < vl; i++) {
4494         if (vm || vext_elem_mask(v0, i)) {
4495             if (vext_elem_mask(vs2, i)) {
4496                 cnt++;
4497             }
4498         }
4499     }
4500     env->vstart = 0;
4501     return cnt;
4502 }
4503 
4504 /* vfirst find-first-set mask bit */
4505 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4506                               uint32_t desc)
4507 {
4508     uint32_t vm = vext_vm(desc);
4509     uint32_t vl = env->vl;
4510     int i;
4511 
4512     for (i = env->vstart; i < vl; i++) {
4513         if (vm || vext_elem_mask(v0, i)) {
4514             if (vext_elem_mask(vs2, i)) {
4515                 return i;
4516             }
4517         }
4518     }
4519     env->vstart = 0;
4520     return -1LL;
4521 }
4522 
4523 enum set_mask_type {
4524     ONLY_FIRST = 1,
4525     INCLUDE_FIRST,
4526     BEFORE_FIRST,
4527 };
4528 
4529 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
4530                    uint32_t desc, enum set_mask_type type)
4531 {
4532     uint32_t vm = vext_vm(desc);
4533     uint32_t vl = env->vl;
4534     int i;
4535     bool first_mask_bit = false;
4536 
4537     for (i = env->vstart; i < vl; i++) {
4538         if (!vm && !vext_elem_mask(v0, i)) {
4539             continue;
4540         }
4541         /* write a zero to all following active elements */
4542         if (first_mask_bit) {
4543             vext_set_elem_mask(vd, i, 0);
4544             continue;
4545         }
4546         if (vext_elem_mask(vs2, i)) {
4547             first_mask_bit = true;
4548             if (type == BEFORE_FIRST) {
4549                 vext_set_elem_mask(vd, i, 0);
4550             } else {
4551                 vext_set_elem_mask(vd, i, 1);
4552             }
4553         } else {
4554             if (type == ONLY_FIRST) {
4555                 vext_set_elem_mask(vd, i, 0);
4556             } else {
4557                 vext_set_elem_mask(vd, i, 1);
4558             }
4559         }
4560     }
4561     env->vstart = 0;
4562 }
4563 
4564 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4565                      uint32_t desc)
4566 {
4567     vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
4568 }
4569 
4570 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4571                      uint32_t desc)
4572 {
4573     vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
4574 }
4575 
4576 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4577                      uint32_t desc)
4578 {
4579     vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
4580 }
4581 
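/*
 * Example with all elements active and vs2 mask bits {0, 0, 1, 0, 1}
 * (first set bit at index 2):
 *   vmsbf.m -> {1, 1, 0, 0, 0}   set-before-first
 *   vmsif.m -> {1, 1, 1, 0, 0}   set-including-first
 *   vmsof.m -> {0, 0, 1, 0, 0}   set-only-first
 */
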
4582 /* Vector Iota Instruction */
4583 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H)                                  \
4584 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env,      \
4585                   uint32_t desc)                                          \
4586 {                                                                         \
4587     uint32_t vm = vext_vm(desc);                                          \
4588     uint32_t vl = env->vl;                                                \
4589     uint32_t sum = 0;                                                     \
4590     int i;                                                                \
4591                                                                           \
4592     for (i = env->vstart; i < vl; i++) {                                  \
4593         if (!vm && !vext_elem_mask(v0, i)) {                              \
4594             continue;                                                     \
4595         }                                                                 \
4596         *((ETYPE *)vd + H(i)) = sum;                                      \
4597         if (vext_elem_mask(vs2, i)) {                                     \
4598             sum++;                                                        \
4599         }                                                                 \
4600     }                                                                     \
4601     env->vstart = 0;                                                      \
4602 }
4603 
4604 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t,  H1)
4605 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
4606 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
4607 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
4608 
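/*
 * Example: with all elements active and vs2 mask bits {1, 0, 1, 1},
 * viota.m writes vd = {0, 1, 1, 2}, i.e. each active element receives
 * the count of set vs2 bits at indices strictly below it.
 */
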
4609 /* Vector Element Index Instruction */
4610 #define GEN_VEXT_VID_V(NAME, ETYPE, H)                                    \
4611 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc)  \
4612 {                                                                         \
4613     uint32_t vm = vext_vm(desc);                                          \
4614     uint32_t vl = env->vl;                                                \
4615     int i;                                                                \
4616                                                                           \
4617     for (i = env->vstart; i < vl; i++) {                                  \
4618         if (!vm && !vext_elem_mask(v0, i)) {                              \
4619             continue;                                                     \
4620         }                                                                 \
4621         *((ETYPE *)vd + H(i)) = i;                                        \
4622     }                                                                     \
4623     env->vstart = 0;                                                      \
4624 }
4625 
4626 GEN_VEXT_VID_V(vid_v_b, uint8_t,  H1)
4627 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
4628 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
4629 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
4630 
4631 /*
4632  *** Vector Permutation Instructions
4633  */
4634 
4635 /* Vector Slide Instructions */
4636 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                              \
4637 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4638                   CPURISCVState *env, uint32_t desc)                      \
4639 {                                                                         \
4640     uint32_t vm = vext_vm(desc);                                          \
4641     uint32_t vl = env->vl;                                                \
4642     target_ulong offset = s1, i_min, i;                                   \
4643                                                                           \
4644     i_min = MAX(env->vstart, offset);                                     \
4645     for (i = i_min; i < vl; i++) {                                        \
4646         if (!vm && !vext_elem_mask(v0, i)) {                              \
4647             continue;                                                     \
4648         }                                                                 \
4649         *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
4650     }                                                                     \
4651 }
4652 
4653 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
4654 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t,  H1)
4655 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
4656 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
4657 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
4658 
4659 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                            \
4660 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4661                   CPURISCVState *env, uint32_t desc)                      \
4662 {                                                                         \
4663     uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
4664     uint32_t vm = vext_vm(desc);                                          \
4665     uint32_t vl = env->vl;                                                \
4666     target_ulong i_max, i;                                                \
4667                                                                           \
4668     i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart);       \
4669     for (i = env->vstart; i < i_max; ++i) {                               \
4670         if (vm || vext_elem_mask(v0, i)) {                                \
4671             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));          \
4672         }                                                                 \
4673     }                                                                     \
4674                                                                           \
4675     for (i = i_max; i < vl; ++i) {                                        \
4676         if (vm || vext_elem_mask(v0, i)) {                                \
4677             *((ETYPE *)vd + H(i)) = 0;                                    \
4678         }                                                                 \
4679     }                                                                     \
4680                                                                           \
4681     env->vstart = 0;                                                      \
4682 }
4683 
4684 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
4685 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t,  H1)
4686 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
4687 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
4688 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
4689 
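/*
 * Slide examples with vl = vlmax = 4, vs2 = {a, b, c, d} and all
 * elements active:
 *   vslideup.vx   with rs1 = 2 writes vd[2] = a, vd[3] = b and leaves
 *                 vd[0..1] unchanged;
 *   vslidedown.vx with rs1 = 1 writes vd = {b, c, d, 0}, elements
 *                 sourced from beyond vlmax being zeroed.
 */
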
4690 #define GEN_VEXT_VSLIDE1UP(ESZ, H)                                          \
4691 static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
4692                      CPURISCVState *env, uint32_t desc)                     \
4693 {                                                                           \
4694     typedef uint##ESZ##_t ETYPE;                                            \
4695     uint32_t vm = vext_vm(desc);                                            \
4696     uint32_t vl = env->vl;                                                  \
4697     uint32_t i;                                                             \
4698                                                                             \
4699     for (i = env->vstart; i < vl; i++) {                                    \
4700         if (!vm && !vext_elem_mask(v0, i)) {                                \
4701             continue;                                                       \
4702         }                                                                   \
4703         if (i == 0) {                                                       \
4704             *((ETYPE *)vd + H(i)) = s1;                                     \
4705         } else {                                                            \
4706             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));             \
4707         }                                                                   \
4708     }                                                                       \
4709     env->vstart = 0;                                                        \
4710 }
4711 
4712 GEN_VEXT_VSLIDE1UP(8,  H1)
4713 GEN_VEXT_VSLIDE1UP(16, H2)
4714 GEN_VEXT_VSLIDE1UP(32, H4)
4715 GEN_VEXT_VSLIDE1UP(64, H8)
4716 
4717 #define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ)                          \
4718 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4719                   CPURISCVState *env, uint32_t desc)              \
4720 {                                                                 \
4721     vslide1up_##ESZ(vd, v0, s1, vs2, env, desc);                  \
4722 }
4723 
4724 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
4725 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
4726 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
4727 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
4728 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
4729 
4730 #define GEN_VEXT_VSLIDE1DOWN(ESZ, H)                                          \
4731 static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
4732                        CPURISCVState *env, uint32_t desc)                     \
4733 {                                                                             \
4734     typedef uint##ESZ##_t ETYPE;                                              \
4735     uint32_t vm = vext_vm(desc);                                              \
4736     uint32_t vl = env->vl;                                                    \
4737     uint32_t i;                                                               \
4738                                                                               \
4739     for (i = env->vstart; i < vl; i++) {                                      \
4740         if (!vm && !vext_elem_mask(v0, i)) {                                  \
4741             continue;                                                         \
4742         }                                                                     \
4743         if (i == vl - 1) {                                                    \
4744             *((ETYPE *)vd + H(i)) = s1;                                       \
4745         } else {                                                              \
4746             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));               \
4747         }                                                                     \
4748     }                                                                         \
4749     env->vstart = 0;                                                          \
4750 }
4751 
4752 GEN_VEXT_VSLIDE1DOWN(8,  H1)
4753 GEN_VEXT_VSLIDE1DOWN(16, H2)
4754 GEN_VEXT_VSLIDE1DOWN(32, H4)
4755 GEN_VEXT_VSLIDE1DOWN(64, H8)
4756 
4757 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ)                        \
4758 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4759                   CPURISCVState *env, uint32_t desc)              \
4760 {                                                                 \
4761     vslide1down_##ESZ(vd, v0, s1, vs2, env, desc);                \
4762 }
4763 
4764 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
4765 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
4766 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
4767 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
4768 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
4769 
4770 /* Vector Floating-Point Slide Instructions */
4771 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ)                     \
4772 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4773                   CPURISCVState *env, uint32_t desc)          \
4774 {                                                             \
4775     vslide1up_##ESZ(vd, v0, s1, vs2, env, desc);              \
4776 }
4777 
4778 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
4779 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
4780 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
4781 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)
4782 
4783 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ)                   \
4784 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4785                   CPURISCVState *env, uint32_t desc)          \
4786 {                                                             \
4787     vslide1down_##ESZ(vd, v0, s1, vs2, env, desc);            \
4788 }
4789 
4790 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
4791 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
4792 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
4793 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
4794 
4795 /* Vector Register Gather Instruction */
4796 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2)                    \
4797 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
4798                   CPURISCVState *env, uint32_t desc)                      \
4799 {                                                                         \
4800     uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2)));             \
4801     uint32_t vm = vext_vm(desc);                                          \
4802     uint32_t vl = env->vl;                                                \
4803     uint64_t index;                                                       \
4804     uint32_t i;                                                           \
4805                                                                           \
4806     for (i = env->vstart; i < vl; i++) {                                  \
4807         if (!vm && !vext_elem_mask(v0, i)) {                              \
4808             continue;                                                     \
4809         }                                                                 \
4810         index = *((TS1 *)vs1 + HS1(i));                                   \
4811         if (index >= vlmax) {                                             \
4812             *((TS2 *)vd + HS2(i)) = 0;                                    \
4813         } else {                                                          \
4814             *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index));           \
4815         }                                                                 \
4816     }                                                                     \
4817     env->vstart = 0;                                                      \
4818 }
4819 
4820 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
4821 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t,  uint8_t,  H1, H1)
4822 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
4823 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
4824 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)
4825 
4826 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t,  H2, H1)
4827 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
4828 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
4829 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
4830 
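/*
 * Gather example: with vlmax = 4, vs2 = {10, 20, 30, 40} and index
 * vector vs1 = {3, 0, 5, 1}, all elements active, vrgather.vv writes
 * vd = {40, 10, 0, 20}; any index >= vlmax selects the value 0.
 */
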
4831 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                              \
4832 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4833                   CPURISCVState *env, uint32_t desc)                      \
4834 {                                                                         \
4835     uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
4836     uint32_t vm = vext_vm(desc);                                          \
4837     uint32_t vl = env->vl;                                                \
4838     uint64_t index = s1;                                                  \
4839     uint32_t i;                                                           \
4840                                                                           \
4841     for (i = env->vstart; i < vl; i++) {                                  \
4842         if (!vm && !vext_elem_mask(v0, i)) {                              \
4843             continue;                                                     \
4844         }                                                                 \
4845         if (index >= vlmax) {                                             \
4846             *((ETYPE *)vd + H(i)) = 0;                                    \
4847         } else {                                                          \
4848             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
4849         }                                                                 \
4850     }                                                                     \
4851     env->vstart = 0;                                                      \
4852 }
4853 
4854 /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
4855 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t,  H1)
4856 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
4857 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
4858 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
4859 
4860 /* Vector Compress Instruction */
4861 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
4862 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
4863                   CPURISCVState *env, uint32_t desc)                      \
4864 {                                                                         \
4865     uint32_t vl = env->vl;                                                \
4866     uint32_t num = 0, i;                                                  \
4867                                                                           \
4868     for (i = env->vstart; i < vl; i++) {                                  \
4869         if (!vext_elem_mask(vs1, i)) {                                    \
4870             continue;                                                     \
4871         }                                                                 \
4872         *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
4873         num++;                                                            \
4874     }                                                                     \
4875     env->vstart = 0;                                                      \
4876 }
4877 
4878 /* Compress into vd the elements of vs2 whose vs1 mask bit is set */
4879 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t,  H1)
4880 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
4881 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
4882 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
4883 
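/*
 * Compress example: with vl = 4, mask vs1 = {1, 0, 1, 1} and
 * vs2 = {5, 6, 7, 8}, the helper writes vd[0] = 5, vd[1] = 7,
 * vd[2] = 8; elements of vd past the packed result are not touched
 * here.
 */
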
4884 /* Vector Whole Register Move */
4885 #define GEN_VEXT_VMV_WHOLE(NAME, LEN)                      \
4886 void HELPER(NAME)(void *vd, void *vs2, CPURISCVState *env, \
4887                   uint32_t desc)                           \
4888 {                                                          \
4889     /* EEW = 8 */                                          \
4890     uint32_t maxsz = simd_maxsz(desc);                     \
4891     uint32_t i = env->vstart;                              \
4892                                                            \
4893     memcpy((uint8_t *)vd + H1(i),                          \
4894            (uint8_t *)vs2 + H1(i),                         \
4895            maxsz - env->vstart);                           \
4896                                                            \
4897     env->vstart = 0;                                       \
4898 }
4899 
4900 GEN_VEXT_VMV_WHOLE(vmv1r_v, 1)
4901 GEN_VEXT_VMV_WHOLE(vmv2r_v, 2)
4902 GEN_VEXT_VMV_WHOLE(vmv4r_v, 4)
4903 GEN_VEXT_VMV_WHOLE(vmv8r_v, 8)
4904 
4905 /* Vector Integer Extension */
4906 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)            \
4907 void HELPER(NAME)(void *vd, void *v0, void *vs2,                 \
4908                   CPURISCVState *env, uint32_t desc)             \
4909 {                                                                \
4910     uint32_t vl = env->vl;                                       \
4911     uint32_t vm = vext_vm(desc);                                 \
4912     uint32_t i;                                                  \
4913                                                                  \
4914     for (i = env->vstart; i < vl; i++) {                         \
4915         if (!vm && !vext_elem_mask(v0, i)) {                     \
4916             continue;                                            \
4917         }                                                        \
4918         *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));       \
4919     }                                                            \
4920     env->vstart = 0;                                             \
4921 }
4922 
4923 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t,  H2, H1)
4924 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
4925 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
4926 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t,  H4, H1)
4927 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
4928 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t,  H8, H1)
4929 
4930 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t,  H2, H1)
4931 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
4932 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
4933 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t,  H4, H1)
4934 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
4935 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t,  H8, H1)
4936
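/*
 * Extension example: for vzext.vf2 a source byte 0x80 becomes the
 * halfword 0x0080, while vsext.vf2 sign-extends it to 0xff80; the
 * widening is performed by the implicit integer conversion in the
 * assignment above.
 */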