xref: /openbmc/qemu/target/riscv/vector_helper.c (revision f06193c4)
1 /*
2  * RISC-V Vector Extension Helpers for QEMU.
3  *
4  * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2 or later, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License along with
16  * this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include "qemu/osdep.h"
20 #include "qemu/host-utils.h"
21 #include "qemu/bitops.h"
22 #include "cpu.h"
23 #include "exec/memop.h"
24 #include "exec/exec-all.h"
25 #include "exec/helper-proto.h"
26 #include "fpu/softfloat.h"
27 #include "tcg/tcg-gvec-desc.h"
28 #include "internals.h"
29 #include <math.h>
30 
31 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
32                             target_ulong s2)
33 {
34     int vlmax, vl;
35     RISCVCPU *cpu = env_archcpu(env);
36     uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
37     uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
38     uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
39     int xlen = riscv_cpu_xlen(env);
40     bool vill = (s2 >> (xlen - 1)) & 0x1;
41     target_ulong reserved = s2 &
42                             MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
43                                             xlen - 1 - R_VTYPE_RESERVED_SHIFT);
44 
45     if (lmul & 4) {
46         /* Fractional LMUL. */
47         if (lmul == 4 ||
48             cpu->cfg.elen >> (8 - lmul) < sew) {
49             vill = true;
50         }
51     }
52 
53     if ((sew > cpu->cfg.elen)
54         || vill
55         || (ediv != 0)
56         || (reserved != 0)) {
57         /* only set vill bit. */
58         env->vill = 1;
59         env->vtype = 0;
60         env->vl = 0;
61         env->vstart = 0;
62         return 0;
63     }
64 
65     vlmax = vext_get_vlmax(cpu, s2);
66     if (s1 <= vlmax) {
67         vl = s1;
68     } else {
69         vl = vlmax;
70     }
71     env->vl = vl;
72     env->vtype = s2;
73     env->vstart = 0;
74     env->vill = 0;
75     return vl;
76 }
77 
/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that need a host-endian fixup.
 *
 * In this file H1, H2, H4 and H8 are applied to the index of 1-, 2-, 4-
 * and 8-byte elements respectively.
 */
#if !HOST_BIG_ENDIAN
/* Little-endian host: the element index is used unchanged. */
#define H1(idx)   (idx)
#define H1_2(idx) (idx)
#define H1_4(idx) (idx)
#define H2(idx)   (idx)
#define H4(idx)   (idx)
#define H8(idx)   (idx)
#else
/* Big-endian host: flip the index within each 64-bit chunk. */
#define H1(idx)   ((idx) ^ 7)
#define H1_2(idx) ((idx) ^ 6)
#define H1_4(idx) ((idx) ^ 4)
#define H2(idx)   ((idx) ^ 3)
#define H4(idx)   ((idx) ^ 1)
#define H8(idx)   (idx)
#endif
97 
98 static inline uint32_t vext_nf(uint32_t desc)
99 {
100     return FIELD_EX32(simd_data(desc), VDATA, NF);
101 }
102 
103 static inline uint32_t vext_vm(uint32_t desc)
104 {
105     return FIELD_EX32(simd_data(desc), VDATA, VM);
106 }
107 
108 /*
109  * Encode LMUL to lmul as following:
110  *     LMUL    vlmul    lmul
111  *      1       000       0
112  *      2       001       1
113  *      4       010       2
114  *      8       011       3
115  *      -       100       -
116  *     1/8      101      -3
117  *     1/4      110      -2
118  *     1/2      111      -1
119  */
120 static inline int32_t vext_lmul(uint32_t desc)
121 {
122     return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
123 }
124 
125 /*
126  * Get the maximum number of elements can be operated.
127  *
128  * esz: log2 of element size in bytes.
129  */
130 static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
131 {
132     /*
133      * As simd_desc support at most 2048 bytes, the max vlen is 1024 bits.
134      * so vlen in bytes (vlenb) is encoded as maxsz.
135      */
136     uint32_t vlenb = simd_maxsz(desc);
137 
138     /* Return VLMAX */
139     int scale = vext_lmul(desc) - esz;
140     return scale < 0 ? vlenb >> -scale : vlenb << scale;
141 }
142 
143 static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
144 {
145     return (addr & env->cur_pmmask) | env->cur_pmbase;
146 }
147 
148 /*
149  * This function checks watchpoint before real load operation.
150  *
151  * In softmmu mode, the TLB API probe_access is enough for watchpoint check.
152  * In user mode, there is no watchpoint support now.
153  *
154  * It will trigger an exception if there is no mapping in TLB
155  * and page table walk can't fill the TLB entry. Then the guest
156  * software can return here after process the exception or never return.
157  */
158 static void probe_pages(CPURISCVState *env, target_ulong addr,
159                         target_ulong len, uintptr_t ra,
160                         MMUAccessType access_type)
161 {
162     target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
163     target_ulong curlen = MIN(pagelen, len);
164 
165     probe_access(env, adjust_addr(env, addr), curlen, access_type,
166                  cpu_mmu_index(env, false), ra);
167     if (len > curlen) {
168         addr += curlen;
169         curlen = len - curlen;
170         probe_access(env, adjust_addr(env, addr), curlen, access_type,
171                      cpu_mmu_index(env, false), ra);
172     }
173 }
174 
/*
 * Write bit 0 of value into mask bit number index of the mask register at
 * v0, which is accessed as an array of 64-bit words.
 */
static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    uint64_t *word = (uint64_t *)v0 + index / 64;
    const int bit = index % 64;

    *word = deposit64(*word, bit, 1, value);
}
183 
/*
 * Earlier designs (pre-0.9) had a varying number of bits
 * per mask value (MLEN). In the 0.9 design, MLEN=1.
 * (Section 4.5)
 *
 * Return mask bit number index (0 or 1) of the mask register at v0, which
 * is read as an array of 64-bit words.
 */
static inline int vext_elem_mask(void *v0, int index)
{
    const uint64_t *words = v0;
    const uint64_t word = words[index / 64];

    return (word >> (index % 64)) & 1;
}
195 
/*
 * Per-element load/store callback.
 *
 * Transfers one element between guest address addr and element slot idx
 * of the register data at vd. retaddr is handed to the cpu_*_data_ra
 * accessors by the implementations generated below.
 */
typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
                               uint32_t idx, void *vd, uintptr_t retaddr);
199 
200 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
201 static void NAME(CPURISCVState *env, abi_ptr addr,         \
202                  uint32_t idx, void *vd, uintptr_t retaddr)\
203 {                                                          \
204     ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
205     *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
206 }                                                          \
207 
208 GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
209 GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
210 GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
211 GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)
212 
213 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
214 static void NAME(CPURISCVState *env, abi_ptr addr,         \
215                  uint32_t idx, void *vd, uintptr_t retaddr)\
216 {                                                          \
217     ETYPE data = *((ETYPE *)vd + H(idx));                  \
218     cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
219 }
220 
221 GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
222 GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
223 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
224 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
225 
226 /*
227  *** stride: access vector element from strided memory
228  */
229 static void
230 vext_ldst_stride(void *vd, void *v0, target_ulong base,
231                  target_ulong stride, CPURISCVState *env,
232                  uint32_t desc, uint32_t vm,
233                  vext_ldst_elem_fn *ldst_elem,
234                  uint32_t esz, uintptr_t ra, MMUAccessType access_type)
235 {
236     uint32_t i, k;
237     uint32_t nf = vext_nf(desc);
238     uint32_t max_elems = vext_max_elems(desc, esz);
239 
240     for (i = env->vstart; i < env->vl; i++, env->vstart++) {
241         if (!vm && !vext_elem_mask(v0, i)) {
242             continue;
243         }
244 
245         k = 0;
246         while (k < nf) {
247             target_ulong addr = base + stride * i + (k << esz);
248             ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
249             k++;
250         }
251     }
252     env->vstart = 0;
253 }
254 
255 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
256 void HELPER(NAME)(void *vd, void * v0, target_ulong base,               \
257                   target_ulong stride, CPURISCVState *env,              \
258                   uint32_t desc)                                        \
259 {                                                                       \
260     uint32_t vm = vext_vm(desc);                                        \
261     vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
262                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);      \
263 }
264 
265 GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b)
266 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
267 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
268 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)
269 
270 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                       \
271 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
272                   target_ulong stride, CPURISCVState *env,              \
273                   uint32_t desc)                                        \
274 {                                                                       \
275     uint32_t vm = vext_vm(desc);                                        \
276     vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
277                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);     \
278 }
279 
280 GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b)
281 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
282 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
283 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
284 
285 /*
286  *** unit-stride: access elements stored contiguously in memory
287  */
288 
289 /* unmasked unit-stride load and store operation*/
290 static void
291 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
292              vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl,
293              uintptr_t ra, MMUAccessType access_type)
294 {
295     uint32_t i, k;
296     uint32_t nf = vext_nf(desc);
297     uint32_t max_elems = vext_max_elems(desc, esz);
298 
299     /* load bytes from guest memory */
300     for (i = env->vstart; i < evl; i++, env->vstart++) {
301         k = 0;
302         while (k < nf) {
303             target_ulong addr = base + ((i * nf + k) << esz);
304             ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
305             k++;
306         }
307     }
308     env->vstart = 0;
309 }
310 
/*
 * A masked unit-stride load or store is a special case of a strided access,
 * with stride = NF * sizeof(ETYPE)
 */
315 
316 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN)                            \
317 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
318                          CPURISCVState *env, uint32_t desc)             \
319 {                                                                       \
320     uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
321     vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
322                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);      \
323 }                                                                       \
324                                                                         \
325 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
326                   CPURISCVState *env, uint32_t desc)                    \
327 {                                                                       \
328     vext_ldst_us(vd, base, env, desc, LOAD_FN,                          \
329                  ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_LOAD); \
330 }
331 
332 GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b)
333 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
334 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
335 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)
336 
337 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN)                            \
338 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,          \
339                          CPURISCVState *env, uint32_t desc)              \
340 {                                                                        \
341     uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));              \
342     vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,   \
343                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);      \
344 }                                                                        \
345                                                                          \
346 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                 \
347                   CPURISCVState *env, uint32_t desc)                     \
348 {                                                                        \
349     vext_ldst_us(vd, base, env, desc, STORE_FN,                          \
350                  ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_STORE); \
351 }
352 
353 GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b)
354 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
355 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
356 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)
357 
358 /*
359  *** unit stride mask load and store, EEW = 1
360  */
361 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
362                     CPURISCVState *env, uint32_t desc)
363 {
364     /* evl = ceil(vl/8) */
365     uint8_t evl = (env->vl + 7) >> 3;
366     vext_ldst_us(vd, base, env, desc, lde_b,
367                  0, evl, GETPC(), MMU_DATA_LOAD);
368 }
369 
370 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
371                     CPURISCVState *env, uint32_t desc)
372 {
373     /* evl = ceil(vl/8) */
374     uint8_t evl = (env->vl + 7) >> 3;
375     vext_ldst_us(vd, base, env, desc, ste_b,
376                  0, evl, GETPC(), MMU_DATA_STORE);
377 }
378 
379 /*
380  *** index: access vector element from indexed memory
381  */
382 typedef target_ulong vext_get_index_addr(target_ulong base,
383         uint32_t idx, void *vs2);
384 
385 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
386 static target_ulong NAME(target_ulong base,            \
387                          uint32_t idx, void *vs2)      \
388 {                                                      \
389     return (base + *((ETYPE *)vs2 + H(idx)));          \
390 }
391 
392 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t,  H1)
393 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
394 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
395 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
396 
397 static inline void
398 vext_ldst_index(void *vd, void *v0, target_ulong base,
399                 void *vs2, CPURISCVState *env, uint32_t desc,
400                 vext_get_index_addr get_index_addr,
401                 vext_ldst_elem_fn *ldst_elem,
402                 uint32_t esz, uintptr_t ra, MMUAccessType access_type)
403 {
404     uint32_t i, k;
405     uint32_t nf = vext_nf(desc);
406     uint32_t vm = vext_vm(desc);
407     uint32_t max_elems = vext_max_elems(desc, esz);
408 
409     /* load bytes from guest memory */
410     for (i = env->vstart; i < env->vl; i++, env->vstart++) {
411         if (!vm && !vext_elem_mask(v0, i)) {
412             continue;
413         }
414 
415         k = 0;
416         while (k < nf) {
417             abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz);
418             ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
419             k++;
420         }
421     }
422     env->vstart = 0;
423 }
424 
425 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)                  \
426 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                   \
427                   void *vs2, CPURISCVState *env, uint32_t desc)            \
428 {                                                                          \
429     vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,                \
430                     LOAD_FN, ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
431 }
432 
433 GEN_VEXT_LD_INDEX(vlxei8_8_v,   int8_t,  idx_b, lde_b)
434 GEN_VEXT_LD_INDEX(vlxei8_16_v,  int16_t, idx_b, lde_h)
435 GEN_VEXT_LD_INDEX(vlxei8_32_v,  int32_t, idx_b, lde_w)
436 GEN_VEXT_LD_INDEX(vlxei8_64_v,  int64_t, idx_b, lde_d)
437 GEN_VEXT_LD_INDEX(vlxei16_8_v,  int8_t,  idx_h, lde_b)
438 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
439 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
440 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
441 GEN_VEXT_LD_INDEX(vlxei32_8_v,  int8_t,  idx_w, lde_b)
442 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
443 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
444 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
445 GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b)
446 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
447 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
448 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)
449 
450 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)       \
451 void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
452                   void *vs2, CPURISCVState *env, uint32_t desc)  \
453 {                                                                \
454     vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
455                     STORE_FN, ctzl(sizeof(ETYPE)),               \
456                     GETPC(), MMU_DATA_STORE);                    \
457 }
458 
459 GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
460 GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
461 GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
462 GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
463 GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
464 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
465 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
466 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
467 GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
468 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
469 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
470 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
471 GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
472 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
473 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
474 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)
475 
/*
 *** unit-stride fault-only-first load instructions
 */
/*
 * Common body of the unit-stride fault-only-first loads.
 *
 * Pass 1 probes every active element from vstart up to vl. Element 0 is
 * probed with probe_pages(). For a later element, probing stops and vl is
 * trimmed to that element's index when tlb_vaddr_to_host() returns NULL
 * or, in user mode, when page_check_range() reports a failure.
 * Pass 2 loads the active elements below the (possibly trimmed) vl through
 * ldst_elem.
 *
 * esz is log2 of the element size in bytes; ra is the host return address
 * handed to the probe and load routines.
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t esz, uintptr_t ra)
{
    void *host;
    /* vl stays 0 unless probing decides to trim the vector length */
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);
    target_ulong addr, offset, remain;

    /* probe every access */
    for (i = env->vstart; i < env->vl; i++) {
        /* masked-off elements are not probed */
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        /* address of the first field of element i; nf << esz bytes each */
        addr = adjust_addr(env, base + i * (nf << esz));
        if (i == 0) {
            probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf << esz;
            while (remain > 0) {
                /* bytes from addr up to the end of its page */
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
                                         cpu_mmu_index(env, false));
                if (host) {
#ifdef CONFIG_USER_ONLY
                    if (page_check_range(addr, offset, PAGE_READ) < 0) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
#endif
                } else {
                    /* no host mapping: stop here and trim vl to i */
                    vl = i;
                    goto ProbeSuccess;
                }
                /* the rest of this element fits in the current page */
                if (remain <=  offset) {
                    break;
                }
                /* continue probing on the following page */
                remain -= offset;
                addr = adjust_addr(env, addr + offset);
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = env->vstart; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        /* field k of element i lands in register element i + k * VLMAX */
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}
546 
547 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
548 void HELPER(NAME)(void *vd, void *v0, target_ulong base,  \
549                   CPURISCVState *env, uint32_t desc)      \
550 {                                                         \
551     vext_ldff(vd, v0, base, env, desc, LOAD_FN,           \
552               ctzl(sizeof(ETYPE)), GETPC());              \
553 }
554 
555 GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b)
556 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
557 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
558 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
559 
/*
 * Binary element operators, used as the OP argument of the per-element
 * generator macros. Every argument is parenthesized in the expansion so
 * that a compound expression passed as N or M keeps its own grouping.
 */
#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  ((N) & (M))
#define DO_XOR(N, M)  ((N) ^ (M))
#define DO_OR(N, M)   ((N) | (M))
#define DO_ADD(N, M)  ((N) + (M))

/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))

/*
 * Unsigned min/max: both operands are cast to UMTYPE before comparing.
 * UMTYPE is not defined in this part of the file; the user of these
 * macros has to provide it.
 */
#define DO_MAXU(N, M) DO_MAX((UMTYPE)(N), (UMTYPE)(M))
#define DO_MINU(N, M) DO_MIN((UMTYPE)(N), (UMTYPE)(M))
573 
574 /*
575  *** load and store whole register instructions
576  */
577 static void
578 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
579                 vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra,
580                 MMUAccessType access_type)
581 {
582     uint32_t i, k, off, pos;
583     uint32_t nf = vext_nf(desc);
584     uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
585     uint32_t max_elems = vlenb >> esz;
586 
587     k = env->vstart / max_elems;
588     off = env->vstart % max_elems;
589 
590     if (off) {
591         /* load/store rest of elements of current segment pointed by vstart */
592         for (pos = off; pos < max_elems; pos++, env->vstart++) {
593             target_ulong addr = base + ((pos + k * max_elems) << esz);
594             ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra);
595         }
596         k++;
597     }
598 
599     /* load/store elements for rest of segments */
600     for (; k < nf; k++) {
601         for (i = 0; i < max_elems; i++, env->vstart++) {
602             target_ulong addr = base + ((i + k * max_elems) << esz);
603             ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
604         }
605     }
606 
607     env->vstart = 0;
608 }
609 
610 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN)      \
611 void HELPER(NAME)(void *vd, target_ulong base,       \
612                   CPURISCVState *env, uint32_t desc) \
613 {                                                    \
614     vext_ldst_whole(vd, base, env, desc, LOAD_FN,    \
615                     ctzl(sizeof(ETYPE)), GETPC(),    \
616                     MMU_DATA_LOAD);                  \
617 }
618 
619 GEN_VEXT_LD_WHOLE(vl1re8_v,  int8_t,  lde_b)
620 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
621 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
622 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
623 GEN_VEXT_LD_WHOLE(vl2re8_v,  int8_t,  lde_b)
624 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
625 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
626 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
627 GEN_VEXT_LD_WHOLE(vl4re8_v,  int8_t,  lde_b)
628 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
629 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
630 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
631 GEN_VEXT_LD_WHOLE(vl8re8_v,  int8_t,  lde_b)
632 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
633 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
634 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)
635 
636 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN)     \
637 void HELPER(NAME)(void *vd, target_ulong base,       \
638                   CPURISCVState *env, uint32_t desc) \
639 {                                                    \
640     vext_ldst_whole(vd, base, env, desc, STORE_FN,   \
641                     ctzl(sizeof(ETYPE)), GETPC(),    \
642                     MMU_DATA_STORE);                 \
643 }
644 
645 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
646 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
647 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
648 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
649 
650 /*
651  *** Vector Integer Arithmetic Instructions
652  */
653 
/*
 * Expand the macro arguments before invoking macro, so that a type list
 * such as OP_SSS_B is split into its individual types first.
 */
#define RVVCALL(macro, ...)  macro(__VA_ARGS__)

/*
 * Type lists in the order (TD, T1, T2, TX1, TX2), as consumed by OPIVV2
 * and OPIVX2:
 *   TD  - type of the destination element
 *   T1  - type the first source operand is read as (or cast to)
 *   T2  - type the vs2 element is read as
 *   TX1 - type the first source operand is converted to before the op
 *   TX2 - type the vs2 element is converted to before the op
 * The _B/_H/_W/_D suffix names the element width of the list; the WOP_
 * lists have a destination twice as wide as the sources, and the NOP_
 * lists have a T2/TX2 twice as wide as the destination.
 */
#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
688 
/* Per-element worker for an operation on two vector source elements. */
typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);

/*
 * Generate do_<NAME>: read element i of vs1 (as T1) and of vs2 (as T2),
 * convert them to TX1 and TX2, and store OP(s2, s1) into element i of vd
 * (as TD). HD/HS1/HS2 are the host-endian index fixups.
 */
#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
{                                                               \
    TX1 s1 = ((T1 *)vs1)[HS1(i)];                               \
    TX2 s2 = ((T2 *)vs2)[HS2(i)];                               \
                                                                \
    ((TD *)vd)[HD(i)] = OP(s2, s1);                             \
}
/*
 * Subtraction and reverse subtraction. The arguments are parenthesized so
 * that a compound expression passed as N or M is subtracted as a whole.
 */
#define DO_SUB(N, M) ((N) - (M))
#define DO_RSUB(N, M) ((M) - (N))
701 
/*
 * Per-element workers do_vadd_vv_{b,h,w,d} and do_vsub_vv_{b,h,w,d} for
 * signed 8/16/32/64-bit elements.
 */
RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
710 
711 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
712                        CPURISCVState *env, uint32_t desc,
713                        uint32_t esz, uint32_t dsz,
714                        opivv2_fn *fn)
715 {
716     uint32_t vm = vext_vm(desc);
717     uint32_t vl = env->vl;
718     uint32_t i;
719 
720     for (i = env->vstart; i < vl; i++) {
721         if (!vm && !vext_elem_mask(v0, i)) {
722             continue;
723         }
724         fn(vd, vs1, vs2, i);
725     }
726     env->vstart = 0;
727 }
728 
/*
 * Generate the helpers for OPIVV.
 *
 * HELPER(NAME) runs the per-element worker do_<NAME> over the active
 * elements through do_vext_vv(). ESZ and DSZ are forwarded to
 * do_vext_vv(), which does not use them in the code visible here.
 */
#define GEN_VEXT_VV(NAME, ESZ, DSZ)                       \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,     \
               do_##NAME);                                \
}

GEN_VEXT_VV(vadd_vv_b, 1, 1)
GEN_VEXT_VV(vadd_vv_h, 2, 2)
GEN_VEXT_VV(vadd_vv_w, 4, 4)
GEN_VEXT_VV(vadd_vv_d, 8, 8)
GEN_VEXT_VV(vsub_vv_b, 1, 1)
GEN_VEXT_VV(vsub_vv_h, 2, 2)
GEN_VEXT_VV(vsub_vv_w, 4, 4)
GEN_VEXT_VV(vsub_vv_d, 8, 8)
747 
748 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
749 
750 /*
751  * (T1)s1 gives the real operator type.
752  * (TX1)(T1)s1 expands the operator type of widen or narrow operations.
753  */
754 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
755 static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
756 {                                                                   \
757     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
758     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
759 }
760 
761 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
762 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
763 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
764 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
765 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
766 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
767 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
768 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
769 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
770 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
771 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
772 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
773 
774 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
775                        CPURISCVState *env, uint32_t desc,
776                        uint32_t esz, uint32_t dsz,
777                        opivx2_fn fn)
778 {
779     uint32_t vm = vext_vm(desc);
780     uint32_t vl = env->vl;
781     uint32_t i;
782 
783     for (i = env->vstart; i < vl; i++) {
784         if (!vm && !vext_elem_mask(v0, i)) {
785             continue;
786         }
787         fn(vd, s1, vs2, i);
788     }
789     env->vstart = 0;
790 }
791 
792 /* generate the helpers for OPIVX */
793 #define GEN_VEXT_VX(NAME, ESZ, DSZ)                       \
794 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
795                   void *vs2, CPURISCVState *env,          \
796                   uint32_t desc)                          \
797 {                                                         \
798     do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ,      \
799                do_##NAME);                                \
800 }
801 
802 GEN_VEXT_VX(vadd_vx_b, 1, 1)
803 GEN_VEXT_VX(vadd_vx_h, 2, 2)
804 GEN_VEXT_VX(vadd_vx_w, 4, 4)
805 GEN_VEXT_VX(vadd_vx_d, 8, 8)
806 GEN_VEXT_VX(vsub_vx_b, 1, 1)
807 GEN_VEXT_VX(vsub_vx_h, 2, 2)
808 GEN_VEXT_VX(vsub_vx_w, 4, 4)
809 GEN_VEXT_VX(vsub_vx_d, 8, 8)
810 GEN_VEXT_VX(vrsub_vx_b, 1, 1)
811 GEN_VEXT_VX(vrsub_vx_h, 2, 2)
812 GEN_VEXT_VX(vrsub_vx_w, 4, 4)
813 GEN_VEXT_VX(vrsub_vx_d, 8, 8)
814 
815 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
816 {
817     intptr_t oprsz = simd_oprsz(desc);
818     intptr_t i;
819 
820     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
821         *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
822     }
823 }
824 
825 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
826 {
827     intptr_t oprsz = simd_oprsz(desc);
828     intptr_t i;
829 
830     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
831         *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
832     }
833 }
834 
835 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
836 {
837     intptr_t oprsz = simd_oprsz(desc);
838     intptr_t i;
839 
840     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
841         *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
842     }
843 }
844 
845 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
846 {
847     intptr_t oprsz = simd_oprsz(desc);
848     intptr_t i;
849 
850     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
851         *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
852     }
853 }
854 
/* Vector Widening Integer Add/Subtract */

/*
 * Type tuples for the widening operations, in the order
 * (TD, T1, T2, TX1, TX2) taken by OPIVX2.
 *
 * WOP_xxx_*  : both source operands are narrow, the result is wide.
 * WOP_Wxxx_* : the T2 operand is already of the wide type.
 */

/* 8-bit sources, 16-bit result */
#define WOP_UUU_B   uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_SSS_B   int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_WUUU_B  uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
#define WOP_WSSS_B  int16_t, int8_t, int16_t, int16_t, int16_t

/* 16-bit sources, 32-bit result */
#define WOP_UUU_H   uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_SSS_H   int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_WUUU_H  uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
#define WOP_WSSS_H  int32_t, int16_t, int32_t, int32_t, int32_t

/* 32-bit sources, 64-bit result */
#define WOP_UUU_W   uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_W   int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_WUUU_W  uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
#define WOP_WSSS_W  int64_t, int32_t, int64_t, int64_t, int64_t
868 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
869 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
870 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
871 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
872 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
873 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
874 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
875 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
876 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
877 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
878 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
879 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
880 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
881 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
882 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
883 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
884 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
885 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
886 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
887 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
888 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
889 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
890 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
891 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
892 GEN_VEXT_VV(vwaddu_vv_b, 1, 2)
893 GEN_VEXT_VV(vwaddu_vv_h, 2, 4)
894 GEN_VEXT_VV(vwaddu_vv_w, 4, 8)
895 GEN_VEXT_VV(vwsubu_vv_b, 1, 2)
896 GEN_VEXT_VV(vwsubu_vv_h, 2, 4)
897 GEN_VEXT_VV(vwsubu_vv_w, 4, 8)
898 GEN_VEXT_VV(vwadd_vv_b, 1, 2)
899 GEN_VEXT_VV(vwadd_vv_h, 2, 4)
900 GEN_VEXT_VV(vwadd_vv_w, 4, 8)
901 GEN_VEXT_VV(vwsub_vv_b, 1, 2)
902 GEN_VEXT_VV(vwsub_vv_h, 2, 4)
903 GEN_VEXT_VV(vwsub_vv_w, 4, 8)
904 GEN_VEXT_VV(vwaddu_wv_b, 1, 2)
905 GEN_VEXT_VV(vwaddu_wv_h, 2, 4)
906 GEN_VEXT_VV(vwaddu_wv_w, 4, 8)
907 GEN_VEXT_VV(vwsubu_wv_b, 1, 2)
908 GEN_VEXT_VV(vwsubu_wv_h, 2, 4)
909 GEN_VEXT_VV(vwsubu_wv_w, 4, 8)
910 GEN_VEXT_VV(vwadd_wv_b, 1, 2)
911 GEN_VEXT_VV(vwadd_wv_h, 2, 4)
912 GEN_VEXT_VV(vwadd_wv_w, 4, 8)
913 GEN_VEXT_VV(vwsub_wv_b, 1, 2)
914 GEN_VEXT_VV(vwsub_wv_h, 2, 4)
915 GEN_VEXT_VV(vwsub_wv_w, 4, 8)
916 
917 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
918 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
919 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
920 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
921 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
922 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
923 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
924 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
925 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
926 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
927 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
928 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
929 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
930 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
931 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
932 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
933 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
934 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
935 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
936 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
937 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
938 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
939 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
940 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
941 GEN_VEXT_VX(vwaddu_vx_b, 1, 2)
942 GEN_VEXT_VX(vwaddu_vx_h, 2, 4)
943 GEN_VEXT_VX(vwaddu_vx_w, 4, 8)
944 GEN_VEXT_VX(vwsubu_vx_b, 1, 2)
945 GEN_VEXT_VX(vwsubu_vx_h, 2, 4)
946 GEN_VEXT_VX(vwsubu_vx_w, 4, 8)
947 GEN_VEXT_VX(vwadd_vx_b, 1, 2)
948 GEN_VEXT_VX(vwadd_vx_h, 2, 4)
949 GEN_VEXT_VX(vwadd_vx_w, 4, 8)
950 GEN_VEXT_VX(vwsub_vx_b, 1, 2)
951 GEN_VEXT_VX(vwsub_vx_h, 2, 4)
952 GEN_VEXT_VX(vwsub_vx_w, 4, 8)
953 GEN_VEXT_VX(vwaddu_wx_b, 1, 2)
954 GEN_VEXT_VX(vwaddu_wx_h, 2, 4)
955 GEN_VEXT_VX(vwaddu_wx_w, 4, 8)
956 GEN_VEXT_VX(vwsubu_wx_b, 1, 2)
957 GEN_VEXT_VX(vwsubu_wx_h, 2, 4)
958 GEN_VEXT_VX(vwsubu_wx_w, 4, 8)
959 GEN_VEXT_VX(vwadd_wx_b, 1, 2)
960 GEN_VEXT_VX(vwadd_wx_h, 2, 4)
961 GEN_VEXT_VX(vwadd_wx_w, 4, 8)
962 GEN_VEXT_VX(vwsub_wx_b, 1, 2)
963 GEN_VEXT_VX(vwsub_wx_h, 2, 4)
964 GEN_VEXT_VX(vwsub_wx_w, 4, 8)
965 
/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */

/*
 * N and M are the two operands and C is the carry-in (DO_VADC) or the
 * borrow-in (DO_VSBC).  Every argument is parenthesized so that an
 * expression passed as an argument keeps its own precedence inside the
 * expansion.
 */
#define DO_VADC(N, M, C) ((N) + (M) + (C))
#define DO_VSBC(N, M, C) ((N) - (M) - (C))
969 
970 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP)              \
971 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
972                   CPURISCVState *env, uint32_t desc)          \
973 {                                                             \
974     uint32_t vl = env->vl;                                    \
975     uint32_t i;                                               \
976                                                               \
977     for (i = env->vstart; i < vl; i++) {                      \
978         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
979         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
980         ETYPE carry = vext_elem_mask(v0, i);                  \
981                                                               \
982         *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry);         \
983     }                                                         \
984     env->vstart = 0;                                          \
985 }
986 
987 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t,  H1, DO_VADC)
988 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
989 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
990 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)
991 
992 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t,  H1, DO_VSBC)
993 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
994 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
995 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
996 
997 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP)                         \
998 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
999                   CPURISCVState *env, uint32_t desc)                     \
1000 {                                                                        \
1001     uint32_t vl = env->vl;                                               \
1002     uint32_t i;                                                          \
1003                                                                          \
1004     for (i = env->vstart; i < vl; i++) {                                 \
1005         ETYPE s2 = *((ETYPE *)vs2 + H(i));                               \
1006         ETYPE carry = vext_elem_mask(v0, i);                             \
1007                                                                          \
1008         *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
1009     }                                                                    \
1010     env->vstart = 0;                                          \
1011 }
1012 
1013 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t,  H1, DO_VADC)
1014 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
1015 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
1016 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)
1017 
1018 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t,  H1, DO_VSBC)
1019 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
1020 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
1021 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
1022 
/*
 * DO_MADC yields the carry-out of N + M + C, with the sum wrapped to the
 * type of N:
 *   - with a carry-in, the wrapped sum N + M + 1 is <= N exactly when the
 *     addition overflowed;
 *   - without one, the wrapped sum N + M is < N exactly when it overflowed.
 * The test relies on N having an unsigned type that wraps, which is what
 * the instantiations in this file pass.
 *
 * DO_MSBC yields the borrow-out of N - M - C: N < M, or N == M when there
 * is a borrow-in.
 *
 * Every argument is parenthesized so that an expression passed as an
 * argument keeps its own precedence.  N and M are still evaluated more
 * than once, so arguments must be free of side effects.
 */
#define DO_MADC(N, M, C) ((C) ? (__typeof(N))((N) + (M) + 1) <= (N) :   \
                                (__typeof(N))((N) + (M)) < (N))
#define DO_MSBC(N, M, C) ((C) ? (N) <= (M) : (N) < (M))
1026 
1027 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
1028 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
1029                   CPURISCVState *env, uint32_t desc)          \
1030 {                                                             \
1031     uint32_t vl = env->vl;                                    \
1032     uint32_t vm = vext_vm(desc);                              \
1033     uint32_t i;                                               \
1034                                                               \
1035     for (i = env->vstart; i < vl; i++) {                      \
1036         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
1037         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
1038         ETYPE carry = !vm && vext_elem_mask(v0, i);           \
1039         vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry));      \
1040     }                                                         \
1041     env->vstart = 0;                                          \
1042 }
1043 
1044 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t,  H1, DO_MADC)
1045 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
1046 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
1047 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)
1048 
1049 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t,  H1, DO_MSBC)
1050 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
1051 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
1052 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
1053 
1054 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)               \
1055 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
1056                   void *vs2, CPURISCVState *env, uint32_t desc) \
1057 {                                                               \
1058     uint32_t vl = env->vl;                                      \
1059     uint32_t vm = vext_vm(desc);                                \
1060     uint32_t i;                                                 \
1061                                                                 \
1062     for (i = env->vstart; i < vl; i++) {                        \
1063         ETYPE s2 = *((ETYPE *)vs2 + H(i));                      \
1064         ETYPE carry = !vm && vext_elem_mask(v0, i);             \
1065         vext_set_elem_mask(vd, i,                               \
1066                 DO_OP(s2, (ETYPE)(target_long)s1, carry));      \
1067     }                                                           \
1068     env->vstart = 0;                                            \
1069 }
1070 
1071 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t,  H1, DO_MADC)
1072 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
1073 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
1074 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)
1075 
1076 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t,  H1, DO_MSBC)
1077 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
1078 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
1079 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
1080 
1081 /* Vector Bitwise Logical Instructions */
1082 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
1083 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
1084 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
1085 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
1086 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
1087 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
1088 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
1089 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
1090 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
1091 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
1092 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
1093 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
1094 GEN_VEXT_VV(vand_vv_b, 1, 1)
1095 GEN_VEXT_VV(vand_vv_h, 2, 2)
1096 GEN_VEXT_VV(vand_vv_w, 4, 4)
1097 GEN_VEXT_VV(vand_vv_d, 8, 8)
1098 GEN_VEXT_VV(vor_vv_b, 1, 1)
1099 GEN_VEXT_VV(vor_vv_h, 2, 2)
1100 GEN_VEXT_VV(vor_vv_w, 4, 4)
1101 GEN_VEXT_VV(vor_vv_d, 8, 8)
1102 GEN_VEXT_VV(vxor_vv_b, 1, 1)
1103 GEN_VEXT_VV(vxor_vv_h, 2, 2)
1104 GEN_VEXT_VV(vxor_vv_w, 4, 4)
1105 GEN_VEXT_VV(vxor_vv_d, 8, 8)
1106 
1107 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
1108 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
1109 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
1110 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
1111 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
1112 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
1113 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
1114 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
1115 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
1116 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
1117 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
1118 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
1119 GEN_VEXT_VX(vand_vx_b, 1, 1)
1120 GEN_VEXT_VX(vand_vx_h, 2, 2)
1121 GEN_VEXT_VX(vand_vx_w, 4, 4)
1122 GEN_VEXT_VX(vand_vx_d, 8, 8)
1123 GEN_VEXT_VX(vor_vx_b, 1, 1)
1124 GEN_VEXT_VX(vor_vx_h, 2, 2)
1125 GEN_VEXT_VX(vor_vx_w, 4, 4)
1126 GEN_VEXT_VX(vor_vx_d, 8, 8)
1127 GEN_VEXT_VX(vxor_vx_b, 1, 1)
1128 GEN_VEXT_VX(vxor_vx_h, 2, 2)
1129 GEN_VEXT_VX(vxor_vx_w, 4, 4)
1130 GEN_VEXT_VX(vxor_vx_d, 8, 8)
1131 
/* Vector Single-Width Bit Shift Instructions */

/*
 * Shift N left / right by M bits.  Both arguments are parenthesized so
 * that an expression passed as N is shifted as a whole.
 *
 * DO_SRL is also used for the arithmetic right shifts in this file: the
 * vsra and vnsra instantiations pass a signed source type, so the result
 * then depends on how the compiler right-shifts negative values
 * (implementation-defined in C).
 */
#define DO_SLL(N, M)  ((N) << (M))
#define DO_SRL(N, M)  ((N) >> (M))
1135 
1136 /* generate the helpers for shift instructions with two vector operators */
1137 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK)             \
1138 void HELPER(NAME)(void *vd, void *v0, void *vs1,                          \
1139                   void *vs2, CPURISCVState *env, uint32_t desc)           \
1140 {                                                                         \
1141     uint32_t vm = vext_vm(desc);                                          \
1142     uint32_t vl = env->vl;                                                \
1143     uint32_t i;                                                           \
1144                                                                           \
1145     for (i = env->vstart; i < vl; i++) {                                  \
1146         if (!vm && !vext_elem_mask(v0, i)) {                              \
1147             continue;                                                     \
1148         }                                                                 \
1149         TS1 s1 = *((TS1 *)vs1 + HS1(i));                                  \
1150         TS2 s2 = *((TS2 *)vs2 + HS2(i));                                  \
1151         *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);                        \
1152     }                                                                     \
1153     env->vstart = 0;                                                      \
1154 }
1155 
1156 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t,  uint8_t, H1, H1, DO_SLL, 0x7)
1157 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
1158 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
1159 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)
1160 
1161 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1162 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1163 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1164 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1165 
1166 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t,  int8_t, H1, H1, DO_SRL, 0x7)
1167 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
1168 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1169 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1170 
/*
 * Generate the helper for a shift instruction with one vector and one
 * scalar operand.
 *
 *   TD, HD   - destination element type and index macro
 *   TS2, HS2 - type and index macro of the shifted element read from vs2
 *   OP       - shift operation, applied as OP(vs2[i], s1 & MASK)
 *   MASK     - mask applied to the scalar shift amount
 *
 * Elements in [env->vstart, env->vl) are processed; when vext_vm(desc) is
 * zero, elements for which vext_elem_mask(v0, i) is zero are skipped.
 * env->vstart is reset to 0 after the loop.
 */
#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,      \
        void *vs2, CPURISCVState *env, uint32_t desc)       \
{                                                           \
    const uint32_t vm = vext_vm(desc);                      \
    const uint32_t vl = env->vl;                            \
    const target_ulong shamt = s1 & MASK;                   \
    uint32_t i = env->vstart;                               \
                                                            \
    for (; i < vl; i++) {                                   \
        if (vm || vext_elem_mask(v0, i)) {                  \
            TS2 s2 = ((TS2 *)vs2)[HS2(i)];                  \
                                                            \
            ((TD *)vd)[HD(i)] = OP(s2, shamt);              \
        }                                                   \
    }                                                       \
    env->vstart = 0;                                        \
}
1189 
1190 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
1191 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
1192 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
1193 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)
1194 
1195 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1196 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1197 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1198 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1199 
1200 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
1201 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
1202 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1203 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1204 
1205 /* Vector Narrowing Integer Right Shift Instructions */
1206 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t,  uint16_t, H1, H2, DO_SRL, 0xf)
1207 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1208 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1209 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t,  int16_t, H1, H2, DO_SRL, 0xf)
1210 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1211 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1212 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1213 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1214 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1215 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
1216 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1217 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1218 
/* Vector Integer Comparison Instructions */

/*
 * Element comparisons yielding 0 or 1.  Signed versus unsigned behaviour
 * comes from the types of the operands.  Both arguments are parenthesized
 * so that an expression passed as an argument is compared as a whole.
 */
#define DO_MSEQ(N, M) ((N) == (M))
#define DO_MSNE(N, M) ((N) != (M))
#define DO_MSLT(N, M) ((N) < (M))
#define DO_MSLE(N, M) ((N) <= (M))
#define DO_MSGT(N, M) ((N) > (M))
1225 
1226 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                \
1227 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
1228                   CPURISCVState *env, uint32_t desc)          \
1229 {                                                             \
1230     uint32_t vm = vext_vm(desc);                              \
1231     uint32_t vl = env->vl;                                    \
1232     uint32_t i;                                               \
1233                                                               \
1234     for (i = env->vstart; i < vl; i++) {                      \
1235         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
1236         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
1237         if (!vm && !vext_elem_mask(v0, i)) {                  \
1238             continue;                                         \
1239         }                                                     \
1240         vext_set_elem_mask(vd, i, DO_OP(s2, s1));             \
1241     }                                                         \
1242     env->vstart = 0;                                          \
1243 }
1244 
1245 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
1246 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
1247 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
1248 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
1249 
1250 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t,  H1, DO_MSNE)
1251 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
1252 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
1253 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
1254 
1255 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t,  H1, DO_MSLT)
1256 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
1257 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
1258 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
1259 
1260 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t,  H1, DO_MSLT)
1261 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
1262 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
1263 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
1264 
1265 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t,  H1, DO_MSLE)
1266 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
1267 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
1268 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
1269 
1270 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t,  H1, DO_MSLE)
1271 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
1272 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
1273 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1274 
1275 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP)                      \
1276 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
1277                   CPURISCVState *env, uint32_t desc)                \
1278 {                                                                   \
1279     uint32_t vm = vext_vm(desc);                                    \
1280     uint32_t vl = env->vl;                                          \
1281     uint32_t i;                                                     \
1282                                                                     \
1283     for (i = env->vstart; i < vl; i++) {                            \
1284         ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
1285         if (!vm && !vext_elem_mask(v0, i)) {                        \
1286             continue;                                               \
1287         }                                                           \
1288         vext_set_elem_mask(vd, i,                                   \
1289                 DO_OP(s2, (ETYPE)(target_long)s1));                 \
1290     }                                                               \
1291     env->vstart = 0;                                                \
1292 }
1293 
1294 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t,  H1, DO_MSEQ)
1295 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
1296 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
1297 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
1298 
1299 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t,  H1, DO_MSNE)
1300 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
1301 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
1302 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
1303 
1304 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t,  H1, DO_MSLT)
1305 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
1306 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
1307 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
1308 
1309 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t,  H1, DO_MSLT)
1310 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
1311 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
1312 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
1313 
1314 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t,  H1, DO_MSLE)
1315 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
1316 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
1317 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
1318 
1319 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t,  H1, DO_MSLE)
1320 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
1321 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
1322 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
1323 
1324 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t,  H1, DO_MSGT)
1325 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
1326 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
1327 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
1328 
1329 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t,  H1, DO_MSGT)
1330 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
1331 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
1332 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
1333 
1334 /* Vector Integer Min/Max Instructions */
1335 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1336 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1337 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1338 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1339 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1340 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1341 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1342 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1343 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1344 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1345 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1346 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1347 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1348 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1349 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1350 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
1351 GEN_VEXT_VV(vminu_vv_b, 1, 1)
1352 GEN_VEXT_VV(vminu_vv_h, 2, 2)
1353 GEN_VEXT_VV(vminu_vv_w, 4, 4)
1354 GEN_VEXT_VV(vminu_vv_d, 8, 8)
1355 GEN_VEXT_VV(vmin_vv_b, 1, 1)
1356 GEN_VEXT_VV(vmin_vv_h, 2, 2)
1357 GEN_VEXT_VV(vmin_vv_w, 4, 4)
1358 GEN_VEXT_VV(vmin_vv_d, 8, 8)
1359 GEN_VEXT_VV(vmaxu_vv_b, 1, 1)
1360 GEN_VEXT_VV(vmaxu_vv_h, 2, 2)
1361 GEN_VEXT_VV(vmaxu_vv_w, 4, 4)
1362 GEN_VEXT_VV(vmaxu_vv_d, 8, 8)
1363 GEN_VEXT_VV(vmax_vv_b, 1, 1)
1364 GEN_VEXT_VV(vmax_vv_h, 2, 2)
1365 GEN_VEXT_VV(vmax_vv_w, 4, 4)
1366 GEN_VEXT_VV(vmax_vv_d, 8, 8)
1367 
1368 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1369 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1370 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1371 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1372 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1373 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1374 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1375 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1376 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1377 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1378 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1379 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1380 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1381 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1382 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1383 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
1384 GEN_VEXT_VX(vminu_vx_b, 1, 1)
1385 GEN_VEXT_VX(vminu_vx_h, 2, 2)
1386 GEN_VEXT_VX(vminu_vx_w, 4, 4)
1387 GEN_VEXT_VX(vminu_vx_d, 8, 8)
1388 GEN_VEXT_VX(vmin_vx_b, 1, 1)
1389 GEN_VEXT_VX(vmin_vx_h, 2, 2)
1390 GEN_VEXT_VX(vmin_vx_w, 4, 4)
1391 GEN_VEXT_VX(vmin_vx_d, 8, 8)
1392 GEN_VEXT_VX(vmaxu_vx_b, 1, 1)
1393 GEN_VEXT_VX(vmaxu_vx_h, 2, 2)
1394 GEN_VEXT_VX(vmaxu_vx_w, 4, 4)
1395 GEN_VEXT_VX(vmaxu_vx_d, 8, 8)
1396 GEN_VEXT_VX(vmax_vx_b, 1, 1)
1397 GEN_VEXT_VX(vmax_vx_h, 2, 2)
1398 GEN_VEXT_VX(vmax_vx_w, 4, 4)
1399 GEN_VEXT_VX(vmax_vx_d, 8, 8)
1400 
/* Vector Single-Width Integer Multiply Instructions */

/*
 * Product of N and M.  Both arguments are parenthesized so that an
 * expression passed as an argument is multiplied as a whole.
 */
#define DO_MUL(N, M) ((N) * (M))
1403 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1404 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1405 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1406 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
1407 GEN_VEXT_VV(vmul_vv_b, 1, 1)
1408 GEN_VEXT_VV(vmul_vv_h, 2, 2)
1409 GEN_VEXT_VV(vmul_vv_w, 4, 4)
1410 GEN_VEXT_VV(vmul_vv_d, 8, 8)
1411 
/*
 * Signed 8 x 8 multiply: returns the 16-bit product shifted right by 8,
 * i.e. its upper half (for negative products this relies on the compiler
 * shifting right arithmetically).
 */
static int8_t do_mulh_b(int8_t s2, int8_t s1)
{
    /* The full product always fits in 16 bits. */
    int16_t product = (int16_t)s2 * (int16_t)s1;

    return product >> 8;
}
1416 
/*
 * Signed 16 x 16 multiply: returns the 32-bit product shifted right by 16,
 * i.e. its upper half (for negative products this relies on the compiler
 * shifting right arithmetically).
 */
static int16_t do_mulh_h(int16_t s2, int16_t s1)
{
    /* The full product always fits in 32 bits. */
    int32_t product = (int32_t)s2 * (int32_t)s1;

    return product >> 16;
}
1421 
/* Bits 63..32 of the signed product s2 * s1. */
static int32_t do_mulh_w(int32_t s2, int32_t s1)
{
    int64_t prod = (int64_t)s2 * (int64_t)s1;

    return prod >> 32;
}
1426 
/*
 * Returns the value muls64() stores through its second pointer argument
 * (presumably the high 64 bits of the signed product - confirm against
 * muls64's declaration); the first output is discarded.
 */
static int64_t do_mulh_d(int64_t s2, int64_t s1)
{
    uint64_t lo, hi;

    muls64(&lo, &hi, s1, s2);

    return hi;
}
1434 
/* Bits 15..8 of the unsigned product s2 * s1. */
static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
{
    unsigned int prod = (unsigned int)s2 * (unsigned int)s1;

    return prod >> 8;
}
1439 
/* Bits 31..16 of the unsigned product s2 * s1. */
static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
{
    uint32_t prod = (uint32_t)s2 * (uint32_t)s1;

    return prod >> 16;
}
1444 
/* Bits 63..32 of the unsigned product s2 * s1. */
static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
{
    uint64_t prod = (uint64_t)s2 * (uint64_t)s1;

    return prod >> 32;
}
1449 
/*
 * Returns the value mulu64() stores through its second pointer argument
 * (presumably the high 64 bits of the unsigned product - confirm against
 * mulu64's declaration); the first output is discarded.
 */
static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
{
    uint64_t lo, hi;

    mulu64(&lo, &hi, s2, s1);

    return hi;
}
1457 
/* Bits 15..8 of the product of signed s2 and unsigned s1. */
static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
{
    int prod = (int)s2 * (int)s1;

    return prod >> 8;
}
1462 
/*
 * Bits 31..16 of the product of signed s2 and unsigned s1.  The multiply
 * is carried out in uint32_t, with s2 sign-extended first.
 */
static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
{
    uint32_t prod = (uint32_t)(int32_t)s2 * (uint32_t)s1;

    return prod >> 16;
}
1467 
/*
 * Bits 63..32 of the product of signed s2 and unsigned s1.  The multiply
 * is carried out in uint64_t, with s2 sign-extended first.
 */
static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
{
    uint64_t prod = (uint64_t)(int64_t)s2 * (uint64_t)s1;

    return prod >> 32;
}
1472 
/*
 * Let  A  = signed operand,
 *      B  = unsigned operand,
 *      P  = mulu64(A, B), the product with A reinterpreted as unsigned,
 *      SP = A * B, the wanted signed-by-unsigned product.
 *
 * IF A < 0
 *      mulu64 sees A as the unsigned value A + 2 ** 64, so
 *          P  = (A + 2 ** 64) * B
 *             = A * B + 2 ** 64 * B
 *          SP = P - 2 ** 64 * B
 * ELSE
 *      SP = P
 * THEN
 *      HI_P -= (A < 0 ? B : 0)
 */
1491 
/*
 * High word for signed s2 times unsigned s1: take the second output of
 * mulu64() and subtract s1 from it when s2 is negative.
 */
static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
{
    uint64_t lo, hi;

    mulu64(&lo, &hi, s2, s1);
    if (s2 < 0) {
        hi -= s1;
    }

    return hi;
}
1501 
1502 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1503 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1504 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1505 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1506 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1507 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1508 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1509 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1510 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1511 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1512 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1513 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
1514 GEN_VEXT_VV(vmulh_vv_b, 1, 1)
1515 GEN_VEXT_VV(vmulh_vv_h, 2, 2)
1516 GEN_VEXT_VV(vmulh_vv_w, 4, 4)
1517 GEN_VEXT_VV(vmulh_vv_d, 8, 8)
1518 GEN_VEXT_VV(vmulhu_vv_b, 1, 1)
1519 GEN_VEXT_VV(vmulhu_vv_h, 2, 2)
1520 GEN_VEXT_VV(vmulhu_vv_w, 4, 4)
1521 GEN_VEXT_VV(vmulhu_vv_d, 8, 8)
1522 GEN_VEXT_VV(vmulhsu_vv_b, 1, 1)
1523 GEN_VEXT_VV(vmulhsu_vv_h, 2, 2)
1524 GEN_VEXT_VV(vmulhsu_vv_w, 4, 4)
1525 GEN_VEXT_VV(vmulhsu_vv_d, 8, 8)
1526 
1527 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1528 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1529 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1530 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1531 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1532 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1533 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1534 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1535 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1536 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1537 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1538 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1539 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1540 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1541 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1542 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
1543 GEN_VEXT_VX(vmul_vx_b, 1, 1)
1544 GEN_VEXT_VX(vmul_vx_h, 2, 2)
1545 GEN_VEXT_VX(vmul_vx_w, 4, 4)
1546 GEN_VEXT_VX(vmul_vx_d, 8, 8)
1547 GEN_VEXT_VX(vmulh_vx_b, 1, 1)
1548 GEN_VEXT_VX(vmulh_vx_h, 2, 2)
1549 GEN_VEXT_VX(vmulh_vx_w, 4, 4)
1550 GEN_VEXT_VX(vmulh_vx_d, 8, 8)
1551 GEN_VEXT_VX(vmulhu_vx_b, 1, 1)
1552 GEN_VEXT_VX(vmulhu_vx_h, 2, 2)
1553 GEN_VEXT_VX(vmulhu_vx_w, 4, 4)
1554 GEN_VEXT_VX(vmulhu_vx_d, 8, 8)
1555 GEN_VEXT_VX(vmulhsu_vx_b, 1, 1)
1556 GEN_VEXT_VX(vmulhsu_vx_h, 2, 2)
1557 GEN_VEXT_VX(vmulhsu_vx_w, 4, 4)
1558 GEN_VEXT_VX(vmulhsu_vx_d, 8, 8)
1559 
1560 /* Vector Integer Divide Instructions */
/*
 * Integer divide/remainder with the special cases resolved before the
 * C operator is applied:
 *   - divisor == 0: the quotient is all ones, the remainder is the dividend;
 *   - signed dividend that equals its own negation (0 or the most negative
 *     value) with divisor -1: the quotient is the dividend and the
 *     remainder is 0, so the overflowing N / M and N % M are never evaluated.
 *
 * The "equals its own negation" test negates N in uint64_t and truncates
 * back to N's type, so no signed minimum is ever negated.
 * Every argument is parenthesized so compound expressions expand safely.
 */
#define DO_DIVU(N, M) (unlikely((M) == 0) ? (__typeof(N))(-1) : (N) / (M))
#define DO_REMU(N, M) (unlikely((M) == 0) ? (N) : (N) % (M))
#define DO_DIV(N, M)  (unlikely((M) == 0) ? (__typeof(N))(-1) :              \
        unlikely(((N) == (__typeof(N))(0 - (uint64_t)(N))) &&                \
                 ((M) == (__typeof(N))(-1))) ? (N) : (N) / (M))
#define DO_REM(N, M)  (unlikely((M) == 0) ? (N) :                            \
        unlikely(((N) == (__typeof(N))(0 - (uint64_t)(N))) &&                \
                 ((M) == (__typeof(N))(-1))) ? 0 : (N) % (M))
1567 
1568 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1569 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1570 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1571 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1572 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1573 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1574 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1575 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1576 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1577 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1578 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1579 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1580 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1581 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1582 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1583 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
1584 GEN_VEXT_VV(vdivu_vv_b, 1, 1)
1585 GEN_VEXT_VV(vdivu_vv_h, 2, 2)
1586 GEN_VEXT_VV(vdivu_vv_w, 4, 4)
1587 GEN_VEXT_VV(vdivu_vv_d, 8, 8)
1588 GEN_VEXT_VV(vdiv_vv_b, 1, 1)
1589 GEN_VEXT_VV(vdiv_vv_h, 2, 2)
1590 GEN_VEXT_VV(vdiv_vv_w, 4, 4)
1591 GEN_VEXT_VV(vdiv_vv_d, 8, 8)
1592 GEN_VEXT_VV(vremu_vv_b, 1, 1)
1593 GEN_VEXT_VV(vremu_vv_h, 2, 2)
1594 GEN_VEXT_VV(vremu_vv_w, 4, 4)
1595 GEN_VEXT_VV(vremu_vv_d, 8, 8)
1596 GEN_VEXT_VV(vrem_vv_b, 1, 1)
1597 GEN_VEXT_VV(vrem_vv_h, 2, 2)
1598 GEN_VEXT_VV(vrem_vv_w, 4, 4)
1599 GEN_VEXT_VV(vrem_vv_d, 8, 8)
1600 
1601 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1602 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1603 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1604 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1605 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1606 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1607 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1608 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1609 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1610 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1611 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1612 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1613 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1614 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1615 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1616 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
1617 GEN_VEXT_VX(vdivu_vx_b, 1, 1)
1618 GEN_VEXT_VX(vdivu_vx_h, 2, 2)
1619 GEN_VEXT_VX(vdivu_vx_w, 4, 4)
1620 GEN_VEXT_VX(vdivu_vx_d, 8, 8)
1621 GEN_VEXT_VX(vdiv_vx_b, 1, 1)
1622 GEN_VEXT_VX(vdiv_vx_h, 2, 2)
1623 GEN_VEXT_VX(vdiv_vx_w, 4, 4)
1624 GEN_VEXT_VX(vdiv_vx_d, 8, 8)
1625 GEN_VEXT_VX(vremu_vx_b, 1, 1)
1626 GEN_VEXT_VX(vremu_vx_h, 2, 2)
1627 GEN_VEXT_VX(vremu_vx_w, 4, 4)
1628 GEN_VEXT_VX(vremu_vx_d, 8, 8)
1629 GEN_VEXT_VX(vrem_vx_b, 1, 1)
1630 GEN_VEXT_VX(vrem_vx_h, 2, 2)
1631 GEN_VEXT_VX(vrem_vx_w, 4, 4)
1632 GEN_VEXT_VX(vrem_vx_d, 8, 8)
1633 
1634 /* Vector Widening Integer Multiply Instructions */
1635 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1636 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1637 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1638 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1639 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1640 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1641 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1642 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1643 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
1644 GEN_VEXT_VV(vwmul_vv_b, 1, 2)
1645 GEN_VEXT_VV(vwmul_vv_h, 2, 4)
1646 GEN_VEXT_VV(vwmul_vv_w, 4, 8)
1647 GEN_VEXT_VV(vwmulu_vv_b, 1, 2)
1648 GEN_VEXT_VV(vwmulu_vv_h, 2, 4)
1649 GEN_VEXT_VV(vwmulu_vv_w, 4, 8)
1650 GEN_VEXT_VV(vwmulsu_vv_b, 1, 2)
1651 GEN_VEXT_VV(vwmulsu_vv_h, 2, 4)
1652 GEN_VEXT_VV(vwmulsu_vv_w, 4, 8)
1653 
1654 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1655 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1656 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1657 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1658 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1659 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1660 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1661 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1662 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
1663 GEN_VEXT_VX(vwmul_vx_b, 1, 2)
1664 GEN_VEXT_VX(vwmul_vx_h, 2, 4)
1665 GEN_VEXT_VX(vwmul_vx_w, 4, 8)
1666 GEN_VEXT_VX(vwmulu_vx_b, 1, 2)
1667 GEN_VEXT_VX(vwmulu_vx_h, 2, 4)
1668 GEN_VEXT_VX(vwmulu_vx_w, 4, 8)
1669 GEN_VEXT_VX(vwmulsu_vx_b, 1, 2)
1670 GEN_VEXT_VX(vwmulsu_vx_h, 2, 4)
1671 GEN_VEXT_VX(vwmulsu_vx_w, 4, 8)
1672 
1673 /* Vector Single-Width Integer Multiply-Add Instructions */
/*
 * Define do_<NAME>(): load element i of vs1 and vs2 plus the current
 * element i of vd, then store OP(s2, s1, d) back into that vd element.
 * HD/HS1/HS2 map the element index for each operand.
 */
#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i)       \
{                                                                  \
    TD *dst = (TD *)vd + HD(i);                                    \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
    TD d = *dst;                                                   \
                                                                   \
    *dst = OP(s2, s1, d);                                          \
}
1682 
/*
 * Multiply-add combinations of the three operands (N, M, D):
 *   DO_MACC:   M * N + D
 *   DO_NMSAC: -(M * N) + D
 *   DO_MADD:   M * D + N
 *   DO_NMSUB: -(M * D) + N
 * Every argument is parenthesized so compound expressions expand safely.
 */
#define DO_MACC(N, M, D) ((M) * (N) + (D))
#define DO_NMSAC(N, M, D) (-((M) * (N)) + (D))
#define DO_MADD(N, M, D) ((M) * (D) + (N))
#define DO_NMSUB(N, M, D) (-((M) * (D)) + (N))
1687 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1688 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1689 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1690 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1691 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1692 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1693 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1694 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1695 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1696 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1697 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1698 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1699 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1700 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1701 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1702 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
1703 GEN_VEXT_VV(vmacc_vv_b, 1, 1)
1704 GEN_VEXT_VV(vmacc_vv_h, 2, 2)
1705 GEN_VEXT_VV(vmacc_vv_w, 4, 4)
1706 GEN_VEXT_VV(vmacc_vv_d, 8, 8)
1707 GEN_VEXT_VV(vnmsac_vv_b, 1, 1)
1708 GEN_VEXT_VV(vnmsac_vv_h, 2, 2)
1709 GEN_VEXT_VV(vnmsac_vv_w, 4, 4)
1710 GEN_VEXT_VV(vnmsac_vv_d, 8, 8)
1711 GEN_VEXT_VV(vmadd_vv_b, 1, 1)
1712 GEN_VEXT_VV(vmadd_vv_h, 2, 2)
1713 GEN_VEXT_VV(vmadd_vv_w, 4, 4)
1714 GEN_VEXT_VV(vmadd_vv_d, 8, 8)
1715 GEN_VEXT_VV(vnmsub_vv_b, 1, 1)
1716 GEN_VEXT_VV(vnmsub_vv_h, 2, 2)
1717 GEN_VEXT_VV(vnmsub_vv_w, 4, 4)
1718 GEN_VEXT_VV(vnmsub_vv_d, 8, 8)
1719 
/*
 * Define do_<NAME>() for a scalar first operand: load element i of vs2
 * and the current element i of vd, then store OP(s2, (TX1)(T1)s1, d)
 * back into that vd element.
 */
#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
{                                                                   \
    TD *dst = (TD *)vd + HD(i);                                     \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
    TD d = *dst;                                                    \
                                                                    \
    *dst = OP(s2, (TX1)(T1)s1, d);                                  \
}
1727 
1728 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1729 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1730 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1731 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1732 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1733 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1734 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1735 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1736 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1737 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1738 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1739 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1740 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1741 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1742 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1743 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
1744 GEN_VEXT_VX(vmacc_vx_b, 1, 1)
1745 GEN_VEXT_VX(vmacc_vx_h, 2, 2)
1746 GEN_VEXT_VX(vmacc_vx_w, 4, 4)
1747 GEN_VEXT_VX(vmacc_vx_d, 8, 8)
1748 GEN_VEXT_VX(vnmsac_vx_b, 1, 1)
1749 GEN_VEXT_VX(vnmsac_vx_h, 2, 2)
1750 GEN_VEXT_VX(vnmsac_vx_w, 4, 4)
1751 GEN_VEXT_VX(vnmsac_vx_d, 8, 8)
1752 GEN_VEXT_VX(vmadd_vx_b, 1, 1)
1753 GEN_VEXT_VX(vmadd_vx_h, 2, 2)
1754 GEN_VEXT_VX(vmadd_vx_w, 4, 4)
1755 GEN_VEXT_VX(vmadd_vx_d, 8, 8)
1756 GEN_VEXT_VX(vnmsub_vx_b, 1, 1)
1757 GEN_VEXT_VX(vnmsub_vx_h, 2, 2)
1758 GEN_VEXT_VX(vnmsub_vx_w, 4, 4)
1759 GEN_VEXT_VX(vnmsub_vx_d, 8, 8)
1760 
1761 /* Vector Widening Integer Multiply-Add Instructions */
1762 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1763 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1764 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1765 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1766 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1767 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1768 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1769 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1770 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
1771 GEN_VEXT_VV(vwmaccu_vv_b, 1, 2)
1772 GEN_VEXT_VV(vwmaccu_vv_h, 2, 4)
1773 GEN_VEXT_VV(vwmaccu_vv_w, 4, 8)
1774 GEN_VEXT_VV(vwmacc_vv_b, 1, 2)
1775 GEN_VEXT_VV(vwmacc_vv_h, 2, 4)
1776 GEN_VEXT_VV(vwmacc_vv_w, 4, 8)
1777 GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2)
1778 GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4)
1779 GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8)
1780 
1781 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
1782 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
1783 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
1784 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
1785 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
1786 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
1787 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
1788 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
1789 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
1790 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
1791 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
1792 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
1793 GEN_VEXT_VX(vwmaccu_vx_b, 1, 2)
1794 GEN_VEXT_VX(vwmaccu_vx_h, 2, 4)
1795 GEN_VEXT_VX(vwmaccu_vx_w, 4, 8)
1796 GEN_VEXT_VX(vwmacc_vx_b, 1, 2)
1797 GEN_VEXT_VX(vwmacc_vx_h, 2, 4)
1798 GEN_VEXT_VX(vwmacc_vx_w, 4, 8)
1799 GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2)
1800 GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4)
1801 GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8)
1802 GEN_VEXT_VX(vwmaccus_vx_b, 1, 2)
1803 GEN_VEXT_VX(vwmaccus_vx_h, 2, 4)
1804 GEN_VEXT_VX(vwmaccus_vx_w, 4, 8)
1805 
1806 /* Vector Integer Merge and Move Instructions */
1807 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H)                              \
1808 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env,           \
1809                   uint32_t desc)                                     \
1810 {                                                                    \
1811     uint32_t vl = env->vl;                                           \
1812     uint32_t i;                                                      \
1813                                                                      \
1814     for (i = env->vstart; i < vl; i++) {                             \
1815         ETYPE s1 = *((ETYPE *)vs1 + H(i));                           \
1816         *((ETYPE *)vd + H(i)) = s1;                                  \
1817     }                                                                \
1818     env->vstart = 0;                                                 \
1819 }
1820 
1821 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t,  H1)
1822 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
1823 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
1824 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
1825 
1826 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H)                              \
1827 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env,         \
1828                   uint32_t desc)                                     \
1829 {                                                                    \
1830     uint32_t vl = env->vl;                                           \
1831     uint32_t i;                                                      \
1832                                                                      \
1833     for (i = env->vstart; i < vl; i++) {                             \
1834         *((ETYPE *)vd + H(i)) = (ETYPE)s1;                           \
1835     }                                                                \
1836     env->vstart = 0;                                                 \
1837 }
1838 
1839 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t,  H1)
1840 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
1841 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
1842 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
1843 
1844 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H)                           \
1845 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,          \
1846                   CPURISCVState *env, uint32_t desc)                 \
1847 {                                                                    \
1848     uint32_t vl = env->vl;                                           \
1849     uint32_t i;                                                      \
1850                                                                      \
1851     for (i = env->vstart; i < vl; i++) {                             \
1852         ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1);            \
1853         *((ETYPE *)vd + H(i)) = *(vt + H(i));                        \
1854     }                                                                \
1855     env->vstart = 0;                                                 \
1856 }
1857 
1858 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t,  H1)
1859 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
1860 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
1861 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
1862 
1863 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H)                           \
1864 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,               \
1865                   void *vs2, CPURISCVState *env, uint32_t desc)      \
1866 {                                                                    \
1867     uint32_t vl = env->vl;                                           \
1868     uint32_t i;                                                      \
1869                                                                      \
1870     for (i = env->vstart; i < vl; i++) {                             \
1871         ETYPE s2 = *((ETYPE *)vs2 + H(i));                           \
1872         ETYPE d = (!vext_elem_mask(v0, i) ? s2 :                     \
1873                    (ETYPE)(target_long)s1);                          \
1874         *((ETYPE *)vd + H(i)) = d;                                   \
1875     }                                                                \
1876     env->vstart = 0;                                                 \
1877 }
1878 
1879 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t,  H1)
1880 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
1881 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
1882 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
1883 
1884 /*
1885  *** Vector Fixed-Point Arithmetic Instructions
1886  */
1887 
1888 /* Vector Single-Width Saturating Add and Subtract */
1889 
/*
 * Fixed-point instructions may use a rounding mode and saturation,
 * so the common macros for them are defined here.
 */
1894 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
1895                           CPURISCVState *env, int vxrm);
1896 
1897 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)     \
1898 static inline void                                                  \
1899 do_##NAME(void *vd, void *vs1, void *vs2, int i,                    \
1900           CPURISCVState *env, int vxrm)                             \
1901 {                                                                   \
1902     TX1 s1 = *((T1 *)vs1 + HS1(i));                                 \
1903     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
1904     *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1);                    \
1905 }
1906 
1907 static inline void
1908 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
1909              CPURISCVState *env,
1910              uint32_t vl, uint32_t vm, int vxrm,
1911              opivv2_rm_fn *fn)
1912 {
1913     for (uint32_t i = env->vstart; i < vl; i++) {
1914         if (!vm && !vext_elem_mask(v0, i)) {
1915             continue;
1916         }
1917         fn(vd, vs1, vs2, i, env, vxrm);
1918     }
1919     env->vstart = 0;
1920 }
1921 
1922 static inline void
1923 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
1924              CPURISCVState *env,
1925              uint32_t desc, uint32_t esz, uint32_t dsz,
1926              opivv2_rm_fn *fn)
1927 {
1928     uint32_t vm = vext_vm(desc);
1929     uint32_t vl = env->vl;
1930 
1931     switch (env->vxrm) {
1932     case 0: /* rnu */
1933         vext_vv_rm_1(vd, v0, vs1, vs2,
1934                      env, vl, vm, 0, fn);
1935         break;
1936     case 1: /* rne */
1937         vext_vv_rm_1(vd, v0, vs1, vs2,
1938                      env, vl, vm, 1, fn);
1939         break;
1940     case 2: /* rdn */
1941         vext_vv_rm_1(vd, v0, vs1, vs2,
1942                      env, vl, vm, 2, fn);
1943         break;
1944     default: /* rod */
1945         vext_vv_rm_1(vd, v0, vs1, vs2,
1946                      env, vl, vm, 3, fn);
1947         break;
1948     }
1949 }
1950 
1951 /* generate helpers for fixed point instructions with OPIVV format */
/* The generated helper forwards its arguments, ESZ, DSZ and do_<NAME> to vext_vv_rm_2(). */
#define GEN_VEXT_VV_RM(NAME, ESZ, DSZ)                          \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,     \
                  CPURISCVState *env, uint32_t desc)            \
{                                                               \
    opivv2_rm_fn *elem_fn = do_##NAME;                          \
                                                                \
    vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,         \
                 elem_fn);                                      \
}
1959 
1960 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
1961 {
1962     uint8_t res = a + b;
1963     if (res < a) {
1964         res = UINT8_MAX;
1965         env->vxsat = 0x1;
1966     }
1967     return res;
1968 }
1969 
1970 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
1971                                uint16_t b)
1972 {
1973     uint16_t res = a + b;
1974     if (res < a) {
1975         res = UINT16_MAX;
1976         env->vxsat = 0x1;
1977     }
1978     return res;
1979 }
1980 
1981 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
1982                                uint32_t b)
1983 {
1984     uint32_t res = a + b;
1985     if (res < a) {
1986         res = UINT32_MAX;
1987         env->vxsat = 0x1;
1988     }
1989     return res;
1990 }
1991 
1992 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
1993                                uint64_t b)
1994 {
1995     uint64_t res = a + b;
1996     if (res < a) {
1997         res = UINT64_MAX;
1998         env->vxsat = 0x1;
1999     }
2000     return res;
2001 }
2002 
2003 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
2004 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
2005 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
2006 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
2007 GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1)
2008 GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2)
2009 GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4)
2010 GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8)
2011 
2012 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
2013                           CPURISCVState *env, int vxrm);
2014 
2015 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)          \
2016 static inline void                                                  \
2017 do_##NAME(void *vd, target_long s1, void *vs2, int i,               \
2018           CPURISCVState *env, int vxrm)                             \
2019 {                                                                   \
2020     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
2021     *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1);           \
2022 }
2023 
2024 static inline void
2025 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
2026              CPURISCVState *env,
2027              uint32_t vl, uint32_t vm, int vxrm,
2028              opivx2_rm_fn *fn)
2029 {
2030     for (uint32_t i = env->vstart; i < vl; i++) {
2031         if (!vm && !vext_elem_mask(v0, i)) {
2032             continue;
2033         }
2034         fn(vd, s1, vs2, i, env, vxrm);
2035     }
2036     env->vstart = 0;
2037 }
2038 
2039 static inline void
2040 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2041              CPURISCVState *env,
2042              uint32_t desc, uint32_t esz, uint32_t dsz,
2043              opivx2_rm_fn *fn)
2044 {
2045     uint32_t vm = vext_vm(desc);
2046     uint32_t vl = env->vl;
2047 
2048     switch (env->vxrm) {
2049     case 0: /* rnu */
2050         vext_vx_rm_1(vd, v0, s1, vs2,
2051                      env, vl, vm, 0, fn);
2052         break;
2053     case 1: /* rne */
2054         vext_vx_rm_1(vd, v0, s1, vs2,
2055                      env, vl, vm, 1, fn);
2056         break;
2057     case 2: /* rdn */
2058         vext_vx_rm_1(vd, v0, s1, vs2,
2059                      env, vl, vm, 2, fn);
2060         break;
2061     default: /* rod */
2062         vext_vx_rm_1(vd, v0, s1, vs2,
2063                      env, vl, vm, 3, fn);
2064         break;
2065     }
2066 }
2067 
2068 /* generate helpers for fixed point instructions with OPIVX format */
/* The generated helper forwards its arguments, ESZ, DSZ and do_<NAME> to vext_vx_rm_2(). */
#define GEN_VEXT_VX_RM(NAME, ESZ, DSZ)                    \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
        void *vs2, CPURISCVState *env, uint32_t desc)     \
{                                                         \
    opivx2_rm_fn *elem_fn = do_##NAME;                    \
                                                          \
    vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ,    \
                 elem_fn);                                \
}
2076 
2077 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2078 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2079 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2080 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
2081 GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1)
2082 GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2)
2083 GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4)
2084 GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8)
2085 
2086 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2087 {
2088     int8_t res = a + b;
2089     if ((res ^ a) & (res ^ b) & INT8_MIN) {
2090         res = a > 0 ? INT8_MAX : INT8_MIN;
2091         env->vxsat = 0x1;
2092     }
2093     return res;
2094 }
2095 
2096 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2097 {
2098     int16_t res = a + b;
2099     if ((res ^ a) & (res ^ b) & INT16_MIN) {
2100         res = a > 0 ? INT16_MAX : INT16_MIN;
2101         env->vxsat = 0x1;
2102     }
2103     return res;
2104 }
2105 
2106 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2107 {
2108     int32_t res = a + b;
2109     if ((res ^ a) & (res ^ b) & INT32_MIN) {
2110         res = a > 0 ? INT32_MAX : INT32_MIN;
2111         env->vxsat = 0x1;
2112     }
2113     return res;
2114 }
2115 
2116 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2117 {
2118     int64_t res = a + b;
2119     if ((res ^ a) & (res ^ b) & INT64_MIN) {
2120         res = a > 0 ? INT64_MAX : INT64_MIN;
2121         env->vxsat = 0x1;
2122     }
2123     return res;
2124 }
2125 
2126 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2127 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2128 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2129 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
2130 GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1)
2131 GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2)
2132 GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4)
2133 GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8)
2134 
2135 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2136 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2137 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2138 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
2139 GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1)
2140 GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2)
2141 GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4)
2142 GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8)
2143 
2144 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2145 {
2146     uint8_t res = a - b;
2147     if (res > a) {
2148         res = 0;
2149         env->vxsat = 0x1;
2150     }
2151     return res;
2152 }
2153 
2154 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2155                                uint16_t b)
2156 {
2157     uint16_t res = a - b;
2158     if (res > a) {
2159         res = 0;
2160         env->vxsat = 0x1;
2161     }
2162     return res;
2163 }
2164 
2165 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2166                                uint32_t b)
2167 {
2168     uint32_t res = a - b;
2169     if (res > a) {
2170         res = 0;
2171         env->vxsat = 0x1;
2172     }
2173     return res;
2174 }
2175 
2176 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2177                                uint64_t b)
2178 {
2179     uint64_t res = a - b;
2180     if (res > a) {
2181         res = 0;
2182         env->vxsat = 0x1;
2183     }
2184     return res;
2185 }
2186 
/*
 * Instantiate vssubu for 8/16/32/64-bit elements, with ssubu8..ssubu64 as
 * the per-element operation: OPIVV2_RM (_vv) forms first, then OPIVX2_RM
 * (_vx) forms.  The RVVCALL/OPIV*2_RM/GEN_VEXT_V*_RM macros are defined
 * outside this excerpt.
 */
2187 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2188 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2189 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2190 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
2191 GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1)
2192 GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2)
2193 GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4)
2194 GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8)
2195 
2196 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2197 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2198 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2199 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
2200 GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1)
2201 GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2)
2202 GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4)
2203 GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8)
2204 
2205 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2206 {
2207     int8_t res = a - b;
2208     if ((res ^ a) & (a ^ b) & INT8_MIN) {
2209         res = a >= 0 ? INT8_MAX : INT8_MIN;
2210         env->vxsat = 0x1;
2211     }
2212     return res;
2213 }
2214 
2215 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2216 {
2217     int16_t res = a - b;
2218     if ((res ^ a) & (a ^ b) & INT16_MIN) {
2219         res = a >= 0 ? INT16_MAX : INT16_MIN;
2220         env->vxsat = 0x1;
2221     }
2222     return res;
2223 }
2224 
2225 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2226 {
2227     int32_t res = a - b;
2228     if ((res ^ a) & (a ^ b) & INT32_MIN) {
2229         res = a >= 0 ? INT32_MAX : INT32_MIN;
2230         env->vxsat = 0x1;
2231     }
2232     return res;
2233 }
2234 
2235 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2236 {
2237     int64_t res = a - b;
2238     if ((res ^ a) & (a ^ b) & INT64_MIN) {
2239         res = a >= 0 ? INT64_MAX : INT64_MIN;
2240         env->vxsat = 0x1;
2241     }
2242     return res;
2243 }
2244 
/*
 * Instantiate vssub for 8/16/32/64-bit elements, with ssub8..ssub64 as the
 * per-element operation: OPIVV2_RM (_vv) forms first, then OPIVX2_RM (_vx)
 * forms.  The RVVCALL/OPIV*2_RM/GEN_VEXT_V*_RM macros are defined outside
 * this excerpt.
 */
2245 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2246 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2247 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2248 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
2249 GEN_VEXT_VV_RM(vssub_vv_b, 1, 1)
2250 GEN_VEXT_VV_RM(vssub_vv_h, 2, 2)
2251 GEN_VEXT_VV_RM(vssub_vv_w, 4, 4)
2252 GEN_VEXT_VV_RM(vssub_vv_d, 8, 8)
2253 
2254 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2255 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2256 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2257 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
2258 GEN_VEXT_VX_RM(vssub_vx_b, 1, 1)
2259 GEN_VEXT_VX_RM(vssub_vx_h, 2, 2)
2260 GEN_VEXT_VX_RM(vssub_vx_w, 4, 4)
2261 GEN_VEXT_VX_RM(vssub_vx_d, 8, 8)
2262 
2263 /* Vector Single-Width Averaging Add and Subtract */
/*
 * Compute the rounding increment for a fixed-point right shift.
 *
 * @vxrm:  rounding mode: 0 = round-to-nearest-up, 1 = round-to-nearest-even,
 *         3 = round-to-odd; any other value truncates (round-down).
 * @v:     the value that is about to be shifted right.
 * @shift: number of low-order bits that the shift discards.
 *
 * Returns 0 or 1, to be added to (v >> shift).  A shift of 0 discards
 * nothing and a shift above 64 is out of range; both yield 0.
 */
static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
{
    uint8_t d, d1;
    uint64_t D1, D2;

    if (shift == 0 || shift > 64) {
        return 0;
    }

    /*
     * d is the bit that becomes the LSB of the shifted result.  It is only
     * read after the range check, and is taken as 0 for shift == 64, where
     * a 64-bit value has no such bit (shifting by 64 would be undefined).
     */
    d = shift < 64 ? (v >> shift) & 1 : 0;
    /* d1 is the most significant discarded bit; D1 is all discarded bits. */
    d1 = (v >> (shift - 1)) & 1;
    D1 = shift < 64 ? v & ((UINT64_C(1) << shift) - 1) : v;

    if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
        return d1;
    } else if (vxrm == 1) { /* round-to-nearest-even */
        if (shift > 1) {
            /* D2 is the discarded bits below d1; non-zero means "above half". */
            D2 = v & ((UINT64_C(1) << (shift - 1)) - 1);
            return d1 & ((D2 != 0) | d);
        } else {
            return d1 & d;
        }
    } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
        return !d & (D1 != 0);
    }
    return 0; /* round-down (truncate) */
}
2290 
2291 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2292 {
2293     int64_t res = (int64_t)a + b;
2294     uint8_t round = get_round(vxrm, res, 1);
2295 
2296     return (res >> 1) + round;
2297 }
2298 
2299 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2300 {
2301     int64_t res = a + b;
2302     uint8_t round = get_round(vxrm, res, 1);
2303     int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2304 
2305     /* With signed overflow, bit 64 is inverse of bit 63. */
2306     return ((res >> 1) ^ over) + round;
2307 }
2308 
/*
 * Instantiate vaadd: OPIVV2_RM (_vv) forms first, then OPIVX2_RM (_vx)
 * forms.  The byte, halfword and word variants all use aadd32 as the
 * element operation; only the doubleword variant uses aadd64.  The
 * RVVCALL/OPIV*2_RM/GEN_VEXT_V*_RM macros are defined outside this excerpt.
 */
2309 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2310 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2311 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2312 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
2313 GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1)
2314 GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2)
2315 GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4)
2316 GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8)
2317 
2318 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2319 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2320 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2321 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
2322 GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1)
2323 GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2)
2324 GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4)
2325 GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8)
2326 
2327 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2328                                uint32_t a, uint32_t b)
2329 {
2330     uint64_t res = (uint64_t)a + b;
2331     uint8_t round = get_round(vxrm, res, 1);
2332 
2333     return (res >> 1) + round;
2334 }
2335 
2336 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
2337                                uint64_t a, uint64_t b)
2338 {
2339     uint64_t res = a + b;
2340     uint8_t round = get_round(vxrm, res, 1);
2341     uint64_t over = (uint64_t)(res < a) << 63;
2342 
2343     return ((res >> 1) | over) + round;
2344 }
2345 
/*
 * Instantiate vaaddu: OPIVV2_RM (_vv) forms first, then OPIVX2_RM (_vx)
 * forms.  The byte, halfword and word variants all use aaddu32 as the
 * element operation; only the doubleword variant uses aaddu64.  The
 * RVVCALL/OPIV*2_RM/GEN_VEXT_V*_RM macros are defined outside this excerpt.
 */
2346 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
2347 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
2348 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
2349 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
2350 GEN_VEXT_VV_RM(vaaddu_vv_b, 1, 1)
2351 GEN_VEXT_VV_RM(vaaddu_vv_h, 2, 2)
2352 GEN_VEXT_VV_RM(vaaddu_vv_w, 4, 4)
2353 GEN_VEXT_VV_RM(vaaddu_vv_d, 8, 8)
2354 
2355 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
2356 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
2357 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
2358 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
2359 GEN_VEXT_VX_RM(vaaddu_vx_b, 1, 1)
2360 GEN_VEXT_VX_RM(vaaddu_vx_h, 2, 2)
2361 GEN_VEXT_VX_RM(vaaddu_vx_w, 4, 4)
2362 GEN_VEXT_VX_RM(vaaddu_vx_d, 8, 8)
2363 
2364 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2365 {
2366     int64_t res = (int64_t)a - b;
2367     uint8_t round = get_round(vxrm, res, 1);
2368 
2369     return (res >> 1) + round;
2370 }
2371 
2372 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2373 {
2374     int64_t res = (int64_t)a - b;
2375     uint8_t round = get_round(vxrm, res, 1);
2376     int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2377 
2378     /* With signed overflow, bit 64 is inverse of bit 63. */
2379     return ((res >> 1) ^ over) + round;
2380 }
2381 
/*
 * Instantiate vasub: OPIVV2_RM (_vv) forms first, then OPIVX2_RM (_vx)
 * forms.  The byte, halfword and word variants all use asub32 as the
 * element operation; only the doubleword variant uses asub64.  The
 * RVVCALL/OPIV*2_RM/GEN_VEXT_V*_RM macros are defined outside this excerpt.
 */
2382 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2383 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2384 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2385 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
2386 GEN_VEXT_VV_RM(vasub_vv_b, 1, 1)
2387 GEN_VEXT_VV_RM(vasub_vv_h, 2, 2)
2388 GEN_VEXT_VV_RM(vasub_vv_w, 4, 4)
2389 GEN_VEXT_VV_RM(vasub_vv_d, 8, 8)
2390 
2391 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2392 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2393 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2394 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
2395 GEN_VEXT_VX_RM(vasub_vx_b, 1, 1)
2396 GEN_VEXT_VX_RM(vasub_vx_h, 2, 2)
2397 GEN_VEXT_VX_RM(vasub_vx_w, 4, 4)
2398 GEN_VEXT_VX_RM(vasub_vx_d, 8, 8)
2399 
2400 static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2401                                uint32_t a, uint32_t b)
2402 {
2403     int64_t res = (int64_t)a - b;
2404     uint8_t round = get_round(vxrm, res, 1);
2405 
2406     return (res >> 1) + round;
2407 }
2408 
2409 static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
2410                                uint64_t a, uint64_t b)
2411 {
2412     uint64_t res = (uint64_t)a - b;
2413     uint8_t round = get_round(vxrm, res, 1);
2414     uint64_t over = (uint64_t)(res > a) << 63;
2415 
2416     return ((res >> 1) | over) + round;
2417 }
2418 
/*
 * Instantiate vasubu: OPIVV2_RM (_vv) forms first, then OPIVX2_RM (_vx)
 * forms.  The byte, halfword and word variants all use asubu32 as the
 * element operation; only the doubleword variant uses asubu64.  The
 * RVVCALL/OPIV*2_RM/GEN_VEXT_V*_RM macros are defined outside this excerpt.
 */
2419 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
2420 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
2421 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
2422 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
2423 GEN_VEXT_VV_RM(vasubu_vv_b, 1, 1)
2424 GEN_VEXT_VV_RM(vasubu_vv_h, 2, 2)
2425 GEN_VEXT_VV_RM(vasubu_vv_w, 4, 4)
2426 GEN_VEXT_VV_RM(vasubu_vv_d, 8, 8)
2427 
2428 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
2429 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
2430 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
2431 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
2432 GEN_VEXT_VX_RM(vasubu_vx_b, 1, 1)
2433 GEN_VEXT_VX_RM(vasubu_vx_h, 2, 2)
2434 GEN_VEXT_VX_RM(vasubu_vx_w, 4, 4)
2435 GEN_VEXT_VX_RM(vasubu_vx_d, 8, 8)
2436 
2437 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2438 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2439 {
2440     uint8_t round;
2441     int16_t res;
2442 
2443     res = (int16_t)a * (int16_t)b;
2444     round = get_round(vxrm, res, 7);
2445     res   = (res >> 7) + round;
2446 
2447     if (res > INT8_MAX) {
2448         env->vxsat = 0x1;
2449         return INT8_MAX;
2450     } else if (res < INT8_MIN) {
2451         env->vxsat = 0x1;
2452         return INT8_MIN;
2453     } else {
2454         return res;
2455     }
2456 }
2457 
2458 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2459 {
2460     uint8_t round;
2461     int32_t res;
2462 
2463     res = (int32_t)a * (int32_t)b;
2464     round = get_round(vxrm, res, 15);
2465     res   = (res >> 15) + round;
2466 
2467     if (res > INT16_MAX) {
2468         env->vxsat = 0x1;
2469         return INT16_MAX;
2470     } else if (res < INT16_MIN) {
2471         env->vxsat = 0x1;
2472         return INT16_MIN;
2473     } else {
2474         return res;
2475     }
2476 }
2477 
2478 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2479 {
2480     uint8_t round;
2481     int64_t res;
2482 
2483     res = (int64_t)a * (int64_t)b;
2484     round = get_round(vxrm, res, 31);
2485     res   = (res >> 31) + round;
2486 
2487     if (res > INT32_MAX) {
2488         env->vxsat = 0x1;
2489         return INT32_MAX;
2490     } else if (res < INT32_MIN) {
2491         env->vxsat = 0x1;
2492         return INT32_MIN;
2493     } else {
2494         return res;
2495     }
2496 }
2497 
2498 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2499 {
2500     uint8_t round;
2501     uint64_t hi_64, lo_64;
2502     int64_t res;
2503 
2504     if (a == INT64_MIN && b == INT64_MIN) {
2505         env->vxsat = 1;
2506         return INT64_MAX;
2507     }
2508 
2509     muls64(&lo_64, &hi_64, a, b);
2510     round = get_round(vxrm, lo_64, 63);
2511     /*
2512      * Cannot overflow, as there are always
2513      * 2 sign bits after multiply.
2514      */
2515     res = (hi_64 << 1) | (lo_64 >> 63);
2516     if (round) {
2517         if (res == INT64_MAX) {
2518             env->vxsat = 1;
2519         } else {
2520             res += 1;
2521         }
2522     }
2523     return res;
2524 }
2525 
/*
 * Instantiate vsmul for 8/16/32/64-bit elements, with vsmul8..vsmul64 as
 * the per-element operation: OPIVV2_RM (_vv) forms first, then OPIVX2_RM
 * (_vx) forms.  The RVVCALL/OPIV*2_RM/GEN_VEXT_V*_RM macros are defined
 * outside this excerpt.
 */
2526 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2527 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2528 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2529 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
2530 GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1)
2531 GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2)
2532 GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4)
2533 GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8)
2534 
2535 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2536 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2537 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2538 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
2539 GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1)
2540 GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2)
2541 GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4)
2542 GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8)
2543 
2544 /* Vector Single-Width Scaling Shift Instructions */
2545 static inline uint8_t
2546 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2547 {
2548     uint8_t round, shift = b & 0x7;
2549     uint8_t res;
2550 
2551     round = get_round(vxrm, a, shift);
2552     res   = (a >> shift)  + round;
2553     return res;
2554 }
2555 static inline uint16_t
2556 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2557 {
2558     uint8_t round, shift = b & 0xf;
2559     uint16_t res;
2560 
2561     round = get_round(vxrm, a, shift);
2562     res   = (a >> shift)  + round;
2563     return res;
2564 }
2565 static inline uint32_t
2566 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2567 {
2568     uint8_t round, shift = b & 0x1f;
2569     uint32_t res;
2570 
2571     round = get_round(vxrm, a, shift);
2572     res   = (a >> shift)  + round;
2573     return res;
2574 }
2575 static inline uint64_t
2576 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2577 {
2578     uint8_t round, shift = b & 0x3f;
2579     uint64_t res;
2580 
2581     round = get_round(vxrm, a, shift);
2582     res   = (a >> shift)  + round;
2583     return res;
2584 }
/*
 * Instantiate vssrl for 8/16/32/64-bit elements, with vssrl8..vssrl64 as
 * the per-element operation: OPIVV2_RM (_vv) forms first, then OPIVX2_RM
 * (_vx) forms.  The RVVCALL/OPIV*2_RM/GEN_VEXT_V*_RM macros are defined
 * outside this excerpt.
 */
2585 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2586 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2587 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2588 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
2589 GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1)
2590 GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2)
2591 GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4)
2592 GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8)
2593 
2594 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2595 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2596 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2597 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
2598 GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1)
2599 GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2)
2600 GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4)
2601 GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8)
2602 
2603 static inline int8_t
2604 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2605 {
2606     uint8_t round, shift = b & 0x7;
2607     int8_t res;
2608 
2609     round = get_round(vxrm, a, shift);
2610     res   = (a >> shift)  + round;
2611     return res;
2612 }
2613 static inline int16_t
2614 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2615 {
2616     uint8_t round, shift = b & 0xf;
2617     int16_t res;
2618 
2619     round = get_round(vxrm, a, shift);
2620     res   = (a >> shift)  + round;
2621     return res;
2622 }
2623 static inline int32_t
2624 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2625 {
2626     uint8_t round, shift = b & 0x1f;
2627     int32_t res;
2628 
2629     round = get_round(vxrm, a, shift);
2630     res   = (a >> shift)  + round;
2631     return res;
2632 }
2633 static inline int64_t
2634 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2635 {
2636     uint8_t round, shift = b & 0x3f;
2637     int64_t res;
2638 
2639     round = get_round(vxrm, a, shift);
2640     res   = (a >> shift)  + round;
2641     return res;
2642 }
2643 
/*
 * Instantiate vssra for 8/16/32/64-bit elements, with vssra8..vssra64 as
 * the per-element operation: OPIVV2_RM (_vv) forms first, then OPIVX2_RM
 * (_vx) forms.  The RVVCALL/OPIV*2_RM/GEN_VEXT_V*_RM macros are defined
 * outside this excerpt.
 */
2644 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2645 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2646 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2647 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
2648 GEN_VEXT_VV_RM(vssra_vv_b, 1, 1)
2649 GEN_VEXT_VV_RM(vssra_vv_h, 2, 2)
2650 GEN_VEXT_VV_RM(vssra_vv_w, 4, 4)
2651 GEN_VEXT_VV_RM(vssra_vv_d, 8, 8)
2652 
2653 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2654 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2655 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2656 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
2657 GEN_VEXT_VX_RM(vssra_vx_b, 1, 1)
2658 GEN_VEXT_VX_RM(vssra_vx_h, 2, 2)
2659 GEN_VEXT_VX_RM(vssra_vx_w, 4, 4)
2660 GEN_VEXT_VX_RM(vssra_vx_d, 8, 8)
2661 
2662 /* Vector Narrowing Fixed-Point Clip Instructions */
2663 static inline int8_t
2664 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2665 {
2666     uint8_t round, shift = b & 0xf;
2667     int16_t res;
2668 
2669     round = get_round(vxrm, a, shift);
2670     res   = (a >> shift)  + round;
2671     if (res > INT8_MAX) {
2672         env->vxsat = 0x1;
2673         return INT8_MAX;
2674     } else if (res < INT8_MIN) {
2675         env->vxsat = 0x1;
2676         return INT8_MIN;
2677     } else {
2678         return res;
2679     }
2680 }
2681 
2682 static inline int16_t
2683 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2684 {
2685     uint8_t round, shift = b & 0x1f;
2686     int32_t res;
2687 
2688     round = get_round(vxrm, a, shift);
2689     res   = (a >> shift)  + round;
2690     if (res > INT16_MAX) {
2691         env->vxsat = 0x1;
2692         return INT16_MAX;
2693     } else if (res < INT16_MIN) {
2694         env->vxsat = 0x1;
2695         return INT16_MIN;
2696     } else {
2697         return res;
2698     }
2699 }
2700 
2701 static inline int32_t
2702 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2703 {
2704     uint8_t round, shift = b & 0x3f;
2705     int64_t res;
2706 
2707     round = get_round(vxrm, a, shift);
2708     res   = (a >> shift)  + round;
2709     if (res > INT32_MAX) {
2710         env->vxsat = 0x1;
2711         return INT32_MAX;
2712     } else if (res < INT32_MIN) {
2713         env->vxsat = 0x1;
2714         return INT32_MIN;
2715     } else {
2716         return res;
2717     }
2718 }
2719 
/*
 * Instantiate vnclip for 8/16/32-bit results, with vnclip8..vnclip32 as
 * the per-element operation: OPIVV2_RM (_wv) forms first, then OPIVX2_RM
 * (_wx) forms.  Each instantiation uses the next-larger H macro for its
 * vs2 operand (H2/H4/H8 against H1/H2/H4 for the destination).  The
 * RVVCALL/OPIV*2_RM/GEN_VEXT_V*_RM macros are defined outside this excerpt.
 */
2720 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
2721 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
2722 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
2723 GEN_VEXT_VV_RM(vnclip_wv_b, 1, 1)
2724 GEN_VEXT_VV_RM(vnclip_wv_h, 2, 2)
2725 GEN_VEXT_VV_RM(vnclip_wv_w, 4, 4)
2726 
2727 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
2728 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
2729 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
2730 GEN_VEXT_VX_RM(vnclip_wx_b, 1, 1)
2731 GEN_VEXT_VX_RM(vnclip_wx_h, 2, 2)
2732 GEN_VEXT_VX_RM(vnclip_wx_w, 4, 4)
2733 
2734 static inline uint8_t
2735 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2736 {
2737     uint8_t round, shift = b & 0xf;
2738     uint16_t res;
2739 
2740     round = get_round(vxrm, a, shift);
2741     res   = (a >> shift)  + round;
2742     if (res > UINT8_MAX) {
2743         env->vxsat = 0x1;
2744         return UINT8_MAX;
2745     } else {
2746         return res;
2747     }
2748 }
2749 
2750 static inline uint16_t
2751 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
2752 {
2753     uint8_t round, shift = b & 0x1f;
2754     uint32_t res;
2755 
2756     round = get_round(vxrm, a, shift);
2757     res   = (a >> shift)  + round;
2758     if (res > UINT16_MAX) {
2759         env->vxsat = 0x1;
2760         return UINT16_MAX;
2761     } else {
2762         return res;
2763     }
2764 }
2765 
2766 static inline uint32_t
2767 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
2768 {
2769     uint8_t round, shift = b & 0x3f;
2770     uint64_t res;
2771 
2772     round = get_round(vxrm, a, shift);
2773     res   = (a >> shift)  + round;
2774     if (res > UINT32_MAX) {
2775         env->vxsat = 0x1;
2776         return UINT32_MAX;
2777     } else {
2778         return res;
2779     }
2780 }
2781 
/*
 * Instantiate vnclipu for 8/16/32-bit results, with vnclipu8..vnclipu32 as
 * the per-element operation: OPIVV2_RM (_wv) forms first, then OPIVX2_RM
 * (_wx) forms.  Each instantiation uses the next-larger H macro for its
 * vs2 operand (H2/H4/H8 against H1/H2/H4 for the destination).  The
 * RVVCALL/OPIV*2_RM/GEN_VEXT_V*_RM macros are defined outside this excerpt.
 */
2782 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
2783 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
2784 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
2785 GEN_VEXT_VV_RM(vnclipu_wv_b, 1, 1)
2786 GEN_VEXT_VV_RM(vnclipu_wv_h, 2, 2)
2787 GEN_VEXT_VV_RM(vnclipu_wv_w, 4, 4)
2788 
2789 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
2790 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
2791 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
2792 GEN_VEXT_VX_RM(vnclipu_wx_b, 1, 1)
2793 GEN_VEXT_VX_RM(vnclipu_wx_h, 2, 2)
2794 GEN_VEXT_VX_RM(vnclipu_wx_w, 4, 4)
2795 
2796 /*
2797  *** Vector Floating-Point Arithmetic Instructions
2798  */
2799 /* Vector Single-Width Floating-Point Add/Subtract Instructions */
/*
 * Define do_<NAME>(): the per-element body of a two-operand floating-point
 * vector-vector operation.  Element i of vs1 (type T1, index adjusted by
 * HS1) and of vs2 (type T2, index adjusted by HS2) are loaded, and
 * OP(vs2 element, vs1 element, &env->fp_status) is stored to element i of
 * vd (type TD, index adjusted by HD).  Note that the vs2 element is passed
 * to OP first.
 */
#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
                      CPURISCVState *env)                      \
{                                                              \
    T1 *src1 = vs1;                                            \
    T2 *src2 = vs2;                                            \
    TD *dst = vd;                                              \
    TX1 op1 = src1[HS1(i)];                                    \
    TX2 op2 = src2[HS2(i)];                                    \
                                                               \
    dst[HD(i)] = OP(op2, op1, &env->fp_status);                \
}
2808 
/*
 * Define HELPER(NAME): the vector-vector loop around do_<NAME>().
 * Elements from env->vstart up to (not including) env->vl are visited; an
 * element is skipped when vext_vm(desc) is zero and vext_elem_mask(v0, i)
 * is zero.  env->vstart is reset to 0 afterwards.  ESZ and DSZ are not
 * used by this macro.
 */
#define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ)                   \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    const uint32_t unmasked = vext_vm(desc);              \
    const uint32_t nelem = env->vl;                       \
    uint32_t idx = env->vstart;                           \
                                                          \
    while (idx < nelem) {                                 \
        if (unmasked || vext_elem_mask(v0, idx)) {        \
            do_##NAME(vd, vs1, vs2, idx, env);            \
        }                                                 \
        idx++;                                            \
    }                                                     \
    env->vstart = 0;                                      \
}
2826 
/*
 * vfadd.vv for 16/32/64-bit elements: OPFVV2 defines do_<name>() around
 * float16_add/float32_add/float64_add, and GEN_VEXT_VV_ENV defines the
 * helper that loops over the elements.  RVVCALL is defined outside this
 * excerpt.
 */
2827 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
2828 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
2829 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
2830 GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2)
2831 GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4)
2832 GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8)
2833 
/*
 * Define do_<NAME>(): the per-element body of a two-operand floating-point
 * vector-scalar operation.  The scalar s1 is narrowed through T1 and then
 * converted to TX1; element i of vs2 (type T2, index adjusted by HS2) is
 * loaded; and OP(vs2 element, scalar, &env->fp_status) is stored to element
 * i of vd (type TD, index adjusted by HD).
 */
#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)        \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
                      CPURISCVState *env)                      \
{                                                              \
    T2 *src2 = vs2;                                            \
    TD *dst = vd;                                              \
    TX1 scalar = (TX1)(T1)s1;                                  \
    TX2 elem = src2[HS2(i)];                                   \
                                                               \
    dst[HD(i)] = OP(elem, scalar, &env->fp_status);            \
}
2841 
/*
 * Define HELPER(NAME): the vector-scalar loop around do_<NAME>().
 * Elements from env->vstart up to (not including) env->vl are visited; an
 * element is skipped when vext_vm(desc) is zero and vext_elem_mask(v0, i)
 * is zero.  env->vstart is reset to 0 afterwards.  ESZ and DSZ are not
 * used by this macro.
 */
#define GEN_VEXT_VF(NAME, ESZ, DSZ)                       \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1,        \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    const uint32_t unmasked = vext_vm(desc);              \
    const uint32_t nelem = env->vl;                       \
    uint32_t idx = env->vstart;                           \
                                                          \
    while (idx < nelem) {                                 \
        if (unmasked || vext_elem_mask(v0, idx)) {        \
            do_##NAME(vd, s1, vs2, idx, env);             \
        }                                                 \
        idx++;                                            \
    }                                                     \
    env->vstart = 0;                                      \
}
2859 
/*
 * vfadd.vf, then vfsub.vv and vfsub.vf, for 16/32/64-bit elements.
 * OPFVV2/OPFVF2 define do_<name>() around the softfloat add/sub routine
 * named in the last argument; GEN_VEXT_VV_ENV/GEN_VEXT_VF define the
 * helper that loops over the elements.  RVVCALL is defined outside this
 * excerpt.
 */
2860 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
2861 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
2862 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
2863 GEN_VEXT_VF(vfadd_vf_h, 2, 2)
2864 GEN_VEXT_VF(vfadd_vf_w, 4, 4)
2865 GEN_VEXT_VF(vfadd_vf_d, 8, 8)
2866 
2867 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
2868 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
2869 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
2870 GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2)
2871 GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4)
2872 GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8)
2873 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
2874 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
2875 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
2876 GEN_VEXT_VF(vfsub_vf_h, 2, 2)
2877 GEN_VEXT_VF(vfsub_vf_w, 4, 4)
2878 GEN_VEXT_VF(vfsub_vf_d, 8, 8)
2879 
2880 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
2881 {
2882     return float16_sub(b, a, s);
2883 }
2884 
2885 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
2886 {
2887     return float32_sub(b, a, s);
2888 }
2889 
2890 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
2891 {
2892     return float64_sub(b, a, s);
2893 }
2894 
/*
 * vfrsub.vf for 16/32/64-bit elements: OPFVF2 defines do_<name>() around
 * the operand-swapping floatN_rsub wrappers, and GEN_VEXT_VF defines the
 * helper that loops over the elements.  RVVCALL is defined outside this
 * excerpt.
 */
2895 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
2896 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
2897 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
2898 GEN_VEXT_VF(vfrsub_vf_h, 2, 2)
2899 GEN_VEXT_VF(vfrsub_vf_w, 4, 4)
2900 GEN_VEXT_VF(vfrsub_vf_d, 8, 8)
2901 
2902 /* Vector Widening Floating-Point Add/Subtract Instructions */
2903 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
2904 {
2905     return float32_add(float16_to_float32(a, true, s),
2906             float16_to_float32(b, true, s), s);
2907 }
2908 
2909 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
2910 {
2911     return float64_add(float32_to_float64(a, s),
2912             float32_to_float64(b, s), s);
2913 
2914 }
2915 
/*
 * vfwadd.vv and vfwadd.vf for 16- and 32-bit source elements, built on
 * vfwadd16/vfwadd32.  The destination uses the next-larger H macro
 * (H4/H8); the GEN_VEXT_* size arguments are 2,4 and 4,8.  RVVCALL and the
 * WOP_UUU_* type lists are defined outside this excerpt.
 */
2916 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
2917 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
2918 GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4)
2919 GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8)
2920 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
2921 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
2922 GEN_VEXT_VF(vfwadd_vf_h, 2, 4)
2923 GEN_VEXT_VF(vfwadd_vf_w, 4, 8)
2924 
2925 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
2926 {
2927     return float32_sub(float16_to_float32(a, true, s),
2928             float16_to_float32(b, true, s), s);
2929 }
2930 
2931 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
2932 {
2933     return float64_sub(float32_to_float64(a, s),
2934             float32_to_float64(b, s), s);
2935 
2936 }
2937 
/*
 * vfwsub.vv and vfwsub.vf for 16- and 32-bit source elements, built on
 * vfwsub16/vfwsub32.  The destination uses the next-larger H macro
 * (H4/H8); the GEN_VEXT_* size arguments are 2,4 and 4,8.  RVVCALL and the
 * WOP_UUU_* type lists are defined outside this excerpt.
 */
2938 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
2939 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
2940 GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4)
2941 GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8)
2942 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
2943 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
2944 GEN_VEXT_VF(vfwsub_vf_h, 2, 4)
2945 GEN_VEXT_VF(vfwsub_vf_w, 4, 8)
2946 
2947 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
2948 {
2949     return float32_add(a, float16_to_float32(b, true, s), s);
2950 }
2951 
2952 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
2953 {
2954     return float64_add(a, float32_to_float64(b, s), s);
2955 }
2956 
/*
 * vfwadd.wv and vfwadd.wf for 16- and 32-bit narrow operands, built on
 * vfwaddw16/vfwaddw32, whose first operand is already the wide type.
 * RVVCALL and the WOP_WUUU_* type lists are defined outside this excerpt.
 */
2957 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
2958 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
2959 GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4)
2960 GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8)
2961 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
2962 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
2963 GEN_VEXT_VF(vfwadd_wf_h, 2, 4)
2964 GEN_VEXT_VF(vfwadd_wf_w, 4, 8)
2965 
2966 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
2967 {
2968     return float32_sub(a, float16_to_float32(b, true, s), s);
2969 }
2970 
2971 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
2972 {
2973     return float64_sub(a, float32_to_float64(b, s), s);
2974 }
2975 
/*
 * vfwsub.wv and vfwsub.wf for 16- and 32-bit narrow operands, built on
 * vfwsubw16/vfwsubw32, whose first operand is already the wide type.
 * RVVCALL and the WOP_WUUU_* type lists are defined outside this excerpt.
 */
2976 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
2977 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
2978 GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4)
2979 GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8)
2980 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
2981 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
2982 GEN_VEXT_VF(vfwsub_wf_h, 2, 4)
2983 GEN_VEXT_VF(vfwsub_wf_w, 4, 8)
2984 
2985 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */
/*
 * vfmul (.vv, .vf) followed by vfdiv (.vv, .vf) for 16/32/64-bit elements.
 * OPFVV2/OPFVF2 define do_<name>() around the softfloat mul/div routine
 * named in the last argument; GEN_VEXT_VV_ENV/GEN_VEXT_VF define the
 * helper that loops over the elements.  RVVCALL is defined outside this
 * excerpt.
 */
2986 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
2987 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
2988 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
2989 GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2)
2990 GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4)
2991 GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8)
2992 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
2993 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
2994 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
2995 GEN_VEXT_VF(vfmul_vf_h, 2, 2)
2996 GEN_VEXT_VF(vfmul_vf_w, 4, 4)
2997 GEN_VEXT_VF(vfmul_vf_d, 8, 8)
2998 
2999 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
3000 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
3001 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
3002 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2)
3003 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4)
3004 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8)
3005 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
3006 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
3007 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
3008 GEN_VEXT_VF(vfdiv_vf_h, 2, 2)
3009 GEN_VEXT_VF(vfdiv_vf_w, 4, 4)
3010 GEN_VEXT_VF(vfdiv_vf_d, 8, 8)
3011 
3012 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3013 {
3014     return float16_div(b, a, s);
3015 }
3016 
3017 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
3018 {
3019     return float32_div(b, a, s);
3020 }
3021 
3022 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
3023 {
3024     return float64_div(b, a, s);
3025 }
3026 
/*
 * vfrdiv.vf for 16/32/64-bit elements: OPFVF2 defines do_<name>() around
 * the operand-swapping floatN_rdiv wrappers, and GEN_VEXT_VF defines the
 * helper that loops over the elements.  RVVCALL is defined outside this
 * excerpt.
 */
3027 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
3028 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
3029 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
3030 GEN_VEXT_VF(vfrdiv_vf_h, 2, 2)
3031 GEN_VEXT_VF(vfrdiv_vf_w, 4, 4)
3032 GEN_VEXT_VF(vfrdiv_vf_d, 8, 8)
3033 
3034 /* Vector Widening Floating-Point Multiply */
3035 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3036 {
3037     return float32_mul(float16_to_float32(a, true, s),
3038             float16_to_float32(b, true, s), s);
3039 }
3040 
3041 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3042 {
3043     return float64_mul(float32_to_float64(a, s),
3044             float32_to_float64(b, s), s);
3045 
3046 }
/*
 * vfwmul.vv and vfwmul.vf for 16- and 32-bit source elements, built on
 * vfwmul16/vfwmul32.  The destination uses the next-larger H macro
 * (H4/H8); the GEN_VEXT_* size arguments are 2,4 and 4,8.  RVVCALL and the
 * WOP_UUU_* type lists are defined outside this excerpt.
 */
3047 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3048 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
3049 GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4)
3050 GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8)
3051 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3052 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
3053 GEN_VEXT_VF(vfwmul_vf_h, 2, 4)
3054 GEN_VEXT_VF(vfwmul_vf_w, 4, 8)
3055 
3056 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
/*
 * Define do_<NAME>(): a three-operand, per-element vector-vector operation.
 * Element i of vs1, vs2 and the current element i of vd are read, and
 * OP(s2, s1, old_vd, &env->fp_status) is written back to vd.  HD/HS1/HS2
 * map the logical index i to the index used for each operand array.
 */
#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)       \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i,       \
        CPURISCVState *env)                                        \
{                                                                  \
    TD *dest = (TD *)vd + HD(i);                                   \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
                                                                   \
    *dest = OP(s2, s1, *dest, &env->fp_status);                    \
}
3066 
3067 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3068 {
3069     return float16_muladd(a, b, d, 0, s);
3070 }
3071 
3072 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3073 {
3074     return float32_muladd(a, b, d, 0, s);
3075 }
3076 
3077 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3078 {
3079     return float64_muladd(a, b, d, 0, s);
3080 }
3081 
/* Instantiate the vector-vector vfmacc helpers for h/w/d elements. */
RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8)
3088 
/*
 * Define do_<NAME>(): a three-operand, per-element vector-scalar operation.
 * The scalar s1 is narrowed to T1 and then converted to TX1; element i of
 * vs2 and the current element i of vd are read, and
 * OP(s2, scalar, old_vd, &env->fp_status) is written back to vd.
 */
#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)           \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i,    \
        CPURISCVState *env)                                       \
{                                                                 \
    TD *dest = (TD *)vd + HD(i);                                  \
    TX1 scalar = (TX1)(T1)s1;                                     \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                               \
                                                                  \
    *dest = OP(s2, scalar, *dest, &env->fp_status);               \
}
3097 
/* Instantiate the vector-scalar vfmacc helpers for h/w/d elements. */
RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
GEN_VEXT_VF(vfmacc_vf_h, 2, 2)
GEN_VEXT_VF(vfmacc_vf_w, 4, 4)
GEN_VEXT_VF(vfmacc_vf_d, 8, 8)
3104 
3105 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3106 {
3107     return float16_muladd(a, b, d,
3108             float_muladd_negate_c | float_muladd_negate_product, s);
3109 }
3110 
3111 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3112 {
3113     return float32_muladd(a, b, d,
3114             float_muladd_negate_c | float_muladd_negate_product, s);
3115 }
3116 
3117 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3118 {
3119     return float64_muladd(a, b, d,
3120             float_muladd_negate_c | float_muladd_negate_product, s);
3121 }
3122 
/* Instantiate the vfnmacc helpers (vector-vector, then vector-scalar). */
RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8)
RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
GEN_VEXT_VF(vfnmacc_vf_h, 2, 2)
GEN_VEXT_VF(vfnmacc_vf_w, 4, 4)
GEN_VEXT_VF(vfnmacc_vf_d, 8, 8)
3135 
3136 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3137 {
3138     return float16_muladd(a, b, d, float_muladd_negate_c, s);
3139 }
3140 
3141 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3142 {
3143     return float32_muladd(a, b, d, float_muladd_negate_c, s);
3144 }
3145 
3146 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3147 {
3148     return float64_muladd(a, b, d, float_muladd_negate_c, s);
3149 }
3150 
/* Instantiate the vfmsac helpers (vector-vector, then vector-scalar). */
RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8)
RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
GEN_VEXT_VF(vfmsac_vf_h, 2, 2)
GEN_VEXT_VF(vfmsac_vf_w, 4, 4)
GEN_VEXT_VF(vfmsac_vf_d, 8, 8)
3163 
3164 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3165 {
3166     return float16_muladd(a, b, d, float_muladd_negate_product, s);
3167 }
3168 
3169 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3170 {
3171     return float32_muladd(a, b, d, float_muladd_negate_product, s);
3172 }
3173 
3174 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3175 {
3176     return float64_muladd(a, b, d, float_muladd_negate_product, s);
3177 }
3178 
/* Instantiate the vfnmsac helpers (vector-vector, then vector-scalar). */
RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8)
RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
GEN_VEXT_VF(vfnmsac_vf_h, 2, 2)
GEN_VEXT_VF(vfnmsac_vf_w, 4, 4)
GEN_VEXT_VF(vfnmsac_vf_d, 8, 8)
3191 
3192 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3193 {
3194     return float16_muladd(d, b, a, 0, s);
3195 }
3196 
3197 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3198 {
3199     return float32_muladd(d, b, a, 0, s);
3200 }
3201 
3202 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3203 {
3204     return float64_muladd(d, b, a, 0, s);
3205 }
3206 
/* Instantiate the vfmadd helpers (vector-vector, then vector-scalar). */
RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8)
RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
GEN_VEXT_VF(vfmadd_vf_h, 2, 2)
GEN_VEXT_VF(vfmadd_vf_w, 4, 4)
GEN_VEXT_VF(vfmadd_vf_d, 8, 8)
3219 
3220 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3221 {
3222     return float16_muladd(d, b, a,
3223             float_muladd_negate_c | float_muladd_negate_product, s);
3224 }
3225 
3226 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3227 {
3228     return float32_muladd(d, b, a,
3229             float_muladd_negate_c | float_muladd_negate_product, s);
3230 }
3231 
3232 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3233 {
3234     return float64_muladd(d, b, a,
3235             float_muladd_negate_c | float_muladd_negate_product, s);
3236 }
3237 
/* Instantiate the vfnmadd helpers (vector-vector, then vector-scalar). */
RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8)
RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
GEN_VEXT_VF(vfnmadd_vf_h, 2, 2)
GEN_VEXT_VF(vfnmadd_vf_w, 4, 4)
GEN_VEXT_VF(vfnmadd_vf_d, 8, 8)
3250 
3251 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3252 {
3253     return float16_muladd(d, b, a, float_muladd_negate_c, s);
3254 }
3255 
3256 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3257 {
3258     return float32_muladd(d, b, a, float_muladd_negate_c, s);
3259 }
3260 
3261 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3262 {
3263     return float64_muladd(d, b, a, float_muladd_negate_c, s);
3264 }
3265 
/* Instantiate the vfmsub helpers (vector-vector, then vector-scalar). */
RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8)
RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
GEN_VEXT_VF(vfmsub_vf_h, 2, 2)
GEN_VEXT_VF(vfmsub_vf_w, 4, 4)
GEN_VEXT_VF(vfmsub_vf_d, 8, 8)
3278 
3279 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3280 {
3281     return float16_muladd(d, b, a, float_muladd_negate_product, s);
3282 }
3283 
3284 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3285 {
3286     return float32_muladd(d, b, a, float_muladd_negate_product, s);
3287 }
3288 
3289 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3290 {
3291     return float64_muladd(d, b, a, float_muladd_negate_product, s);
3292 }
3293 
/* Instantiate the vfnmsub helpers (vector-vector, then vector-scalar). */
RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8)
RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
GEN_VEXT_VF(vfnmsub_vf_h, 2, 2)
GEN_VEXT_VF(vfnmsub_vf_w, 4, 4)
GEN_VEXT_VF(vfnmsub_vf_d, 8, 8)
3306 
3307 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3308 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3309 {
3310     return float32_muladd(float16_to_float32(a, true, s),
3311                         float16_to_float32(b, true, s), d, 0, s);
3312 }
3313 
3314 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3315 {
3316     return float64_muladd(float32_to_float64(a, s),
3317                         float32_to_float64(b, s), d, 0, s);
3318 }
3319 
/*
 * Instantiate the vfwmacc helpers (vector-vector, then vector-scalar);
 * the destination uses the widened type tuple and index macro.
 */
RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4)
GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8)
RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
GEN_VEXT_VF(vfwmacc_vf_h, 2, 4)
GEN_VEXT_VF(vfwmacc_vf_w, 4, 8)
3328 
3329 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3330 {
3331     return float32_muladd(float16_to_float32(a, true, s),
3332                         float16_to_float32(b, true, s), d,
3333                         float_muladd_negate_c | float_muladd_negate_product, s);
3334 }
3335 
3336 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3337 {
3338     return float64_muladd(float32_to_float64(a, s),
3339                         float32_to_float64(b, s), d,
3340                         float_muladd_negate_c | float_muladd_negate_product, s);
3341 }
3342 
/* Instantiate the vfwnmacc helpers (vector-vector, then vector-scalar). */
RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4)
GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8)
RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4)
GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8)
3351 
3352 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3353 {
3354     return float32_muladd(float16_to_float32(a, true, s),
3355                         float16_to_float32(b, true, s), d,
3356                         float_muladd_negate_c, s);
3357 }
3358 
3359 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3360 {
3361     return float64_muladd(float32_to_float64(a, s),
3362                         float32_to_float64(b, s), d,
3363                         float_muladd_negate_c, s);
3364 }
3365 
/* Instantiate the vfwmsac helpers (vector-vector, then vector-scalar). */
RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4)
GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8)
RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
GEN_VEXT_VF(vfwmsac_vf_h, 2, 4)
GEN_VEXT_VF(vfwmsac_vf_w, 4, 8)
3374 
3375 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3376 {
3377     return float32_muladd(float16_to_float32(a, true, s),
3378                         float16_to_float32(b, true, s), d,
3379                         float_muladd_negate_product, s);
3380 }
3381 
3382 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3383 {
3384     return float64_muladd(float32_to_float64(a, s),
3385                         float32_to_float64(b, s), d,
3386                         float_muladd_negate_product, s);
3387 }
3388 
/* Instantiate the vfwnmsac helpers (vector-vector, then vector-scalar). */
RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4)
GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8)
RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4)
GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8)
3397 
3398 /* Vector Floating-Point Square-Root Instruction */
/*
 * Type tuples for unary operations, expanded into the (TD, T2, TX2)
 * parameters of OPFVV1: destination type, source element type and the
 * type the source element is loaded into.
 */
#define OP_UU_H uint16_t, uint16_t, uint16_t
#define OP_UU_W uint32_t, uint32_t, uint32_t
#define OP_UU_D uint64_t, uint64_t, uint64_t
3403 
/*
 * Define do_<NAME>(): a unary per-element operation.  Element i of vs2 is
 * read and OP(element, &env->fp_status) is stored to element i of vd;
 * HD/HS2 map the logical index i to the index used for each array.
 */
#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP)        \
static void do_##NAME(void *vd, void *vs2, int i,      \
        CPURISCVState *env)                            \
{                                                      \
    TD *dest = (TD *)vd + HD(i);                       \
    TX2 src = *((T2 *)vs2 + HS2(i));                   \
                                                       \
    *dest = OP(src, &env->fp_status);                  \
}
3411 
/*
 * Generate the helper entry point for a unary per-element operation
 * do_<NAME>.  Elements env->vstart .. env->vl - 1 are visited; when
 * vext_vm(desc) is zero, elements whose mask bit in v0 is clear are
 * skipped.  env->vstart is reset to 0 afterwards, except on the early
 * return taken when vl is 0.  ESZ and DSZ are not used in the expansion.
 */
#define GEN_VEXT_V_ENV(NAME, ESZ, DSZ)                 \
void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
        CPURISCVState *env, uint32_t desc)             \
{                                                      \
    uint32_t unmasked = vext_vm(desc);                 \
    uint32_t vl = env->vl;                             \
    uint32_t idx;                                      \
                                                       \
    if (vl == 0) {                                     \
        return;                                        \
    }                                                  \
    for (idx = env->vstart; idx < vl; idx++) {         \
        if (unmasked || vext_elem_mask(v0, idx)) {     \
            do_##NAME(vd, vs2, idx, env);              \
        }                                              \
    }                                                  \
    env->vstart = 0;                                   \
}
3431 
/* Instantiate the vfsqrt_v_* helpers on top of float{16,32,64}_sqrt. */
RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2)
GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4)
GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8)
3438 
3439 /*
3440  * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
3441  *
3442  * Adapted from riscv-v-spec recip.c:
3443  * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3444  */
/*
 * Table-driven core of the 7-bit reciprocal square-root estimate.
 *
 * @f:         raw bit pattern of the operand (sign, exponent, fraction)
 * @exp_size:  number of exponent bits in the format
 * @frac_size: number of fraction bits in the format
 *
 * Returns the raw bit pattern of the estimate in the same format.  The
 * width-specific wrappers in this file filter out NaNs, infinities,
 * zeros and negative operands before calling this function.
 */
static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
{
    /* Kept in static storage so it is not rebuilt on every call. */
    static const uint8_t lookup_table[] = {
        52, 51, 50, 48, 47, 46, 44, 43,
        42, 41, 40, 39, 38, 36, 35, 34,
        33, 32, 31, 30, 30, 29, 28, 27,
        26, 25, 24, 23, 23, 22, 21, 20,
        19, 19, 18, 17, 16, 16, 15, 14,
        14, 13, 12, 12, 11, 10, 10, 9,
        9, 8, 7, 7, 6, 6, 5, 4,
        4, 3, 3, 2, 2, 1, 1, 0,
        127, 125, 123, 121, 119, 118, 116, 114,
        113, 111, 109, 108, 106, 105, 103, 102,
        100, 99, 97, 96, 95, 93, 92, 91,
        90, 88, 87, 86, 85, 84, 83, 82,
        80, 79, 78, 77, 76, 75, 74, 73,
        72, 71, 70, 70, 69, 68, 67, 66,
        65, 64, 63, 63, 62, 61, 60, 59,
        59, 58, 57, 56, 56, 55, 54, 53
    };
    const int precision = 7;
    uint64_t sign = extract64(f, frac_size + exp_size, 1);
    uint64_t exp = extract64(f, frac_size, exp_size);
    uint64_t frac = extract64(f, 0, frac_size);
    uint64_t out_frac, out_exp;
    uint64_t val = 0;
    int idx;

    if (exp == 0 && frac != 0) { /* subnormal */
        /*
         * Normalize the subnormal.  exp is unsigned and deliberately
         * wraps below zero; the arithmetic below relies on that.
         */
        while (extract64(frac, frac_size - 1, 1) == 0) {
            exp--;
            frac <<= 1;
        }

        frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
    }

    /* Index: exponent parity in the top bit, then the 6 leading frac bits. */
    idx = ((exp & 1) << (precision - 1)) |
          (frac >> (frac_size - precision + 1));
    out_frac = (uint64_t)(lookup_table[idx]) << (frac_size - precision);
    out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;

    val = deposit64(val, 0, frac_size, out_frac);
    val = deposit64(val, frac_size, exp_size, out_exp);
    val = deposit64(val, frac_size + exp_size, 1, sign);
    return val;
}
3493 
3494 static float16 frsqrt7_h(float16 f, float_status *s)
3495 {
3496     int exp_size = 5, frac_size = 10;
3497     bool sign = float16_is_neg(f);
3498 
3499     /*
3500      * frsqrt7(sNaN) = canonical NaN
3501      * frsqrt7(-inf) = canonical NaN
3502      * frsqrt7(-normal) = canonical NaN
3503      * frsqrt7(-subnormal) = canonical NaN
3504      */
3505     if (float16_is_signaling_nan(f, s) ||
3506             (float16_is_infinity(f) && sign) ||
3507             (float16_is_normal(f) && sign) ||
3508             (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
3509         s->float_exception_flags |= float_flag_invalid;
3510         return float16_default_nan(s);
3511     }
3512 
3513     /* frsqrt7(qNaN) = canonical NaN */
3514     if (float16_is_quiet_nan(f, s)) {
3515         return float16_default_nan(s);
3516     }
3517 
3518     /* frsqrt7(+-0) = +-inf */
3519     if (float16_is_zero(f)) {
3520         s->float_exception_flags |= float_flag_divbyzero;
3521         return float16_set_sign(float16_infinity, sign);
3522     }
3523 
3524     /* frsqrt7(+inf) = +0 */
3525     if (float16_is_infinity(f) && !sign) {
3526         return float16_set_sign(float16_zero, sign);
3527     }
3528 
3529     /* +normal, +subnormal */
3530     uint64_t val = frsqrt7(f, exp_size, frac_size);
3531     return make_float16(val);
3532 }
3533 
3534 static float32 frsqrt7_s(float32 f, float_status *s)
3535 {
3536     int exp_size = 8, frac_size = 23;
3537     bool sign = float32_is_neg(f);
3538 
3539     /*
3540      * frsqrt7(sNaN) = canonical NaN
3541      * frsqrt7(-inf) = canonical NaN
3542      * frsqrt7(-normal) = canonical NaN
3543      * frsqrt7(-subnormal) = canonical NaN
3544      */
3545     if (float32_is_signaling_nan(f, s) ||
3546             (float32_is_infinity(f) && sign) ||
3547             (float32_is_normal(f) && sign) ||
3548             (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
3549         s->float_exception_flags |= float_flag_invalid;
3550         return float32_default_nan(s);
3551     }
3552 
3553     /* frsqrt7(qNaN) = canonical NaN */
3554     if (float32_is_quiet_nan(f, s)) {
3555         return float32_default_nan(s);
3556     }
3557 
3558     /* frsqrt7(+-0) = +-inf */
3559     if (float32_is_zero(f)) {
3560         s->float_exception_flags |= float_flag_divbyzero;
3561         return float32_set_sign(float32_infinity, sign);
3562     }
3563 
3564     /* frsqrt7(+inf) = +0 */
3565     if (float32_is_infinity(f) && !sign) {
3566         return float32_set_sign(float32_zero, sign);
3567     }
3568 
3569     /* +normal, +subnormal */
3570     uint64_t val = frsqrt7(f, exp_size, frac_size);
3571     return make_float32(val);
3572 }
3573 
3574 static float64 frsqrt7_d(float64 f, float_status *s)
3575 {
3576     int exp_size = 11, frac_size = 52;
3577     bool sign = float64_is_neg(f);
3578 
3579     /*
3580      * frsqrt7(sNaN) = canonical NaN
3581      * frsqrt7(-inf) = canonical NaN
3582      * frsqrt7(-normal) = canonical NaN
3583      * frsqrt7(-subnormal) = canonical NaN
3584      */
3585     if (float64_is_signaling_nan(f, s) ||
3586             (float64_is_infinity(f) && sign) ||
3587             (float64_is_normal(f) && sign) ||
3588             (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
3589         s->float_exception_flags |= float_flag_invalid;
3590         return float64_default_nan(s);
3591     }
3592 
3593     /* frsqrt7(qNaN) = canonical NaN */
3594     if (float64_is_quiet_nan(f, s)) {
3595         return float64_default_nan(s);
3596     }
3597 
3598     /* frsqrt7(+-0) = +-inf */
3599     if (float64_is_zero(f)) {
3600         s->float_exception_flags |= float_flag_divbyzero;
3601         return float64_set_sign(float64_infinity, sign);
3602     }
3603 
3604     /* frsqrt7(+inf) = +0 */
3605     if (float64_is_infinity(f) && !sign) {
3606         return float64_set_sign(float64_zero, sign);
3607     }
3608 
3609     /* +normal, +subnormal */
3610     uint64_t val = frsqrt7(f, exp_size, frac_size);
3611     return make_float64(val);
3612 }
3613 
/* Instantiate the vfrsqrt7_v_* helpers on top of frsqrt7_{h,s,d}. */
RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2, 2)
GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4, 4)
GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8, 8)
3620 
3621 /*
3622  * Vector Floating-Point Reciprocal Estimate Instruction
3623  *
3624  * Adapted from riscv-v-spec recip.c:
3625  * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3626  */
3627 static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
3628                       float_status *s)
3629 {
3630     uint64_t sign = extract64(f, frac_size + exp_size, 1);
3631     uint64_t exp = extract64(f, frac_size, exp_size);
3632     uint64_t frac = extract64(f, 0, frac_size);
3633 
3634     const uint8_t lookup_table[] = {
3635         127, 125, 123, 121, 119, 117, 116, 114,
3636         112, 110, 109, 107, 105, 104, 102, 100,
3637         99, 97, 96, 94, 93, 91, 90, 88,
3638         87, 85, 84, 83, 81, 80, 79, 77,
3639         76, 75, 74, 72, 71, 70, 69, 68,
3640         66, 65, 64, 63, 62, 61, 60, 59,
3641         58, 57, 56, 55, 54, 53, 52, 51,
3642         50, 49, 48, 47, 46, 45, 44, 43,
3643         42, 41, 40, 40, 39, 38, 37, 36,
3644         35, 35, 34, 33, 32, 31, 31, 30,
3645         29, 28, 28, 27, 26, 25, 25, 24,
3646         23, 23, 22, 21, 21, 20, 19, 19,
3647         18, 17, 17, 16, 15, 15, 14, 14,
3648         13, 12, 12, 11, 11, 10, 9, 9,
3649         8, 8, 7, 7, 6, 5, 5, 4,
3650         4, 3, 3, 2, 2, 1, 1, 0
3651     };
3652     const int precision = 7;
3653 
3654     if (exp == 0 && frac != 0) { /* subnormal */
3655         /* Normalize the subnormal. */
3656         while (extract64(frac, frac_size - 1, 1) == 0) {
3657             exp--;
3658             frac <<= 1;
3659         }
3660 
3661         frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3662 
3663         if (exp != 0 && exp != UINT64_MAX) {
3664             /*
3665              * Overflow to inf or max value of same sign,
3666              * depending on sign and rounding mode.
3667              */
3668             s->float_exception_flags |= (float_flag_inexact |
3669                                          float_flag_overflow);
3670 
3671             if ((s->float_rounding_mode == float_round_to_zero) ||
3672                 ((s->float_rounding_mode == float_round_down) && !sign) ||
3673                 ((s->float_rounding_mode == float_round_up) && sign)) {
3674                 /* Return greatest/negative finite value. */
3675                 return (sign << (exp_size + frac_size)) |
3676                     (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
3677             } else {
3678                 /* Return +-inf. */
3679                 return (sign << (exp_size + frac_size)) |
3680                     MAKE_64BIT_MASK(frac_size, exp_size);
3681             }
3682         }
3683     }
3684 
3685     int idx = frac >> (frac_size - precision);
3686     uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3687                             (frac_size - precision);
3688     uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;
3689 
3690     if (out_exp == 0 || out_exp == UINT64_MAX) {
3691         /*
3692          * The result is subnormal, but don't raise the underflow exception,
3693          * because there's no additional loss of precision.
3694          */
3695         out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
3696         if (out_exp == UINT64_MAX) {
3697             out_frac >>= 1;
3698             out_exp = 0;
3699         }
3700     }
3701 
3702     uint64_t val = 0;
3703     val = deposit64(val, 0, frac_size, out_frac);
3704     val = deposit64(val, frac_size, exp_size, out_exp);
3705     val = deposit64(val, frac_size + exp_size, 1, sign);
3706     return val;
3707 }
3708 
3709 static float16 frec7_h(float16 f, float_status *s)
3710 {
3711     int exp_size = 5, frac_size = 10;
3712     bool sign = float16_is_neg(f);
3713 
3714     /* frec7(+-inf) = +-0 */
3715     if (float16_is_infinity(f)) {
3716         return float16_set_sign(float16_zero, sign);
3717     }
3718 
3719     /* frec7(+-0) = +-inf */
3720     if (float16_is_zero(f)) {
3721         s->float_exception_flags |= float_flag_divbyzero;
3722         return float16_set_sign(float16_infinity, sign);
3723     }
3724 
3725     /* frec7(sNaN) = canonical NaN */
3726     if (float16_is_signaling_nan(f, s)) {
3727         s->float_exception_flags |= float_flag_invalid;
3728         return float16_default_nan(s);
3729     }
3730 
3731     /* frec7(qNaN) = canonical NaN */
3732     if (float16_is_quiet_nan(f, s)) {
3733         return float16_default_nan(s);
3734     }
3735 
3736     /* +-normal, +-subnormal */
3737     uint64_t val = frec7(f, exp_size, frac_size, s);
3738     return make_float16(val);
3739 }
3740 
3741 static float32 frec7_s(float32 f, float_status *s)
3742 {
3743     int exp_size = 8, frac_size = 23;
3744     bool sign = float32_is_neg(f);
3745 
3746     /* frec7(+-inf) = +-0 */
3747     if (float32_is_infinity(f)) {
3748         return float32_set_sign(float32_zero, sign);
3749     }
3750 
3751     /* frec7(+-0) = +-inf */
3752     if (float32_is_zero(f)) {
3753         s->float_exception_flags |= float_flag_divbyzero;
3754         return float32_set_sign(float32_infinity, sign);
3755     }
3756 
3757     /* frec7(sNaN) = canonical NaN */
3758     if (float32_is_signaling_nan(f, s)) {
3759         s->float_exception_flags |= float_flag_invalid;
3760         return float32_default_nan(s);
3761     }
3762 
3763     /* frec7(qNaN) = canonical NaN */
3764     if (float32_is_quiet_nan(f, s)) {
3765         return float32_default_nan(s);
3766     }
3767 
3768     /* +-normal, +-subnormal */
3769     uint64_t val = frec7(f, exp_size, frac_size, s);
3770     return make_float32(val);
3771 }
3772 
3773 static float64 frec7_d(float64 f, float_status *s)
3774 {
3775     int exp_size = 11, frac_size = 52;
3776     bool sign = float64_is_neg(f);
3777 
3778     /* frec7(+-inf) = +-0 */
3779     if (float64_is_infinity(f)) {
3780         return float64_set_sign(float64_zero, sign);
3781     }
3782 
3783     /* frec7(+-0) = +-inf */
3784     if (float64_is_zero(f)) {
3785         s->float_exception_flags |= float_flag_divbyzero;
3786         return float64_set_sign(float64_infinity, sign);
3787     }
3788 
3789     /* frec7(sNaN) = canonical NaN */
3790     if (float64_is_signaling_nan(f, s)) {
3791         s->float_exception_flags |= float_flag_invalid;
3792         return float64_default_nan(s);
3793     }
3794 
3795     /* frec7(qNaN) = canonical NaN */
3796     if (float64_is_quiet_nan(f, s)) {
3797         return float64_default_nan(s);
3798     }
3799 
3800     /* +-normal, +-subnormal */
3801     uint64_t val = frec7(f, exp_size, frac_size, s);
3802     return make_float64(val);
3803 }
3804 
/* Instantiate the vfrec7_v_* helpers on top of frec7_{h,s,d}. */
RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
GEN_VEXT_V_ENV(vfrec7_v_h, 2, 2)
GEN_VEXT_V_ENV(vfrec7_v_w, 4, 4)
GEN_VEXT_V_ENV(vfrec7_v_d, 8, 8)
3811 
3812 /* Vector Floating-Point MIN/MAX Instructions */
/*
 * Instantiate the vfmin helpers (vector-vector, then vector-scalar) on
 * top of float{16,32,64}_minimum_number.
 */
RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8)
RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
GEN_VEXT_VF(vfmin_vf_h, 2, 2)
GEN_VEXT_VF(vfmin_vf_w, 4, 4)
GEN_VEXT_VF(vfmin_vf_d, 8, 8)
3825 
/*
 * Instantiate the vfmax helpers (vector-vector, then vector-scalar) on
 * top of float{16,32,64}_maximum_number.
 */
RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8)
RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
GEN_VEXT_VF(vfmax_vf_h, 2, 2)
GEN_VEXT_VF(vfmax_vf_w, 4, 4)
GEN_VEXT_VF(vfmax_vf_d, 8, 8)
3838 
3839 /* Vector Floating-Point Sign-Injection Instructions */
3840 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
3841 {
3842     return deposit64(b, 0, 15, a);
3843 }
3844 
3845 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
3846 {
3847     return deposit64(b, 0, 31, a);
3848 }
3849 
3850 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
3851 {
3852     return deposit64(b, 0, 63, a);
3853 }
3854 
/*
 * vfsgnj: sign injection using fsgnj16/32/64 as the per-element operation,
 * in _vv and _vf forms for 16/32/64-bit elements.
 * NOTE(review): the OPFVV2/OPFVF2 and GEN_VEXT_* macros are defined earlier
 * in this file; confirm operand order there.
 */
RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8)
RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
GEN_VEXT_VF(vfsgnj_vf_h, 2, 2)
GEN_VEXT_VF(vfsgnj_vf_w, 4, 4)
GEN_VEXT_VF(vfsgnj_vf_d, 8, 8)
3867 
3868 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
3869 {
3870     return deposit64(~b, 0, 15, a);
3871 }
3872 
3873 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
3874 {
3875     return deposit64(~b, 0, 31, a);
3876 }
3877 
3878 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
3879 {
3880     return deposit64(~b, 0, 63, a);
3881 }
3882 
/*
 * vfsgnjn: negated sign injection using fsgnjn16/32/64 as the per-element
 * operation, in _vv and _vf forms for 16/32/64-bit elements.
 * NOTE(review): the OPFVV2/OPFVF2 and GEN_VEXT_* macros are defined earlier
 * in this file; confirm operand order there.
 */
RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8)
RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2)
GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4)
GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8)
3895 
3896 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
3897 {
3898     return deposit64(b ^ a, 0, 15, a);
3899 }
3900 
3901 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
3902 {
3903     return deposit64(b ^ a, 0, 31, a);
3904 }
3905 
3906 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
3907 {
3908     return deposit64(b ^ a, 0, 63, a);
3909 }
3910 
/*
 * vfsgnjx: XOR sign injection using fsgnjx16/32/64 as the per-element
 * operation, in _vv and _vf forms for 16/32/64-bit elements.
 * NOTE(review): the OPFVV2/OPFVF2 and GEN_VEXT_* macros are defined earlier
 * in this file; confirm operand order there.
 */
RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8)
RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2)
GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4)
GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8)
3923 
3924 /* Vector Floating-Point Compare Instructions */
/*
 * Emit HELPER(NAME): a vector-vector floating-point compare that writes one
 * mask bit of vd per element.
 *
 *   ETYPE - integer type holding one source element
 *   H     - element index mapping macro (H2/H4/H8 at the call sites)
 *   DO_OP - predicate, called as DO_OP(vs2[i], vs1[i], &env->fp_status)
 *
 * Elements env->vstart .. env->vl - 1 are visited.  When vext_vm(desc) is
 * zero, elements whose v0 mask bit is clear are skipped and their vd bit is
 * not written.  env->vstart is cleared at the end.
 */
#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP)            \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    const uint32_t vm = vext_vm(desc);                        \
    const uint32_t vl = env->vl;                              \
    uint32_t idx;                                             \
                                                              \
    for (idx = env->vstart; idx < vl; idx++) {                \
        ETYPE lhs = ((ETYPE *)vs2)[H(idx)];                   \
        ETYPE rhs = ((ETYPE *)vs1)[H(idx)];                   \
        if (vm || vext_elem_mask(v0, idx)) {                  \
            vext_set_elem_mask(vd, idx,                       \
                               DO_OP(lhs, rhs,                \
                                     &env->fp_status));       \
        }                                                     \
    }                                                         \
    env->vstart = 0;                                          \
}
3944 
/* vmfeq, vector-vector form: mask bit = floatN_eq_quiet(vs2[i], vs1[i]). */
GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
3948 
/*
 * Emit HELPER(NAME): a vector-scalar floating-point compare that writes one
 * mask bit of vd per element.
 *
 *   ETYPE - integer type holding one source element; the 64-bit scalar s1
 *           is cast to it before the comparison
 *   H     - element index mapping macro (H2/H4/H8 at the call sites)
 *   DO_OP - predicate, called as DO_OP(vs2[i], (ETYPE)s1, &env->fp_status)
 *
 * Elements env->vstart .. env->vl - 1 are visited.  When vext_vm(desc) is
 * zero, elements whose v0 mask bit is clear are skipped and their vd bit is
 * not written.  env->vstart is cleared at the end.
 */
#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP)                      \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,       \
                  CPURISCVState *env, uint32_t desc)                \
{                                                                   \
    const uint32_t vm = vext_vm(desc);                              \
    const uint32_t vl = env->vl;                                    \
    uint32_t idx;                                                   \
                                                                    \
    for (idx = env->vstart; idx < vl; idx++) {                      \
        ETYPE lhs = ((ETYPE *)vs2)[H(idx)];                         \
        if (vm || vext_elem_mask(v0, idx)) {                        \
            vext_set_elem_mask(vd, idx,                             \
                               DO_OP(lhs, (ETYPE)s1,                \
                                     &env->fp_status));             \
        }                                                           \
    }                                                               \
    env->vstart = 0;                                                \
}
3967 
/* vmfeq, vector-scalar form: mask bit = floatN_eq_quiet(vs2[i], s1). */
GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
3971 
3972 static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
3973 {
3974     FloatRelation compare = float16_compare_quiet(a, b, s);
3975     return compare != float_relation_equal;
3976 }
3977 
3978 static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
3979 {
3980     FloatRelation compare = float32_compare_quiet(a, b, s);
3981     return compare != float_relation_equal;
3982 }
3983 
3984 static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
3985 {
3986     FloatRelation compare = float64_compare_quiet(a, b, s);
3987     return compare != float_relation_equal;
3988 }
3989 
/* vmfne: mask bit = vmfneN(vs2[i], vs1[i] or scalar s1). */
GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)

/* vmflt: mask bit = floatN_lt(vs2[i], vs1[i] or scalar s1). */
GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)

/* vmfle: mask bit = floatN_le(vs2[i], vs1[i] or scalar s1). */
GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
4010 
4011 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
4012 {
4013     FloatRelation compare = float16_compare(a, b, s);
4014     return compare == float_relation_greater;
4015 }
4016 
4017 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
4018 {
4019     FloatRelation compare = float32_compare(a, b, s);
4020     return compare == float_relation_greater;
4021 }
4022 
4023 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
4024 {
4025     FloatRelation compare = float64_compare(a, b, s);
4026     return compare == float_relation_greater;
4027 }
4028 
/* vmfgt, vector-scalar form only: mask bit = vmfgtN(vs2[i], s1). */
GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4032 
4033 static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4034 {
4035     FloatRelation compare = float16_compare(a, b, s);
4036     return compare == float_relation_greater ||
4037            compare == float_relation_equal;
4038 }
4039 
4040 static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
4041 {
4042     FloatRelation compare = float32_compare(a, b, s);
4043     return compare == float_relation_greater ||
4044            compare == float_relation_equal;
4045 }
4046 
4047 static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
4048 {
4049     FloatRelation compare = float64_compare(a, b, s);
4050     return compare == float_relation_greater ||
4051            compare == float_relation_equal;
4052 }
4053 
/* vmfge, vector-scalar form only: mask bit = vmfgeN(vs2[i], s1). */
GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
4057 
4058 /* Vector Floating-Point Classify Instruction */
/*
 * Emit do_<NAME>(vd, vs2, i): apply the one-operand operation OP to element
 * i of vs2 and store the result in element i of vd.
 *
 *   TD      - destination element type
 *   T2      - source element type as stored in vs2
 *   TX2     - type the source element is converted to before calling OP
 *   HD, HS2 - element index mapping macros for destination and source
 */
#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP)         \
static void do_##NAME(void *vd, void *vs2, int i)      \
{                                                      \
    T2 *src = vs2;                                     \
    TD *dst = vd;                                      \
    TX2 operand = src[HS2(i)];                         \
                                                       \
    dst[HD(i)] = OP(operand);                          \
}
4065 
/*
 * Emit HELPER(NAME): run do_<NAME>() on every active element from
 * env->vstart up to env->vl - 1, then clear env->vstart.
 *
 * When vext_vm(desc) is zero, elements whose v0 mask bit is clear are
 * skipped.  ESZ and DSZ are accepted but not used by this macro.
 */
#define GEN_VEXT_V(NAME, ESZ, DSZ)                     \
void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
                  CPURISCVState *env, uint32_t desc)   \
{                                                      \
    const uint32_t vm = vext_vm(desc);                 \
    const uint32_t vl = env->vl;                       \
    uint32_t idx;                                      \
                                                       \
    for (idx = env->vstart; idx < vl; idx++) {         \
        if (vm || vext_elem_mask(v0, idx)) {           \
            do_##NAME(vd, vs2, idx);                   \
        }                                              \
    }                                                  \
    env->vstart = 0;                                   \
}
4082 
4083 target_ulong fclass_h(uint64_t frs1)
4084 {
4085     float16 f = frs1;
4086     bool sign = float16_is_neg(f);
4087 
4088     if (float16_is_infinity(f)) {
4089         return sign ? 1 << 0 : 1 << 7;
4090     } else if (float16_is_zero(f)) {
4091         return sign ? 1 << 3 : 1 << 4;
4092     } else if (float16_is_zero_or_denormal(f)) {
4093         return sign ? 1 << 2 : 1 << 5;
4094     } else if (float16_is_any_nan(f)) {
4095         float_status s = { }; /* for snan_bit_is_one */
4096         return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4097     } else {
4098         return sign ? 1 << 1 : 1 << 6;
4099     }
4100 }
4101 
4102 target_ulong fclass_s(uint64_t frs1)
4103 {
4104     float32 f = frs1;
4105     bool sign = float32_is_neg(f);
4106 
4107     if (float32_is_infinity(f)) {
4108         return sign ? 1 << 0 : 1 << 7;
4109     } else if (float32_is_zero(f)) {
4110         return sign ? 1 << 3 : 1 << 4;
4111     } else if (float32_is_zero_or_denormal(f)) {
4112         return sign ? 1 << 2 : 1 << 5;
4113     } else if (float32_is_any_nan(f)) {
4114         float_status s = { }; /* for snan_bit_is_one */
4115         return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4116     } else {
4117         return sign ? 1 << 1 : 1 << 6;
4118     }
4119 }
4120 
4121 target_ulong fclass_d(uint64_t frs1)
4122 {
4123     float64 f = frs1;
4124     bool sign = float64_is_neg(f);
4125 
4126     if (float64_is_infinity(f)) {
4127         return sign ? 1 << 0 : 1 << 7;
4128     } else if (float64_is_zero(f)) {
4129         return sign ? 1 << 3 : 1 << 4;
4130     } else if (float64_is_zero_or_denormal(f)) {
4131         return sign ? 1 << 2 : 1 << 5;
4132     } else if (float64_is_any_nan(f)) {
4133         float_status s = { }; /* for snan_bit_is_one */
4134         return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4135     } else {
4136         return sign ? 1 << 1 : 1 << 6;
4137     }
4138 }
4139 
/*
 * vfclass.v: OPIVV1 emits do_vfclass_v_*() applying fclass_h/s/d to each
 * element, and GEN_VEXT_V emits the masked HELPER() loop around it.
 */
RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
GEN_VEXT_V(vfclass_v_h, 2, 2)
GEN_VEXT_V(vfclass_v_w, 4, 4)
GEN_VEXT_V(vfclass_v_d, 8, 8)
4146 
4147 /* Vector Floating-Point Merge Instruction */
/*
 * Emit HELPER(NAME): floating-point merge of a scalar into a vector.
 *
 * For every element from env->vstart up to env->vl - 1, vd[i] receives the
 * scalar s1 (converted to ETYPE) when the element is active, i.e. when
 * vext_vm(desc) is non-zero or the v0 mask bit is set, and vs2[i]
 * otherwise.  env->vstart is cleared at the end.
 */
#define GEN_VFMERGE_VF(NAME, ETYPE, H)                        \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    const uint32_t vm = vext_vm(desc);                        \
    const uint32_t vl = env->vl;                              \
    uint32_t idx;                                             \
                                                              \
    for (idx = env->vstart; idx < vl; idx++) {                \
        ETYPE s2 = *((ETYPE *)vs2 + H(idx));                  \
        ETYPE picked;                                         \
                                                              \
        if (vm || vext_elem_mask(v0, idx)) {                  \
            picked = s1;                                      \
        } else {                                              \
            picked = s2;                                      \
        }                                                     \
        *((ETYPE *)vd + H(idx)) = picked;                     \
    }                                                         \
    env->vstart = 0;                                          \
}
4163 
/* vfmerge.vfm for 16/32/64-bit elements; elements are moved as integers. */
GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
4167 
/*
 * Single-Width Floating-Point/Integer Type-Convert Instructions
 *
 * Source and destination elements have the same width (16/32/64 bits); the
 * per-element operation is the softfloat conversion named on each line.
 * NOTE(review): OPFVV1 and GEN_VEXT_V_ENV are defined earlier in this file.
 */
/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2)
GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4)
GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8)

/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2)
GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4)
GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8)

/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2)
GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4)
GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8)

/* vfcvt.f.x.v vd, vs2, vm # Convert signed integer to float. */
RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2)
GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4)
GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8)
4200 
/* Widening Floating-Point/Integer Type-Convert Instructions */
/*
 * Type triples (TD, T2, TX2): destination element type, source element
 * type, and the type the source element is loaded into.  The destination
 * is twice as wide as the source.
 */
#define WOP_UU_B uint16_t, uint8_t,  uint8_t
#define WOP_UU_H uint32_t, uint16_t, uint16_t
#define WOP_UU_W uint64_t, uint32_t, uint32_t
/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. */
RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4)
GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8)

/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4)
GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8)

/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float. */
RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 1, 2)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8)

/* vfwcvt.f.x.v vd, vs2, vm # Convert signed integer to double-width float. */
RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 1, 2)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8)
4233 
4234 /*
4235  * vfwcvt.f.f.v vd, vs2, vm
4236  * Convert single-width float to double-width float.
4237  */
4238 static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4239 {
4240     return float16_to_float32(a, true, s);
4241 }
4242 
/* vfwcvt.f.f.v: half -> single via vfwcvtffv16, single -> double directly. */
RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4)
GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8)
4247 
/* Narrowing Floating-Point/Integer Type-Convert Instructions */
/*
 * Type triples (TD, T2, TX2): destination element type, source element
 * type, and the type the source element is loaded into.  The source is
 * twice as wide as the destination.  Note that NOP_UU_B loads its 16-bit
 * source into a uint32_t.
 */
#define NOP_UU_B uint8_t,  uint16_t, uint32_t
#define NOP_UU_H uint16_t, uint32_t, uint32_t
#define NOP_UU_W uint32_t, uint64_t, uint64_t
/* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1, 1)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4, 4)

/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1, 1)
GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4, 4)

/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float. */
RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4, 4)

/* vfncvt.f.x.v vd, vs2, vm # Convert double-width signed integer to float. */
RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4, 4)
4280 
4281 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */
4282 static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4283 {
4284     return float32_to_float16(a, true, s);
4285 }
4286 
/* vfncvt.f.f.w: single -> half via vfncvtffv16, double -> single directly. */
RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4, 4)
4291 
4292 /*
4293  *** Vector Reduction Operations
4294  */
4295 /* Vector Single-Width Integer Reduction Instructions */
/*
 * Emit HELPER(NAME): an integer reduction of vs2 into element 0 of vd.
 *
 *   TD, HD   - accumulator/destination element type and its index mapping
 *   TS2, HS2 - source element type and its index mapping
 *   OP       - two-operand combiner, used as OP(accumulator, (TD)element)
 *
 * The accumulator starts as element 0 of vs1.  Each active element of vs2
 * in [env->vstart, env->vl) is converted to TD and folded in; elements
 * whose v0 mask bit is clear are skipped when vext_vm(desc) is zero.  The
 * result is written to element 0 of vd and env->vstart is cleared.
 */
#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP)          \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
        void *vs2, CPURISCVState *env, uint32_t desc)     \
{                                                         \
    const uint32_t vm = vext_vm(desc);                    \
    const uint32_t vl = env->vl;                          \
    TD acc = *((TD *)vs1 + HD(0));                        \
    uint32_t idx;                                         \
                                                          \
    for (idx = env->vstart; idx < vl; idx++) {            \
        TS2 elem = *((TS2 *)vs2 + HS2(idx));              \
        if (vm || vext_elem_mask(v0, idx)) {              \
            acc = OP(acc, (TD)elem);                      \
        }                                                 \
    }                                                     \
    *((TD *)vd + HD(0)) = acc;                            \
    env->vstart = 0;                                      \
}
4315 
/*
 * Each line below instantiates GEN_VEXT_RED for one element width; the
 * signedness of the element type selects signed vs. unsigned behaviour
 * where the combiner is sensitive to it.
 * NOTE(review): DO_ADD, DO_MAX, DO_MIN, DO_AND, DO_OR and DO_XOR are
 * defined earlier in this file.
 */
/* vd[0] = sum(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredsum_vs_b, int8_t,  int8_t,  H1, H1, DO_ADD)
GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)

/* vd[0] = maxu(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmaxu_vs_b, uint8_t,  uint8_t,  H1, H1, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)

/* vd[0] = max(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmax_vs_b, int8_t,  int8_t,  H1, H1, DO_MAX)
GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)

/* vd[0] = minu(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredminu_vs_b, uint8_t,  uint8_t,  H1, H1, DO_MIN)
GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)

/* vd[0] = min(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmin_vs_b, int8_t,  int8_t,  H1, H1, DO_MIN)
GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)

/* vd[0] = and(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredand_vs_b, int8_t,  int8_t,  H1, H1, DO_AND)
GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)

/* vd[0] = or(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredor_vs_b, int8_t,  int8_t,  H1, H1, DO_OR)
GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)

/* vd[0] = xor(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredxor_vs_b, int8_t,  int8_t,  H1, H1, DO_XOR)
GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)

/* Vector Widening Integer Reduction Instructions */
/*
 * Signed sum reduction into a double-width accumulator: each source
 * element is converted to the wider signed TD before being added.
 */
GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t,  H2, H1, DO_ADD)
GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)

/*
 * Unsigned sum reduction into a double-width accumulator: each source
 * element is converted to the wider unsigned TD before being added.
 */
GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t,  H2, H1, DO_ADD)
GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
4374 
4375 /* Vector Single-Width Floating-Point Reduction Instructions */
/*
 * Emit HELPER(NAME): a floating-point reduction of vs2 into element 0 of
 * vd.
 *
 *   TD, HD   - accumulator/destination element type and its index mapping
 *   TS2, HS2 - source element type and its index mapping
 *   OP       - softfloat-style combiner, called as
 *              OP(accumulator, (TD)element, &env->fp_status)
 *
 * The accumulator starts as element 0 of vs1.  Active elements of vs2 in
 * [env->vstart, env->vl) are folded in index order; elements whose v0 mask
 * bit is clear are skipped when vext_vm(desc) is zero.  The result is
 * written to element 0 of vd and env->vstart is cleared.
 */
#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP)          \
void HELPER(NAME)(void *vd, void *v0, void *vs1,           \
                  void *vs2, CPURISCVState *env,           \
                  uint32_t desc)                           \
{                                                          \
    const uint32_t vm = vext_vm(desc);                     \
    const uint32_t vl = env->vl;                           \
    TD acc = *((TD *)vs1 + HD(0));                         \
    uint32_t idx;                                          \
                                                           \
    for (idx = env->vstart; idx < vl; idx++) {             \
        TS2 elem = *((TS2 *)vs2 + HS2(idx));               \
        if (vm || vext_elem_mask(v0, idx)) {               \
            acc = OP(acc, (TD)elem, &env->fp_status);      \
        }                                                  \
    }                                                      \
    *((TD *)vd + HD(0)) = acc;                             \
    env->vstart = 0;                                       \
}
4396 
/* Unordered sum: vd[0] = vs1[0] + sum(active vs2[*]) using floatN_add. */
GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)

/* Maximum value: folds with floatN_maximum_number. */
GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number)
GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number)
GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number)

/* Minimum value: folds with floatN_minimum_number. */
GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number)
GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number)
GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number)
4411 
4412 /* Vector Widening Floating-Point Reduction Instructions */
4413 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
4414 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
4415                             void *vs2, CPURISCVState *env, uint32_t desc)
4416 {
4417     uint32_t vm = vext_vm(desc);
4418     uint32_t vl = env->vl;
4419     uint32_t i;
4420     uint32_t s1 =  *((uint32_t *)vs1 + H4(0));
4421 
4422     for (i = env->vstart; i < vl; i++) {
4423         uint16_t s2 = *((uint16_t *)vs2 + H2(i));
4424         if (!vm && !vext_elem_mask(v0, i)) {
4425             continue;
4426         }
4427         s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status),
4428                          &env->fp_status);
4429     }
4430     *((uint32_t *)vd + H4(0)) = s1;
4431     env->vstart = 0;
4432 }
4433 
4434 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
4435                             void *vs2, CPURISCVState *env, uint32_t desc)
4436 {
4437     uint32_t vm = vext_vm(desc);
4438     uint32_t vl = env->vl;
4439     uint32_t i;
4440     uint64_t s1 =  *((uint64_t *)vs1);
4441 
4442     for (i = env->vstart; i < vl; i++) {
4443         uint32_t s2 = *((uint32_t *)vs2 + H4(i));
4444         if (!vm && !vext_elem_mask(v0, i)) {
4445             continue;
4446         }
4447         s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status),
4448                          &env->fp_status);
4449     }
4450     *((uint64_t *)vd) = s1;
4451     env->vstart = 0;
4452 }
4453 
4454 /*
4455  *** Vector Mask Operations
4456  */
4457 /* Vector Mask-Register Logical Instructions */
/*
 * Emit HELPER(NAME): a mask-register logical instruction.
 *
 * For each index from env->vstart up to env->vl - 1 the mask bit of vd is
 * set to OP(vs2 bit, vs1 bit).  The operation is not itself masked: v0 and
 * desc are accepted but unused.  env->vstart is cleared at the end.
 */
#define GEN_VEXT_MASK_VV(NAME, OP)                        \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    const uint32_t vl = env->vl;                          \
    uint32_t idx;                                         \
                                                          \
    for (idx = env->vstart; idx < vl; idx++) {            \
        int bit1 = vext_elem_mask(vs1, idx);              \
        int bit2 = vext_elem_mask(vs2, idx);              \
                                                          \
        vext_set_elem_mask(vd, idx, OP(bit2, bit1));      \
    }                                                     \
    env->vstart = 0;                                      \
}
4474 
/*
 * Single-bit logical combiners for the mask-register instructions.
 * Operands are expected to be 0 or 1; logical NOT (!) keeps results in
 * {0, 1}.  Every argument is parenthesized so that compound expressions
 * (e.g. "a | b") passed as N or M are evaluated as a unit rather than being
 * regrouped by operator precedence inside the expansion.
 */
#define DO_NAND(N, M)  (!((N) & (M)))
#define DO_ANDNOT(N, M)  ((N) & !(M))
#define DO_NOR(N, M)  (!((N) | (M)))
#define DO_ORNOT(N, M)  ((N) | !(M))
#define DO_XNOR(N, M)  (!((N) ^ (M)))
4480 
/*
 * Mask-register logical helpers: vd.mask[i] = OP(vs2.mask[i], vs1.mask[i]).
 * NOTE(review): DO_AND, DO_XOR and DO_OR are defined earlier in this file.
 */
GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
4489 
4490 /* Vector count population in mask vcpop */
4491 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
4492                              uint32_t desc)
4493 {
4494     target_ulong cnt = 0;
4495     uint32_t vm = vext_vm(desc);
4496     uint32_t vl = env->vl;
4497     int i;
4498 
4499     for (i = env->vstart; i < vl; i++) {
4500         if (vm || vext_elem_mask(v0, i)) {
4501             if (vext_elem_mask(vs2, i)) {
4502                 cnt++;
4503             }
4504         }
4505     }
4506     env->vstart = 0;
4507     return cnt;
4508 }
4509 
4510 /* vfirst find-first-set mask bit*/
4511 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4512                               uint32_t desc)
4513 {
4514     uint32_t vm = vext_vm(desc);
4515     uint32_t vl = env->vl;
4516     int i;
4517 
4518     for (i = env->vstart; i < vl; i++) {
4519         if (vm || vext_elem_mask(v0, i)) {
4520             if (vext_elem_mask(vs2, i)) {
4521                 return i;
4522             }
4523         }
4524     }
4525     env->vstart = 0;
4526     return -1LL;
4527 }
4528 
/*
 * Selects which mask pattern vmsetm() produces relative to the first set
 * bit of its source mask.
 */
enum set_mask_type {
    ONLY_FIRST    = 1,  /* set only the first set element */
    INCLUDE_FIRST = 2,  /* set everything up to and including it */
    BEFORE_FIRST  = 3,  /* set everything strictly before it */
};
4534 
4535 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
4536                    uint32_t desc, enum set_mask_type type)
4537 {
4538     uint32_t vm = vext_vm(desc);
4539     uint32_t vl = env->vl;
4540     int i;
4541     bool first_mask_bit = false;
4542 
4543     for (i = env->vstart; i < vl; i++) {
4544         if (!vm && !vext_elem_mask(v0, i)) {
4545             continue;
4546         }
4547         /* write a zero to all following active elements */
4548         if (first_mask_bit) {
4549             vext_set_elem_mask(vd, i, 0);
4550             continue;
4551         }
4552         if (vext_elem_mask(vs2, i)) {
4553             first_mask_bit = true;
4554             if (type == BEFORE_FIRST) {
4555                 vext_set_elem_mask(vd, i, 0);
4556             } else {
4557                 vext_set_elem_mask(vd, i, 1);
4558             }
4559         } else {
4560             if (type == ONLY_FIRST) {
4561                 vext_set_elem_mask(vd, i, 0);
4562             } else {
4563                 vext_set_elem_mask(vd, i, 1);
4564             }
4565         }
4566     }
4567     env->vstart = 0;
4568 }
4569 
4570 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4571                      uint32_t desc)
4572 {
4573     vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
4574 }
4575 
4576 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4577                      uint32_t desc)
4578 {
4579     vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
4580 }
4581 
4582 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4583                      uint32_t desc)
4584 {
4585     vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
4586 }
4587 
4588 /* Vector Iota Instruction */
4589 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H)                                  \
4590 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env,      \
4591                   uint32_t desc)                                          \
4592 {                                                                         \
4593     uint32_t vm = vext_vm(desc);                                          \
4594     uint32_t vl = env->vl;                                                \
4595     uint32_t sum = 0;                                                     \
4596     int i;                                                                \
4597                                                                           \
4598     for (i = env->vstart; i < vl; i++) {                                  \
4599         if (!vm && !vext_elem_mask(v0, i)) {                              \
4600             continue;                                                     \
4601         }                                                                 \
4602         *((ETYPE *)vd + H(i)) = sum;                                      \
4603         if (vext_elem_mask(vs2, i)) {                                     \
4604             sum++;                                                        \
4605         }                                                                 \
4606     }                                                                     \
4607     env->vstart = 0;                                                      \
4608 }
4609 
4610 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t,  H1)
4611 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
4612 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
4613 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
4614 
4615 /* Vector Element Index Instruction */
4616 #define GEN_VEXT_VID_V(NAME, ETYPE, H)                                    \
4617 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc)  \
4618 {                                                                         \
4619     uint32_t vm = vext_vm(desc);                                          \
4620     uint32_t vl = env->vl;                                                \
4621     int i;                                                                \
4622                                                                           \
4623     for (i = env->vstart; i < vl; i++) {                                  \
4624         if (!vm && !vext_elem_mask(v0, i)) {                              \
4625             continue;                                                     \
4626         }                                                                 \
4627         *((ETYPE *)vd + H(i)) = i;                                        \
4628     }                                                                     \
4629     env->vstart = 0;                                                      \
4630 }
4631 
4632 GEN_VEXT_VID_V(vid_v_b, uint8_t,  H1)
4633 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
4634 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
4635 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
4636 
4637 /*
4638  *** Vector Permutation Instructions
4639  */
4640 
4641 /* Vector Slide Instructions */
4642 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                              \
4643 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4644                   CPURISCVState *env, uint32_t desc)                      \
4645 {                                                                         \
4646     uint32_t vm = vext_vm(desc);                                          \
4647     uint32_t vl = env->vl;                                                \
4648     target_ulong offset = s1, i_min, i;                                   \
4649                                                                           \
4650     i_min = MAX(env->vstart, offset);                                     \
4651     for (i = i_min; i < vl; i++) {                                        \
4652         if (!vm && !vext_elem_mask(v0, i)) {                              \
4653             continue;                                                     \
4654         }                                                                 \
4655         *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
4656     }                                                                     \
4657 }
4658 
4659 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
4660 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t,  H1)
4661 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
4662 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
4663 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
4664 
4665 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                            \
4666 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4667                   CPURISCVState *env, uint32_t desc)                      \
4668 {                                                                         \
4669     uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
4670     uint32_t vm = vext_vm(desc);                                          \
4671     uint32_t vl = env->vl;                                                \
4672     target_ulong i_max, i;                                                \
4673                                                                           \
4674     i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart);       \
4675     for (i = env->vstart; i < i_max; ++i) {                               \
4676         if (vm || vext_elem_mask(v0, i)) {                                \
4677             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));          \
4678         }                                                                 \
4679     }                                                                     \
4680                                                                           \
4681     for (i = i_max; i < vl; ++i) {                                        \
4682         if (vm || vext_elem_mask(v0, i)) {                                \
4683             *((ETYPE *)vd + H(i)) = 0;                                    \
4684         }                                                                 \
4685     }                                                                     \
4686                                                                           \
4687     env->vstart = 0;                                                      \
4688 }
4689 
4690 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
4691 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t,  H1)
4692 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
4693 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
4694 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
4695 
4696 #define GEN_VEXT_VSLIE1UP(ESZ, H)                                           \
4697 static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
4698                      CPURISCVState *env, uint32_t desc)                     \
4699 {                                                                           \
4700     typedef uint##ESZ##_t ETYPE;                                            \
4701     uint32_t vm = vext_vm(desc);                                            \
4702     uint32_t vl = env->vl;                                                  \
4703     uint32_t i;                                                             \
4704                                                                             \
4705     for (i = env->vstart; i < vl; i++) {                                    \
4706         if (!vm && !vext_elem_mask(v0, i)) {                                \
4707             continue;                                                       \
4708         }                                                                   \
4709         if (i == 0) {                                                       \
4710             *((ETYPE *)vd + H(i)) = s1;                                     \
4711         } else {                                                            \
4712             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));             \
4713         }                                                                   \
4714     }                                                                       \
4715     env->vstart = 0;                                                        \
4716 }
4717 
4718 GEN_VEXT_VSLIE1UP(8,  H1)
4719 GEN_VEXT_VSLIE1UP(16, H2)
4720 GEN_VEXT_VSLIE1UP(32, H4)
4721 GEN_VEXT_VSLIE1UP(64, H8)
4722 
4723 #define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ)                          \
4724 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4725                   CPURISCVState *env, uint32_t desc)              \
4726 {                                                                 \
4727     vslide1up_##ESZ(vd, v0, s1, vs2, env, desc);                  \
4728 }
4729 
4730 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
4731 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
4732 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
4733 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
4734 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
4735 
4736 #define GEN_VEXT_VSLIDE1DOWN(ESZ, H)                                          \
4737 static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
4738                        CPURISCVState *env, uint32_t desc)                     \
4739 {                                                                             \
4740     typedef uint##ESZ##_t ETYPE;                                              \
4741     uint32_t vm = vext_vm(desc);                                              \
4742     uint32_t vl = env->vl;                                                    \
4743     uint32_t i;                                                               \
4744                                                                               \
4745     for (i = env->vstart; i < vl; i++) {                                      \
4746         if (!vm && !vext_elem_mask(v0, i)) {                                  \
4747             continue;                                                         \
4748         }                                                                     \
4749         if (i == vl - 1) {                                                    \
4750             *((ETYPE *)vd + H(i)) = s1;                                       \
4751         } else {                                                              \
4752             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));               \
4753         }                                                                     \
4754     }                                                                         \
4755     env->vstart = 0;                                                          \
4756 }
4757 
4758 GEN_VEXT_VSLIDE1DOWN(8,  H1)
4759 GEN_VEXT_VSLIDE1DOWN(16, H2)
4760 GEN_VEXT_VSLIDE1DOWN(32, H4)
4761 GEN_VEXT_VSLIDE1DOWN(64, H8)
4762 
4763 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ)                        \
4764 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4765                   CPURISCVState *env, uint32_t desc)              \
4766 {                                                                 \
4767     vslide1down_##ESZ(vd, v0, s1, vs2, env, desc);                \
4768 }
4769 
4770 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
4771 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
4772 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
4773 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
4774 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
4775 
4776 /* Vector Floating-Point Slide Instructions */
4777 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ)                     \
4778 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4779                   CPURISCVState *env, uint32_t desc)          \
4780 {                                                             \
4781     vslide1up_##ESZ(vd, v0, s1, vs2, env, desc);              \
4782 }
4783 
4784 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
4785 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
4786 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
4787 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)
4788 
4789 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ)                   \
4790 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4791                   CPURISCVState *env, uint32_t desc)          \
4792 {                                                             \
4793     vslide1down_##ESZ(vd, v0, s1, vs2, env, desc);            \
4794 }
4795 
4796 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
4797 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
4798 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
4799 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
4800 
4801 /* Vector Register Gather Instruction */
4802 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2)                    \
4803 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
4804                   CPURISCVState *env, uint32_t desc)                      \
4805 {                                                                         \
4806     uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2)));             \
4807     uint32_t vm = vext_vm(desc);                                          \
4808     uint32_t vl = env->vl;                                                \
4809     uint64_t index;                                                       \
4810     uint32_t i;                                                           \
4811                                                                           \
4812     for (i = env->vstart; i < vl; i++) {                                  \
4813         if (!vm && !vext_elem_mask(v0, i)) {                              \
4814             continue;                                                     \
4815         }                                                                 \
4816         index = *((TS1 *)vs1 + HS1(i));                                   \
4817         if (index >= vlmax) {                                             \
4818             *((TS2 *)vd + HS2(i)) = 0;                                    \
4819         } else {                                                          \
4820             *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index));           \
4821         }                                                                 \
4822     }                                                                     \
4823     env->vstart = 0;                                                      \
4824 }
4825 
4826 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
4827 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t,  uint8_t,  H1, H1)
4828 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
4829 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
4830 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)
4831 
4832 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t,  H2, H1)
4833 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
4834 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
4835 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
4836 
4837 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                              \
4838 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4839                   CPURISCVState *env, uint32_t desc)                      \
4840 {                                                                         \
4841     uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
4842     uint32_t vm = vext_vm(desc);                                          \
4843     uint32_t vl = env->vl;                                                \
4844     uint64_t index = s1;                                                  \
4845     uint32_t i;                                                           \
4846                                                                           \
4847     for (i = env->vstart; i < vl; i++) {                                  \
4848         if (!vm && !vext_elem_mask(v0, i)) {                              \
4849             continue;                                                     \
4850         }                                                                 \
4851         if (index >= vlmax) {                                             \
4852             *((ETYPE *)vd + H(i)) = 0;                                    \
4853         } else {                                                          \
4854             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
4855         }                                                                 \
4856     }                                                                     \
4857     env->vstart = 0;                                                      \
4858 }
4859 
4860 /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */
4861 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t,  H1)
4862 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
4863 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
4864 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
4865 
4866 /* Vector Compress Instruction */
4867 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
4868 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
4869                   CPURISCVState *env, uint32_t desc)                      \
4870 {                                                                         \
4871     uint32_t vl = env->vl;                                                \
4872     uint32_t num = 0, i;                                                  \
4873                                                                           \
4874     for (i = env->vstart; i < vl; i++) {                                  \
4875         if (!vext_elem_mask(vs1, i)) {                                    \
4876             continue;                                                     \
4877         }                                                                 \
4878         *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
4879         num++;                                                            \
4880     }                                                                     \
4881     env->vstart = 0;                                                      \
4882 }
4883 
4884 /* Compress into vd elements of vs2 where vs1 is enabled */
4885 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t,  H1)
4886 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
4887 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
4888 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
4889 
4890 /* Vector Whole Register Move */
4891 void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
4892 {
4893     /* EEW = SEW */
4894     uint32_t maxsz = simd_maxsz(desc);
4895     uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
4896     uint32_t startb = env->vstart * sewb;
4897     uint32_t i = startb;
4898 
4899     memcpy((uint8_t *)vd + H1(i),
4900            (uint8_t *)vs2 + H1(i),
4901            maxsz - startb);
4902 
4903     env->vstart = 0;
4904 }
4905 
4906 /* Vector Integer Extension */
4907 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)            \
4908 void HELPER(NAME)(void *vd, void *v0, void *vs2,                 \
4909                   CPURISCVState *env, uint32_t desc)             \
4910 {                                                                \
4911     uint32_t vl = env->vl;                                       \
4912     uint32_t vm = vext_vm(desc);                                 \
4913     uint32_t i;                                                  \
4914                                                                  \
4915     for (i = env->vstart; i < vl; i++) {                         \
4916         if (!vm && !vext_elem_mask(v0, i)) {                     \
4917             continue;                                            \
4918         }                                                        \
4919         *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));       \
4920     }                                                            \
4921     env->vstart = 0;                                             \
4922 }
4923 
4924 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t,  H2, H1)
4925 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
4926 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
4927 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t,  H4, H1)
4928 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
4929 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t,  H8, H1)
4930 
4931 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t,  H2, H1)
4932 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
4933 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
4934 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t,  H4, H1)
4935 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
4936 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t,  H8, H1)
4937