xref: /openbmc/qemu/target/riscv/vector_helper.c (revision 3cdd1f45aa55d77c6a04a546013de911eec3cced)
12b7168fcSLIU Zhiwei /*
22b7168fcSLIU Zhiwei  * RISC-V Vector Extension Helpers for QEMU.
32b7168fcSLIU Zhiwei  *
42b7168fcSLIU Zhiwei  * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
52b7168fcSLIU Zhiwei  *
62b7168fcSLIU Zhiwei  * This program is free software; you can redistribute it and/or modify it
72b7168fcSLIU Zhiwei  * under the terms and conditions of the GNU General Public License,
82b7168fcSLIU Zhiwei  * version 2 or later, as published by the Free Software Foundation.
92b7168fcSLIU Zhiwei  *
102b7168fcSLIU Zhiwei  * This program is distributed in the hope it will be useful, but WITHOUT
112b7168fcSLIU Zhiwei  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
122b7168fcSLIU Zhiwei  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
132b7168fcSLIU Zhiwei  * more details.
142b7168fcSLIU Zhiwei  *
152b7168fcSLIU Zhiwei  * You should have received a copy of the GNU General Public License along with
162b7168fcSLIU Zhiwei  * this program.  If not, see <http://www.gnu.org/licenses/>.
172b7168fcSLIU Zhiwei  */
182b7168fcSLIU Zhiwei 
192b7168fcSLIU Zhiwei #include "qemu/osdep.h"
205a9f8e15SFrank Chang #include "qemu/host-utils.h"
21e848a1e5SFrank Chang #include "qemu/bitops.h"
222b7168fcSLIU Zhiwei #include "cpu.h"
23751538d5SLIU Zhiwei #include "exec/memop.h"
242b7168fcSLIU Zhiwei #include "exec/exec-all.h"
2509b07f28SPhilippe Mathieu-Daudé #include "exec/cpu_ldst.h"
2674781c08SPhilippe Mathieu-Daudé #include "exec/page-protection.h"
272b7168fcSLIU Zhiwei #include "exec/helper-proto.h"
28ce2a0343SLIU Zhiwei #include "fpu/softfloat.h"
29751538d5SLIU Zhiwei #include "tcg/tcg-gvec-desc.h"
30751538d5SLIU Zhiwei #include "internals.h"
3198f40dd2SKiran Ostrolenk #include "vector_internals.h"
322b7168fcSLIU Zhiwei #include <math.h>
332b7168fcSLIU Zhiwei 
HELPER(vsetvl)342b7168fcSLIU Zhiwei target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
352b7168fcSLIU Zhiwei                             target_ulong s2)
362b7168fcSLIU Zhiwei {
372b7168fcSLIU Zhiwei     int vlmax, vl;
382b7168fcSLIU Zhiwei     RISCVCPU *cpu = env_archcpu(env);
39cd21576dSDaniel Henrique Barboza     uint64_t vlmul = FIELD_EX64(s2, VTYPE, VLMUL);
40cd21576dSDaniel Henrique Barboza     uint8_t vsew = FIELD_EX64(s2, VTYPE, VSEW);
41cd21576dSDaniel Henrique Barboza     uint16_t sew = 8 << vsew;
422b7168fcSLIU Zhiwei     uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
4331961cfeSLIU Zhiwei     int xlen = riscv_cpu_xlen(env);
4431961cfeSLIU Zhiwei     bool vill = (s2 >> (xlen - 1)) & 0x1;
4531961cfeSLIU Zhiwei     target_ulong reserved = s2 &
4631961cfeSLIU Zhiwei                             MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
4731961cfeSLIU Zhiwei                                             xlen - 1 - R_VTYPE_RESERVED_SHIFT);
48938dd05eSdemin.han     uint16_t vlen = cpu->cfg.vlenb << 3;
49cd21576dSDaniel Henrique Barboza     int8_t lmul;
502b7168fcSLIU Zhiwei 
51cd21576dSDaniel Henrique Barboza     if (vlmul & 4) {
527aa4d519SDaniel Henrique Barboza         /*
537aa4d519SDaniel Henrique Barboza          * Fractional LMUL, check:
547aa4d519SDaniel Henrique Barboza          *
557aa4d519SDaniel Henrique Barboza          * VLEN * LMUL >= SEW
567aa4d519SDaniel Henrique Barboza          * VLEN >> (8 - lmul) >= sew
577aa4d519SDaniel Henrique Barboza          * (vlenb << 3) >> (8 - lmul) >= sew
587aa4d519SDaniel Henrique Barboza          */
59938dd05eSdemin.han         if (vlmul == 4 || (vlen >> (8 - vlmul)) < sew) {
60d9b7609aSFrank Chang             vill = true;
61d9b7609aSFrank Chang         }
62d9b7609aSFrank Chang     }
63d9b7609aSFrank Chang 
64c45eff30SWeiwei Li     if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
652b7168fcSLIU Zhiwei         /* only set vill bit. */
66d96a271aSLIU Zhiwei         env->vill = 1;
67d96a271aSLIU Zhiwei         env->vtype = 0;
682b7168fcSLIU Zhiwei         env->vl = 0;
692b7168fcSLIU Zhiwei         env->vstart = 0;
702b7168fcSLIU Zhiwei         return 0;
712b7168fcSLIU Zhiwei     }
722b7168fcSLIU Zhiwei 
73cd21576dSDaniel Henrique Barboza     /* lmul encoded as in DisasContext::lmul */
74cd21576dSDaniel Henrique Barboza     lmul = sextract32(FIELD_EX64(s2, VTYPE, VLMUL), 0, 3);
75cd21576dSDaniel Henrique Barboza     vlmax = vext_get_vlmax(cpu->cfg.vlenb, vsew, lmul);
762b7168fcSLIU Zhiwei     if (s1 <= vlmax) {
772b7168fcSLIU Zhiwei         vl = s1;
7812f1e2ecSJason Chien     } else if (s1 < 2 * vlmax && cpu->cfg.rvv_vl_half_avl) {
7912f1e2ecSJason Chien         vl = (s1 + 1) >> 1;
802b7168fcSLIU Zhiwei     } else {
812b7168fcSLIU Zhiwei         vl = vlmax;
822b7168fcSLIU Zhiwei     }
832b7168fcSLIU Zhiwei     env->vl = vl;
842b7168fcSLIU Zhiwei     env->vtype = s2;
852b7168fcSLIU Zhiwei     env->vstart = 0;
86ac6bcf4dSLIU Zhiwei     env->vill = 0;
872b7168fcSLIU Zhiwei     return vl;
882b7168fcSLIU Zhiwei }
89751538d5SLIU Zhiwei 
90751538d5SLIU Zhiwei /*
915a9f8e15SFrank Chang  * Get the maximum number of elements can be operated.
92751538d5SLIU Zhiwei  *
93c7b8a421SeopXD  * log2_esz: log2 of element size in bytes.
94751538d5SLIU Zhiwei  */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);
    int scale = vext_lmul(desc) - log2_esz;

    /* VLMAX = vlenb * (LMUL / ESZ); both factors are kept as log2 values. */
    if (scale < 0) {
        return vlenb >> -scale;
    }
    return vlenb << scale;
}
107751538d5SLIU Zhiwei 
/* Apply the pointer-masking transformation to a guest virtual address. */
static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
{
    target_ulong masked = addr & ~env->cur_pmmask;

    return masked | env->cur_pmbase;
}
112d6b9d930SLIU Zhiwei 
113751538d5SLIU Zhiwei /*
114751538d5SLIU Zhiwei  * This function checks watchpoint before real load operation.
115751538d5SLIU Zhiwei  *
1167893e42dSPhilippe Mathieu-Daudé  * In system mode, the TLB API probe_access is enough for watchpoint check.
117751538d5SLIU Zhiwei  * In user mode, there is no watchpoint support now.
118751538d5SLIU Zhiwei  *
119751538d5SLIU Zhiwei  * It will trigger an exception if there is no mapping in TLB
120751538d5SLIU Zhiwei  * and page table walk can't fill the TLB entry. Then the guest
121751538d5SLIU Zhiwei  * software can return here after process the exception or never return.
122751538d5SLIU Zhiwei  */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    int mmu_index = riscv_env_mmu_index(env, false);
    /* Bytes left until the end of the page containing addr. */
    target_ulong page_left = -(addr | TARGET_PAGE_MASK);
    target_ulong first = MIN(page_left, len);

    probe_access(env, adjust_addr(env, addr), first, access_type,
                 mmu_index, ra);
    if (len > first) {
        /* The access spills onto a second page; probe that one too. */
        probe_access(env, adjust_addr(env, addr + first), len - first,
                     access_type, mmu_index, ra);
    }
}
140751538d5SLIU Zhiwei 
/* Write a single bit of a mask register (one bit per element). */
static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    uint64_t *words = (uint64_t *)v0;
    int idx = index / 64;
    int pos = index % 64;

    words[idx] = deposit64(words[idx], pos, 1, value);
}
149751538d5SLIU Zhiwei 
150751538d5SLIU Zhiwei /* elements operations for load and store */
151338aa15dSMax Chou typedef void vext_ldst_elem_fn_tlb(CPURISCVState *env, abi_ptr addr,
152751538d5SLIU Zhiwei                                    uint32_t idx, void *vd, uintptr_t retaddr);
153338aa15dSMax Chou typedef void vext_ldst_elem_fn_host(void *vd, uint32_t idx, void *host);
154751538d5SLIU Zhiwei 
15579556fb6SFrank Chang #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)             \
156f8ee6f53SMax Chou static inline QEMU_ALWAYS_INLINE                            \
157f8ee6f53SMax Chou void NAME##_tlb(CPURISCVState *env, abi_ptr addr,           \
158751538d5SLIU Zhiwei                 uint32_t idx, void *vd, uintptr_t retaddr)  \
159751538d5SLIU Zhiwei {                                                           \
160751538d5SLIU Zhiwei     ETYPE *cur = ((ETYPE *)vd + H(idx));                    \
16179556fb6SFrank Chang     *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);       \
162751538d5SLIU Zhiwei }                                                           \
163338aa15dSMax Chou                                                             \
164f8ee6f53SMax Chou static inline QEMU_ALWAYS_INLINE                            \
165f8ee6f53SMax Chou void NAME##_host(void *vd, uint32_t idx, void *host)        \
166338aa15dSMax Chou {                                                           \
167338aa15dSMax Chou     ETYPE *cur = ((ETYPE *)vd + H(idx));                    \
168338aa15dSMax Chou     *cur = (ETYPE)LDSUF##_p(host);                          \
169338aa15dSMax Chou }
170751538d5SLIU Zhiwei 
GEN_VEXT_LD_ELEM(lde_b,uint8_t,H1,ldub)171338aa15dSMax Chou GEN_VEXT_LD_ELEM(lde_b, uint8_t,  H1, ldub)
172338aa15dSMax Chou GEN_VEXT_LD_ELEM(lde_h, uint16_t, H2, lduw)
173338aa15dSMax Chou GEN_VEXT_LD_ELEM(lde_w, uint32_t, H4, ldl)
174338aa15dSMax Chou GEN_VEXT_LD_ELEM(lde_d, uint64_t, H8, ldq)
175751538d5SLIU Zhiwei 
176751538d5SLIU Zhiwei #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)             \
177f8ee6f53SMax Chou static inline QEMU_ALWAYS_INLINE                            \
178f8ee6f53SMax Chou void NAME##_tlb(CPURISCVState *env, abi_ptr addr,           \
179751538d5SLIU Zhiwei                 uint32_t idx, void *vd, uintptr_t retaddr)  \
180751538d5SLIU Zhiwei {                                                           \
181751538d5SLIU Zhiwei     ETYPE data = *((ETYPE *)vd + H(idx));                   \
182751538d5SLIU Zhiwei     cpu_##STSUF##_data_ra(env, addr, data, retaddr);        \
183338aa15dSMax Chou }                                                           \
184338aa15dSMax Chou                                                             \
185f8ee6f53SMax Chou static inline QEMU_ALWAYS_INLINE                            \
186f8ee6f53SMax Chou void NAME##_host(void *vd, uint32_t idx, void *host)        \
187338aa15dSMax Chou {                                                           \
188338aa15dSMax Chou     ETYPE data = *((ETYPE *)vd + H(idx));                   \
189338aa15dSMax Chou     STSUF##_p(host, data);                                  \
190751538d5SLIU Zhiwei }
191751538d5SLIU Zhiwei 
192338aa15dSMax Chou GEN_VEXT_ST_ELEM(ste_b, uint8_t,  H1, stb)
193338aa15dSMax Chou GEN_VEXT_ST_ELEM(ste_h, uint16_t, H2, stw)
194338aa15dSMax Chou GEN_VEXT_ST_ELEM(ste_w, uint32_t, H4, stl)
195338aa15dSMax Chou GEN_VEXT_ST_ELEM(ste_d, uint64_t, H8, stq)
196751538d5SLIU Zhiwei 
197e3298878SMax Chou static inline QEMU_ALWAYS_INLINE void
198e3298878SMax Chou vext_continus_ldst_tlb(CPURISCVState *env, vext_ldst_elem_fn_tlb *ldst_tlb,
199e3298878SMax Chou                        void *vd, uint32_t evl, target_ulong addr,
200e3298878SMax Chou                        uint32_t reg_start, uintptr_t ra, uint32_t esz,
201e3298878SMax Chou                        bool is_load)
202e3298878SMax Chou {
203e3298878SMax Chou     uint32_t i;
204e3298878SMax Chou     for (i = env->vstart; i < evl; env->vstart = ++i, addr += esz) {
205e3298878SMax Chou         ldst_tlb(env, adjust_addr(env, addr), i, vd, ra);
206e3298878SMax Chou     }
207e3298878SMax Chou }
208e3298878SMax Chou 
209e3298878SMax Chou static inline QEMU_ALWAYS_INLINE void
vext_continus_ldst_host(CPURISCVState * env,vext_ldst_elem_fn_host * ldst_host,void * vd,uint32_t evl,uint32_t reg_start,void * host,uint32_t esz,bool is_load)210e3298878SMax Chou vext_continus_ldst_host(CPURISCVState *env, vext_ldst_elem_fn_host *ldst_host,
211e3298878SMax Chou                         void *vd, uint32_t evl, uint32_t reg_start, void *host,
212e3298878SMax Chou                         uint32_t esz, bool is_load)
213e3298878SMax Chou {
214e3298878SMax Chou #if HOST_BIG_ENDIAN
215e3298878SMax Chou     for (; reg_start < evl; reg_start++, host += esz) {
216e3298878SMax Chou         ldst_host(vd, reg_start, host);
217e3298878SMax Chou     }
218e3298878SMax Chou #else
219e3298878SMax Chou     if (esz == 1) {
220e3298878SMax Chou         uint32_t byte_offset = reg_start * esz;
221e3298878SMax Chou         uint32_t size = (evl - reg_start) * esz;
222e3298878SMax Chou 
223e3298878SMax Chou         if (is_load) {
224e3298878SMax Chou             memcpy(vd + byte_offset, host, size);
225e3298878SMax Chou         } else {
226e3298878SMax Chou             memcpy(host, vd + byte_offset, size);
227e3298878SMax Chou         }
228e3298878SMax Chou     } else {
229e3298878SMax Chou         for (; reg_start < evl; reg_start++, host += esz) {
230e3298878SMax Chou             ldst_host(vd, reg_start, host);
231e3298878SMax Chou         }
232e3298878SMax Chou     }
233e3298878SMax Chou #endif
234e3298878SMax Chou }
235e3298878SMax Chou 
/*
 * Apply the tail-agnostic policy: fill the tail of each of the nf
 * register segments with all-ones.  No-op when vta == 0 (undisturbed).
 */
static void vext_set_tail_elems_1s(target_ulong vl, void *vd,
                                   uint32_t desc, uint32_t nf,
                                   uint32_t esz, uint32_t max_elems)
{
    uint32_t vta = vext_vta(desc);
    uint32_t k;

    if (!vta) {
        return;
    }

    for (k = 0; k < nf; k++) {
        uint32_t seg = k * max_elems;

        vext_set_elems_1s(vd, vta, (seg + vl) * esz,
                          (seg + max_elems) * esz);
    }
}
252e130683fSDaniel Henrique Barboza 
253751538d5SLIU Zhiwei /*
2543b57254dSWeiwei Li  * stride: access vector element from strided memory
255751538d5SLIU Zhiwei  */
256751538d5SLIU Zhiwei static void
vext_ldst_stride(void * vd,void * v0,target_ulong base,target_ulong stride,CPURISCVState * env,uint32_t desc,uint32_t vm,vext_ldst_elem_fn_tlb * ldst_elem,uint32_t log2_esz,uintptr_t ra)257338aa15dSMax Chou vext_ldst_stride(void *vd, void *v0, target_ulong base, target_ulong stride,
258338aa15dSMax Chou                  CPURISCVState *env, uint32_t desc, uint32_t vm,
259338aa15dSMax Chou                  vext_ldst_elem_fn_tlb *ldst_elem, uint32_t log2_esz,
260338aa15dSMax Chou                  uintptr_t ra)
261751538d5SLIU Zhiwei {
262751538d5SLIU Zhiwei     uint32_t i, k;
263751538d5SLIU Zhiwei     uint32_t nf = vext_nf(desc);
264c7b8a421SeopXD     uint32_t max_elems = vext_max_elems(desc, log2_esz);
265752614caSeopXD     uint32_t esz = 1 << log2_esz;
266265ecd4cSYueh-Ting (eop) Chen     uint32_t vma = vext_vma(desc);
267751538d5SLIU Zhiwei 
2687f5f3e5aSChao Liu     VSTART_CHECK_EARLY_EXIT(env, env->vl);
269df4252b2SDaniel Henrique Barboza 
2700a11629cSDaniel Henrique Barboza     for (i = env->vstart; i < env->vl; env->vstart = ++i) {
271751538d5SLIU Zhiwei         k = 0;
272751538d5SLIU Zhiwei         while (k < nf) {
273265ecd4cSYueh-Ting (eop) Chen             if (!vm && !vext_elem_mask(v0, i)) {
274265ecd4cSYueh-Ting (eop) Chen                 /* set masked-off elements to 1s */
275265ecd4cSYueh-Ting (eop) Chen                 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
276265ecd4cSYueh-Ting (eop) Chen                                   (i + k * max_elems + 1) * esz);
277265ecd4cSYueh-Ting (eop) Chen                 k++;
278265ecd4cSYueh-Ting (eop) Chen                 continue;
279265ecd4cSYueh-Ting (eop) Chen             }
280c7b8a421SeopXD             target_ulong addr = base + stride * i + (k << log2_esz);
281d6b9d930SLIU Zhiwei             ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
282751538d5SLIU Zhiwei             k++;
283751538d5SLIU Zhiwei         }
284751538d5SLIU Zhiwei     }
285f714361eSFrank Chang     env->vstart = 0;
286e130683fSDaniel Henrique Barboza 
287949b6bcbSXiao Wang     vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
288751538d5SLIU Zhiwei }
289751538d5SLIU Zhiwei 
29079556fb6SFrank Chang #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
291751538d5SLIU Zhiwei void HELPER(NAME)(void *vd, void * v0, target_ulong base,               \
292751538d5SLIU Zhiwei                   target_ulong stride, CPURISCVState *env,              \
293751538d5SLIU Zhiwei                   uint32_t desc)                                        \
294751538d5SLIU Zhiwei {                                                                       \
295751538d5SLIU Zhiwei     uint32_t vm = vext_vm(desc);                                        \
296751538d5SLIU Zhiwei     vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
29725eae048SeopXD                      ctzl(sizeof(ETYPE)), GETPC());                     \
298751538d5SLIU Zhiwei }
299751538d5SLIU Zhiwei 
GEN_VEXT_LD_STRIDE(vlse8_v,int8_t,lde_b_tlb)300338aa15dSMax Chou GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b_tlb)
301338aa15dSMax Chou GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h_tlb)
302338aa15dSMax Chou GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w_tlb)
303338aa15dSMax Chou GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d_tlb)
304751538d5SLIU Zhiwei 
30579556fb6SFrank Chang #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                       \
306751538d5SLIU Zhiwei void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
307751538d5SLIU Zhiwei                   target_ulong stride, CPURISCVState *env,              \
308751538d5SLIU Zhiwei                   uint32_t desc)                                        \
309751538d5SLIU Zhiwei {                                                                       \
310751538d5SLIU Zhiwei     uint32_t vm = vext_vm(desc);                                        \
311751538d5SLIU Zhiwei     vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
31225eae048SeopXD                      ctzl(sizeof(ETYPE)), GETPC());                     \
313751538d5SLIU Zhiwei }
314751538d5SLIU Zhiwei 
315338aa15dSMax Chou GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b_tlb)
316338aa15dSMax Chou GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h_tlb)
317338aa15dSMax Chou GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w_tlb)
318338aa15dSMax Chou GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d_tlb)
319751538d5SLIU Zhiwei 
320751538d5SLIU Zhiwei /*
3213b57254dSWeiwei Li  * unit-stride: access elements stored contiguously in memory
322751538d5SLIU Zhiwei  */
323751538d5SLIU Zhiwei 
324751538d5SLIU Zhiwei /* unmasked unit-stride load and store operation */
325f8ee6f53SMax Chou static inline QEMU_ALWAYS_INLINE void
326338aa15dSMax Chou vext_page_ldst_us(CPURISCVState *env, void *vd, target_ulong addr,
327338aa15dSMax Chou                   uint32_t elems, uint32_t nf, uint32_t max_elems,
328338aa15dSMax Chou                   uint32_t log2_esz, bool is_load, int mmu_index,
329338aa15dSMax Chou                   vext_ldst_elem_fn_tlb *ldst_tlb,
330338aa15dSMax Chou                   vext_ldst_elem_fn_host *ldst_host, uintptr_t ra)
331751538d5SLIU Zhiwei {
332338aa15dSMax Chou     void *host;
333338aa15dSMax Chou     int i, k, flags;
334338aa15dSMax Chou     uint32_t esz = 1 << log2_esz;
335338aa15dSMax Chou     uint32_t size = (elems * nf) << log2_esz;
336338aa15dSMax Chou     uint32_t evl = env->vstart + elems;
337338aa15dSMax Chou     MMUAccessType access_type = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE;
338338aa15dSMax Chou 
339338aa15dSMax Chou     /* Check page permission/pmp/watchpoint/etc. */
340338aa15dSMax Chou     flags = probe_access_flags(env, adjust_addr(env, addr), size, access_type,
341338aa15dSMax Chou                                mmu_index, true, &host, ra);
342338aa15dSMax Chou 
343338aa15dSMax Chou     if (flags == 0) {
344e3298878SMax Chou         if (nf == 1) {
345e3298878SMax Chou             vext_continus_ldst_host(env, ldst_host, vd, evl, env->vstart, host,
346e3298878SMax Chou                                     esz, is_load);
347e3298878SMax Chou         } else {
348338aa15dSMax Chou             for (i = env->vstart; i < evl; ++i) {
349338aa15dSMax Chou                 k = 0;
350338aa15dSMax Chou                 while (k < nf) {
351338aa15dSMax Chou                     ldst_host(vd, i + k * max_elems, host);
352338aa15dSMax Chou                     host += esz;
353338aa15dSMax Chou                     k++;
354338aa15dSMax Chou                 }
355338aa15dSMax Chou             }
356e3298878SMax Chou         }
357338aa15dSMax Chou         env->vstart += elems;
358338aa15dSMax Chou     } else {
359e3298878SMax Chou         if (nf == 1) {
360e3298878SMax Chou             vext_continus_ldst_tlb(env, ldst_tlb, vd, evl, addr, env->vstart,
361e3298878SMax Chou                                    ra, esz, is_load);
362e3298878SMax Chou         } else {
363338aa15dSMax Chou             /* load bytes from guest memory */
364338aa15dSMax Chou             for (i = env->vstart; i < evl; env->vstart = ++i) {
365338aa15dSMax Chou                 k = 0;
366338aa15dSMax Chou                 while (k < nf) {
367e3298878SMax Chou                     ldst_tlb(env, adjust_addr(env, addr), i + k * max_elems,
368e3298878SMax Chou                              vd, ra);
369338aa15dSMax Chou                     addr += esz;
370338aa15dSMax Chou                     k++;
371338aa15dSMax Chou                 }
372338aa15dSMax Chou             }
373338aa15dSMax Chou         }
374338aa15dSMax Chou     }
375e3298878SMax Chou }
376338aa15dSMax Chou 
377f8ee6f53SMax Chou static inline QEMU_ALWAYS_INLINE void
vext_ldst_us(void * vd,target_ulong base,CPURISCVState * env,uint32_t desc,vext_ldst_elem_fn_tlb * ldst_tlb,vext_ldst_elem_fn_host * ldst_host,uint32_t log2_esz,uint32_t evl,uintptr_t ra,bool is_load)378338aa15dSMax Chou vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
379338aa15dSMax Chou              vext_ldst_elem_fn_tlb *ldst_tlb,
380338aa15dSMax Chou              vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz,
381338aa15dSMax Chou              uint32_t evl, uintptr_t ra, bool is_load)
382338aa15dSMax Chou {
383338aa15dSMax Chou     uint32_t k;
384338aa15dSMax Chou     target_ulong page_split, elems, addr;
385751538d5SLIU Zhiwei     uint32_t nf = vext_nf(desc);
386c7b8a421SeopXD     uint32_t max_elems = vext_max_elems(desc, log2_esz);
387752614caSeopXD     uint32_t esz = 1 << log2_esz;
388338aa15dSMax Chou     uint32_t msize = nf * esz;
389338aa15dSMax Chou     int mmu_index = riscv_env_mmu_index(env, false);
390751538d5SLIU Zhiwei 
3917f5f3e5aSChao Liu     VSTART_CHECK_EARLY_EXIT(env, evl);
392df4252b2SDaniel Henrique Barboza 
393338aa15dSMax Chou     /* Calculate the page range of first page */
394338aa15dSMax Chou     addr = base + ((env->vstart * nf) << log2_esz);
395338aa15dSMax Chou     page_split = -(addr | TARGET_PAGE_MASK);
396338aa15dSMax Chou     /* Get number of elements */
397338aa15dSMax Chou     elems = page_split / msize;
398338aa15dSMax Chou     if (unlikely(env->vstart + elems >= evl)) {
399338aa15dSMax Chou         elems = evl - env->vstart;
400751538d5SLIU Zhiwei     }
401e130683fSDaniel Henrique Barboza 
402338aa15dSMax Chou     /* Load/store elements in the first page */
403338aa15dSMax Chou     if (likely(elems)) {
404338aa15dSMax Chou         vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, log2_esz,
405338aa15dSMax Chou                           is_load, mmu_index, ldst_tlb, ldst_host, ra);
406338aa15dSMax Chou     }
407338aa15dSMax Chou 
408338aa15dSMax Chou     /* Load/store elements in the second page */
409338aa15dSMax Chou     if (unlikely(env->vstart < evl)) {
410338aa15dSMax Chou         /* Cross page element */
411338aa15dSMax Chou         if (unlikely(page_split % msize)) {
412338aa15dSMax Chou             for (k = 0; k < nf; k++) {
413338aa15dSMax Chou                 addr = base + ((env->vstart * nf + k) << log2_esz);
414338aa15dSMax Chou                 ldst_tlb(env, adjust_addr(env, addr),
415338aa15dSMax Chou                         env->vstart + k * max_elems, vd, ra);
416338aa15dSMax Chou             }
417338aa15dSMax Chou             env->vstart++;
418338aa15dSMax Chou         }
419338aa15dSMax Chou 
420338aa15dSMax Chou         addr = base + ((env->vstart * nf) << log2_esz);
421338aa15dSMax Chou         /* Get number of elements of second page */
422338aa15dSMax Chou         elems = evl - env->vstart;
423338aa15dSMax Chou 
424338aa15dSMax Chou         /* Load/store elements in the second page */
425338aa15dSMax Chou         vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, log2_esz,
426338aa15dSMax Chou                           is_load, mmu_index, ldst_tlb, ldst_host, ra);
427338aa15dSMax Chou     }
428338aa15dSMax Chou 
429338aa15dSMax Chou     env->vstart = 0;
430949b6bcbSXiao Wang     vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems);
431751538d5SLIU Zhiwei }
432751538d5SLIU Zhiwei 
433751538d5SLIU Zhiwei /*
434246f8796SWeiwei Li  * masked unit-stride load and store operation will be a special case of
435fba59e0fSXiao Wang  * stride, stride = NF * sizeof (ETYPE)
436751538d5SLIU Zhiwei  */
437751538d5SLIU Zhiwei 
438338aa15dSMax Chou #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST)      \
439751538d5SLIU Zhiwei void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,     \
440751538d5SLIU Zhiwei                          CPURISCVState *env, uint32_t desc)         \
441751538d5SLIU Zhiwei {                                                                   \
4425a9f8e15SFrank Chang     uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));         \
443338aa15dSMax Chou     vext_ldst_stride(vd, v0, base, stride, env, desc, false,        \
444338aa15dSMax Chou                      LOAD_FN_TLB, ctzl(sizeof(ETYPE)), GETPC());    \
445751538d5SLIU Zhiwei }                                                                   \
446751538d5SLIU Zhiwei                                                                     \
447751538d5SLIU Zhiwei void HELPER(NAME)(void *vd, void *v0, target_ulong base,            \
448751538d5SLIU Zhiwei                   CPURISCVState *env, uint32_t desc)                \
449751538d5SLIU Zhiwei {                                                                   \
450338aa15dSMax Chou     vext_ldst_us(vd, base, env, desc, LOAD_FN_TLB, LOAD_FN_HOST,    \
451338aa15dSMax Chou                  ctzl(sizeof(ETYPE)), env->vl, GETPC(), true);      \
452751538d5SLIU Zhiwei }
453751538d5SLIU Zhiwei 
454338aa15dSMax Chou GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b_tlb, lde_b_host)
455338aa15dSMax Chou GEN_VEXT_LD_US(vle16_v, int16_t, lde_h_tlb, lde_h_host)
456338aa15dSMax Chou GEN_VEXT_LD_US(vle32_v, int32_t, lde_w_tlb, lde_w_host)
GEN_VEXT_LD_US(vle8_v,int8_t,lde_b_tlb,lde_b_host)457338aa15dSMax Chou GEN_VEXT_LD_US(vle64_v, int64_t, lde_d_tlb, lde_d_host)
458751538d5SLIU Zhiwei 
459338aa15dSMax Chou #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN_TLB, STORE_FN_HOST)         \
460751538d5SLIU Zhiwei void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,          \
461751538d5SLIU Zhiwei                          CPURISCVState *env, uint32_t desc)              \
462751538d5SLIU Zhiwei {                                                                        \
4635a9f8e15SFrank Chang     uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));              \
464338aa15dSMax Chou     vext_ldst_stride(vd, v0, base, stride, env, desc, false,             \
465338aa15dSMax Chou                      STORE_FN_TLB, ctzl(sizeof(ETYPE)), GETPC());        \
466751538d5SLIU Zhiwei }                                                                        \
467751538d5SLIU Zhiwei                                                                          \
468751538d5SLIU Zhiwei void HELPER(NAME)(void *vd, void *v0, target_ulong base,                 \
469751538d5SLIU Zhiwei                   CPURISCVState *env, uint32_t desc)                     \
470751538d5SLIU Zhiwei {                                                                        \
471338aa15dSMax Chou     vext_ldst_us(vd, base, env, desc, STORE_FN_TLB, STORE_FN_HOST,       \
472338aa15dSMax Chou                  ctzl(sizeof(ETYPE)), env->vl, GETPC(), false);          \
473751538d5SLIU Zhiwei }
474751538d5SLIU Zhiwei 
475338aa15dSMax Chou GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b_tlb, ste_b_host)
476338aa15dSMax Chou GEN_VEXT_ST_US(vse16_v, int16_t, ste_h_tlb, ste_h_host)
477338aa15dSMax Chou GEN_VEXT_ST_US(vse32_v, int32_t, ste_w_tlb, ste_w_host)
478338aa15dSMax Chou GEN_VEXT_ST_US(vse64_v, int64_t, ste_d_tlb, ste_d_host)
479f732560eSLIU Zhiwei 
480f732560eSLIU Zhiwei /*
4813b57254dSWeiwei Li  * unit stride mask load and store, EEW = 1
48226086aeaSFrank Chang  */
48326086aeaSFrank Chang void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
48426086aeaSFrank Chang                     CPURISCVState *env, uint32_t desc)
48526086aeaSFrank Chang {
48626086aeaSFrank Chang     /* evl = ceil(vl/8) */
48726086aeaSFrank Chang     uint8_t evl = (env->vl + 7) >> 3;
488338aa15dSMax Chou     vext_ldst_us(vd, base, env, desc, lde_b_tlb, lde_b_host,
489338aa15dSMax Chou                  0, evl, GETPC(), true);
49026086aeaSFrank Chang }
49126086aeaSFrank Chang 
49226086aeaSFrank Chang void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
49326086aeaSFrank Chang                     CPURISCVState *env, uint32_t desc)
49426086aeaSFrank Chang {
HELPER(vsm_v)49526086aeaSFrank Chang     /* evl = ceil(vl/8) */
49626086aeaSFrank Chang     uint8_t evl = (env->vl + 7) >> 3;
497338aa15dSMax Chou     vext_ldst_us(vd, base, env, desc, ste_b_tlb, ste_b_host,
498338aa15dSMax Chou                  0, evl, GETPC(), false);
49926086aeaSFrank Chang }
50026086aeaSFrank Chang 
/*
 * index: access vector element from indexed memory
 */

/* Computes the guest address of one indexed element: base + vs2[idx]. */
typedef target_ulong vext_get_index_addr(target_ulong base,
        uint32_t idx, void *vs2);

/*
 * Instantiate an address generator for one index EEW.  ETYPE is the
 * unsigned index element type; H is the host-endian element-offset
 * adjustment macro matching that width.
 */
#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
static target_ulong NAME(target_ulong base,            \
                         uint32_t idx, void *vs2)      \
{                                                      \
    return (base + *((ETYPE *)vs2 + H(idx)));          \
}

/* Index EEW = 8, 16, 32 and 64 bits respectively. */
GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t,  H1)
GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
518f732560eSLIU Zhiwei 
/*
 * Indexed (gather/scatter) load/store: each element's address is
 * base plus the index taken from vs2.
 *
 * vd:             destination (load) / source (store) register group
 * v0:             mask register
 * vs2:            vector of indices
 * get_index_addr: per-element address generator for the index EEW
 * ldst_elem:      per-element TLB load/store callback
 * log2_esz:       log2 of the data element size in bytes
 * ra:             host return address for unwinding on a guest fault
 */
static inline void
vext_ldst_index(void *vd, void *v0, target_ulong base,
                void *vs2, CPURISCVState *env, uint32_t desc,
                vext_get_index_addr get_index_addr,
                vext_ldst_elem_fn_tlb *ldst_elem,
                uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);    /* fields per segment */
    uint32_t vm = vext_vm(desc);    /* set when the op is unmasked */
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;   /* element size in bytes */
    uint32_t vma = vext_vma(desc);  /* mask-agnostic tail/mask policy */

    VSTART_CHECK_EARLY_EXIT(env, env->vl);

    /* load bytes from guest memory */
    /* vstart tracks i so a mid-loop trap can restart at this element */
    for (i = env->vstart; i < env->vl; env->vstart = ++i) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            /* Fields of one segment are contiguous from the indexed base. */
            abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
}
555f732560eSLIU Zhiwei 
/*
 * Expand an indexed-load helper.  ETYPE is the data element type,
 * INDEX_FN the address generator for the index EEW, and LOAD_FN the
 * per-element TLB load callback.
 */
#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)                  \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                   \
                  void *vs2, CPURISCVState *env, uint32_t desc)            \
{                                                                          \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,                \
                    LOAD_FN, ctzl(sizeof(ETYPE)), GETPC());                \
}

/* vlxei<index EEW>_<data EEW>_v: all 16 index/data width combinations */
GEN_VEXT_LD_INDEX(vlxei8_8_v,   int8_t,  idx_b, lde_b_tlb)
GEN_VEXT_LD_INDEX(vlxei8_16_v,  int16_t, idx_b, lde_h_tlb)
GEN_VEXT_LD_INDEX(vlxei8_32_v,  int32_t, idx_b, lde_w_tlb)
GEN_VEXT_LD_INDEX(vlxei8_64_v,  int64_t, idx_b, lde_d_tlb)
GEN_VEXT_LD_INDEX(vlxei16_8_v,  int8_t,  idx_h, lde_b_tlb)
GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h_tlb)
GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w_tlb)
GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d_tlb)
GEN_VEXT_LD_INDEX(vlxei32_8_v,  int8_t,  idx_w, lde_b_tlb)
GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h_tlb)
GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w_tlb)
GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d_tlb)
GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b_tlb)
GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h_tlb)
GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w_tlb)
GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d_tlb)
580f732560eSLIU Zhiwei 
/*
 * Expand an indexed-store helper; mirror image of GEN_VEXT_LD_INDEX
 * with a per-element TLB store callback.
 */
#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
                  void *vs2, CPURISCVState *env, uint32_t desc)  \
{                                                                \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
                    STORE_FN, ctzl(sizeof(ETYPE)),               \
                    GETPC());                                    \
}

/* vsxei<index EEW>_<data EEW>_v: all 16 index/data width combinations */
GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b_tlb)
GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h_tlb)
GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w_tlb)
GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d_tlb)
GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b_tlb)
GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h_tlb)
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w_tlb)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d_tlb)
GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b_tlb)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h_tlb)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w_tlb)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d_tlb)
GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b_tlb)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h_tlb)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w_tlb)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d_tlb)
606022b4ecfSLIU Zhiwei 
607022b4ecfSLIU Zhiwei /*
 * unit-stride fault-only-first load instructions
609022b4ecfSLIU Zhiwei  */
/*
 * Fault-only-first unit-stride load: an exception may only be raised
 * by element 0.  Accesses beyond element 0 are probed non-faulting
 * first; if a probe fails at element i (i > 0), vl is truncated to i
 * and the instruction completes without trapping.
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base, CPURISCVState *env,
          uint32_t desc, vext_ldst_elem_fn_tlb *ldst_tlb,
          vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);    /* fields per segment */
    uint32_t vm = vext_vm(desc);    /* set when the op is unmasked */
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;   /* element size in bytes */
    uint32_t msize = nf * esz;      /* bytes per whole segment */
    uint32_t vma = vext_vma(desc);
    target_ulong addr, offset, remain, page_split, elems;
    int mmu_index = riscv_env_mmu_index(env, false);

    VSTART_CHECK_EARLY_EXIT(env, env->vl);

    /* probe every access */
    for (i = env->vstart; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = adjust_addr(env, base + i * (nf << log2_esz));
        if (i == 0) {
            /* Allow fault on first element. */
            probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
        } else {
            remain = nf << log2_esz;
            while (remain > 0) {
                void *host;
                int flags;

                /* Distance to the end of the current guest page. */
                offset = -(addr | TARGET_PAGE_MASK);

                /* Probe nonfault on subsequent elements. */
                flags = probe_access_flags(env, addr, offset, MMU_DATA_LOAD,
                                           mmu_index, true, &host, 0);

                /*
                 * Stop if invalid (unmapped) or mmio (transaction may fail).
                 * Do not stop if watchpoint, as the spec says that
                 * first-fault should continue to access the same
                 * elements regardless of any watchpoint.
                 */
                if (flags & ~TLB_WATCHPOINT) {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr = adjust_addr(env, addr + offset);
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        /* A later element failed its probe: truncate vl there. */
        env->vl = vl;
    }

    if (env->vstart < env->vl) {
        if (vm) {
            /* Calculate the page range of first page */
            addr = base + ((env->vstart * nf) << log2_esz);
            page_split = -(addr | TARGET_PAGE_MASK);
            /* Get number of elements */
            elems = page_split / msize;
            if (unlikely(env->vstart + elems >= env->vl)) {
                elems = env->vl - env->vstart;
            }

            /* Load/store elements in the first page */
            if (likely(elems)) {
                vext_page_ldst_us(env, vd, addr, elems, nf, max_elems,
                                  log2_esz, true, mmu_index, ldst_tlb,
                                  ldst_host, ra);
            }

            /* Load/store elements in the second page */
            if (unlikely(env->vstart < env->vl)) {
                /* Cross page element */
                if (unlikely(page_split % msize)) {
                    /* Segment straddles the page boundary: go field by
                     * field through the slow TLB path. */
                    for (k = 0; k < nf; k++) {
                        addr = base + ((env->vstart * nf + k) << log2_esz);
                        ldst_tlb(env, adjust_addr(env, addr),
                                 env->vstart + k * max_elems, vd, ra);
                    }
                    env->vstart++;
                }

                addr = base + ((env->vstart * nf) << log2_esz);
                /* Get number of elements of second page */
                elems = env->vl - env->vstart;

                /* Load/store elements in the second page */
                vext_page_ldst_us(env, vd, addr, elems, nf, max_elems,
                                  log2_esz, true, mmu_index, ldst_tlb,
                                  ldst_host, ra);
            }
        } else {
            /* Masked: element-at-a-time slow path through the TLB. */
            for (i = env->vstart; i < env->vl; i++) {
                k = 0;
                while (k < nf) {
                    if (!vext_elem_mask(v0, i)) {
                        /* set masked-off elements to 1s */
                        vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                          (i + k * max_elems + 1) * esz);
                        k++;
                        continue;
                    }
                    addr = base + ((i * nf + k) << log2_esz);
                    ldst_tlb(env, adjust_addr(env, addr), i + k * max_elems,
                             vd, ra);
                    k++;
                }
            }
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
}
734022b4ecfSLIU Zhiwei 
/*
 * Expand a fault-only-first load helper for one element width, wiring
 * both the slow (TLB) and fast (direct host) per-element callbacks.
 */
#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST)   \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,        \
                  CPURISCVState *env, uint32_t desc)            \
{                                                               \
    vext_ldff(vd, v0, base, env, desc, LOAD_FN_TLB,             \
              LOAD_FN_HOST, ctzl(sizeof(ETYPE)), GETPC());      \
}

GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b_tlb, lde_b_host)
GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h_tlb, lde_h_host)
GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w_tlb, lde_w_host)
GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d_tlb, lde_d_host)
747268fcca6SLIU Zhiwei 
/*
 * Scalar operation building blocks plugged into the RVVCALL expanders.
 * NOTE: these are function-like macros; DO_MAX/DO_MIN evaluate their
 * arguments more than once, so only pass side-effect-free expressions.
 */
#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  (N & M)
#define DO_XOR(N, M)  (N ^ M)
#define DO_OR(N, M)   (N | M)
#define DO_ADD(N, M)  (N + M)

/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))
757268fcca6SLIU Zhiwei 
/*
 * load and store whole register instructions
 */
/*
 * Whole-register load/store (vl<nf>re<eew>.v / vs<nf>r.v): moves
 * nf * vlenb bytes regardless of vl.  The access is split at most
 * once, at the first guest page boundary, so each page can be handled
 * by the batched vext_page_ldst_us() path.
 *
 * NOTE(review): the second-page logic relies on vext_page_ldst_us()
 * advancing env->vstart past the elements it handled — confirm against
 * its definition (not visible in this chunk).
 */
static inline QEMU_ALWAYS_INLINE void
vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
                vext_ldst_elem_fn_tlb *ldst_tlb,
                vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz,
                uintptr_t ra, bool is_load)
{
    target_ulong page_split, elems, addr;
    uint32_t nf = vext_nf(desc);
    uint32_t vlenb = riscv_cpu_cfg(env)->vlenb;
    uint32_t max_elems = vlenb >> log2_esz;  /* elements per register */
    uint32_t evl = nf * max_elems;           /* total elements to move */
    uint32_t esz = 1 << log2_esz;
    int mmu_index = riscv_env_mmu_index(env, false);

    /* Calculate the page range of first page */
    addr = base + (env->vstart << log2_esz);
    page_split = -(addr | TARGET_PAGE_MASK);
    /* Get number of elements */
    elems = page_split / esz;
    if (unlikely(env->vstart + elems >= evl)) {
        elems = evl - env->vstart;
    }

    /* Load/store elements in the first page */
    if (likely(elems)) {
        vext_page_ldst_us(env, vd, addr, elems, 1, max_elems, log2_esz,
                          is_load, mmu_index, ldst_tlb, ldst_host, ra);
    }

    /* Load/store elements in the second page */
    if (unlikely(env->vstart < evl)) {
        /* Cross page element */
        if (unlikely(page_split % esz)) {
            addr = base + (env->vstart << log2_esz);
            ldst_tlb(env, adjust_addr(env, addr), env->vstart, vd, ra);
            env->vstart++;
        }

        addr = base + (env->vstart << log2_esz);
        /* Get number of elements of second page */
        elems = evl - env->vstart;

        /* Load/store elements in the second page */
        vext_page_ldst_us(env, vd, addr, elems, 1, max_elems, log2_esz,
                          is_load, mmu_index, ldst_tlb, ldst_host, ra);
    }

    env->vstart = 0;
}
81030206bd8SFrank Chang 
/* Expand a whole-register load helper for one element width. */
#define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST)   \
void HELPER(NAME)(void *vd, target_ulong base, CPURISCVState *env,  \
                  uint32_t desc)                                    \
{                                                                   \
    vext_ldst_whole(vd, base, env, desc, LOAD_FN_TLB, LOAD_FN_HOST, \
                    ctzl(sizeof(ETYPE)), GETPC(), true);            \
}

/* vl<1,2,4,8>re<8,16,32,64>.v */
GEN_VEXT_LD_WHOLE(vl1re8_v,  int8_t,  lde_b_tlb, lde_b_host)
GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h_tlb, lde_h_host)
GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w_tlb, lde_w_host)
GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d_tlb, lde_d_host)
GEN_VEXT_LD_WHOLE(vl2re8_v,  int8_t,  lde_b_tlb, lde_b_host)
GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h_tlb, lde_h_host)
GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w_tlb, lde_w_host)
GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d_tlb, lde_d_host)
GEN_VEXT_LD_WHOLE(vl4re8_v,  int8_t,  lde_b_tlb, lde_b_host)
GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h_tlb, lde_h_host)
GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w_tlb, lde_w_host)
GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d_tlb, lde_d_host)
GEN_VEXT_LD_WHOLE(vl8re8_v,  int8_t,  lde_b_tlb, lde_b_host)
GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h_tlb, lde_h_host)
GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w_tlb, lde_w_host)
GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d_tlb, lde_d_host)
83530206bd8SFrank Chang 
/*
 * Expand a whole-register store helper.  Stores are always expressed
 * with byte callbacks since the data is moved verbatim.
 */
#define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN_TLB, STORE_FN_HOST)     \
void HELPER(NAME)(void *vd, target_ulong base, CPURISCVState *env,      \
                  uint32_t desc)                                        \
{                                                                       \
    vext_ldst_whole(vd, base, env, desc, STORE_FN_TLB, STORE_FN_HOST,   \
                    ctzl(sizeof(ETYPE)), GETPC(), false);               \
}

GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b_tlb, ste_b_host)
GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b_tlb, ste_b_host)
GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b_tlb, ste_b_host)
GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b_tlb, ste_b_host)
84830206bd8SFrank Chang 
/*
 * Vector Integer Arithmetic Instructions
 */

/*
 * Operand type tuples.  Each OP_*/WOP_*/NOP_* macro expands to
 * (TD, T1, T2, TX1, TX2): destination type, the two source operand
 * types, and the types the sources are converted to before applying
 * the scalar operation.  Letters encode signedness per operand
 * (S = signed, U = unsigned); WOP_* are widening forms (TD is double
 * width), NOP_* are narrowing forms (a source is double width).
 */
/* (TD, T1, T2, TX1, TX2) */
#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t

#define DO_SUB(N, M) (N - M)
/* Reverse subtract: operands swapped (vrsub computes scalar - vector). */
#define DO_RSUB(N, M) (M - N)
88043740e3aSLIU Zhiwei 
/* vadd.vv / vsub.vv: element-wise add and subtract, all four widths. */
RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)

/* Out-of-line helper expansion; the argument is the element size in bytes. */
GEN_VEXT_VV(vadd_vv_b, 1)
GEN_VEXT_VV(vadd_vv_h, 2)
GEN_VEXT_VV(vadd_vv_w, 4)
GEN_VEXT_VV(vadd_vv_d, 8)
GEN_VEXT_VV(vsub_vv_b, 1)
GEN_VEXT_VV(vsub_vv_h, 2)
GEN_VEXT_VV(vsub_vv_w, 4)
GEN_VEXT_VV(vsub_vv_d, 8)
89843740e3aSLIU Zhiwei 
89943740e3aSLIU Zhiwei 
/* vadd.vx / vsub.vx / vrsub.vx: vector-scalar forms, all four widths. */
RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)

/* Out-of-line helper expansion; the argument is the element size in bytes. */
GEN_VEXT_VX(vadd_vx_b, 1)
GEN_VEXT_VX(vadd_vx_h, 2)
GEN_VEXT_VX(vadd_vx_w, 4)
GEN_VEXT_VX(vadd_vx_d, 8)
GEN_VEXT_VX(vsub_vx_b, 1)
GEN_VEXT_VX(vsub_vx_h, 2)
GEN_VEXT_VX(vsub_vx_w, 4)
GEN_VEXT_VX(vsub_vx_d, 8)
GEN_VEXT_VX(vrsub_vx_b, 1)
GEN_VEXT_VX(vrsub_vx_h, 2)
GEN_VEXT_VX(vrsub_vx_w, 4)
GEN_VEXT_VX(vrsub_vx_d, 8)
92543740e3aSLIU Zhiwei 
92643740e3aSLIU Zhiwei void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
92743740e3aSLIU Zhiwei {
92843740e3aSLIU Zhiwei     intptr_t oprsz = simd_oprsz(desc);
92943740e3aSLIU Zhiwei     intptr_t i;
93043740e3aSLIU Zhiwei 
93143740e3aSLIU Zhiwei     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
93243740e3aSLIU Zhiwei         *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
93343740e3aSLIU Zhiwei     }
93443740e3aSLIU Zhiwei }
93543740e3aSLIU Zhiwei 
93643740e3aSLIU Zhiwei void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
93743740e3aSLIU Zhiwei {
93843740e3aSLIU Zhiwei     intptr_t oprsz = simd_oprsz(desc);
HELPER(vec_rsubs16)93943740e3aSLIU Zhiwei     intptr_t i;
94043740e3aSLIU Zhiwei 
94143740e3aSLIU Zhiwei     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
94243740e3aSLIU Zhiwei         *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
94343740e3aSLIU Zhiwei     }
94443740e3aSLIU Zhiwei }
94543740e3aSLIU Zhiwei 
94643740e3aSLIU Zhiwei void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
94743740e3aSLIU Zhiwei {
94843740e3aSLIU Zhiwei     intptr_t oprsz = simd_oprsz(desc);
HELPER(vec_rsubs32)94943740e3aSLIU Zhiwei     intptr_t i;
95043740e3aSLIU Zhiwei 
95143740e3aSLIU Zhiwei     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
95243740e3aSLIU Zhiwei         *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
95343740e3aSLIU Zhiwei     }
95443740e3aSLIU Zhiwei }
95543740e3aSLIU Zhiwei 
95643740e3aSLIU Zhiwei void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
95743740e3aSLIU Zhiwei {
95843740e3aSLIU Zhiwei     intptr_t oprsz = simd_oprsz(desc);
HELPER(vec_rsubs64)95943740e3aSLIU Zhiwei     intptr_t i;
96043740e3aSLIU Zhiwei 
96143740e3aSLIU Zhiwei     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
96243740e3aSLIU Zhiwei         *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
96343740e3aSLIU Zhiwei     }
96443740e3aSLIU Zhiwei }
9658fcdf776SLIU Zhiwei 
/* Vector Widening Integer Add/Subtract */
/*
 * Widening type tuples: WOP_UUU_* / WOP_SSS_* produce a double-width
 * result from two single-width sources; WOP_WUUU_* / WOP_WSSS_* take
 * an already-widened first source (the vw*.wv forms).
 */
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_WUUU_B  uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
#define WOP_WUUU_H  uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
#define WOP_WUUU_W  uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
#define WOP_WSSS_B  int16_t, int8_t, int16_t, int16_t, int16_t
#define WOP_WSSS_H  int32_t, int16_t, int32_t, int32_t, int32_t
#define WOP_WSSS_W  int64_t, int32_t, int64_t, int64_t, int64_t

/* vwaddu/vwsubu/vwadd/vwsub .vv and .wv forms, byte through word. */
RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
/* Out-of-line helper expansion; the argument is the *result* size in bytes. */
GEN_VEXT_VV(vwaddu_vv_b, 2)
GEN_VEXT_VV(vwaddu_vv_h, 4)
GEN_VEXT_VV(vwaddu_vv_w, 8)
GEN_VEXT_VV(vwsubu_vv_b, 2)
GEN_VEXT_VV(vwsubu_vv_h, 4)
GEN_VEXT_VV(vwsubu_vv_w, 8)
GEN_VEXT_VV(vwadd_vv_b, 2)
GEN_VEXT_VV(vwadd_vv_h, 4)
GEN_VEXT_VV(vwadd_vv_w, 8)
GEN_VEXT_VV(vwsub_vv_b, 2)
GEN_VEXT_VV(vwsub_vv_h, 4)
GEN_VEXT_VV(vwsub_vv_w, 8)
GEN_VEXT_VV(vwaddu_wv_b, 2)
GEN_VEXT_VV(vwaddu_wv_h, 4)
GEN_VEXT_VV(vwaddu_wv_w, 8)
GEN_VEXT_VV(vwsubu_wv_b, 2)
GEN_VEXT_VV(vwsubu_wv_h, 4)
GEN_VEXT_VV(vwsubu_wv_w, 8)
GEN_VEXT_VV(vwadd_wv_b, 2)
GEN_VEXT_VV(vwadd_wv_h, 4)
GEN_VEXT_VV(vwadd_wv_w, 8)
GEN_VEXT_VV(vwsub_wv_b, 2)
GEN_VEXT_VV(vwsub_wv_h, 4)
GEN_VEXT_VV(vwsub_wv_w, 8)
10278fcdf776SLIU Zhiwei 
/*
 * Vector Widening Integer Add/Subtract, vector-scalar (vx/wx) forms:
 * same operations as the vv/wv group above with rs1 as the scalar
 * second operand (OPIVX2 takes one fewer H index since there is no
 * vs1 vector to index).
 */
RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
GEN_VEXT_VX(vwaddu_vx_b, 2)
GEN_VEXT_VX(vwaddu_vx_h, 4)
GEN_VEXT_VX(vwaddu_vx_w, 8)
GEN_VEXT_VX(vwsubu_vx_b, 2)
GEN_VEXT_VX(vwsubu_vx_h, 4)
GEN_VEXT_VX(vwsubu_vx_w, 8)
GEN_VEXT_VX(vwadd_vx_b, 2)
GEN_VEXT_VX(vwadd_vx_h, 4)
GEN_VEXT_VX(vwadd_vx_w, 8)
GEN_VEXT_VX(vwsub_vx_b, 2)
GEN_VEXT_VX(vwsub_vx_h, 4)
GEN_VEXT_VX(vwsub_vx_w, 8)
GEN_VEXT_VX(vwaddu_wx_b, 2)
GEN_VEXT_VX(vwaddu_wx_h, 4)
GEN_VEXT_VX(vwaddu_wx_w, 8)
GEN_VEXT_VX(vwsubu_wx_b, 2)
GEN_VEXT_VX(vwsubu_wx_h, 4)
GEN_VEXT_VX(vwsubu_wx_w, 8)
GEN_VEXT_VX(vwadd_wx_b, 2)
GEN_VEXT_VX(vwadd_wx_h, 4)
GEN_VEXT_VX(vwadd_wx_w, 8)
GEN_VEXT_VX(vwsub_wx_b, 2)
GEN_VEXT_VX(vwsub_wx_h, 4)
GEN_VEXT_VX(vwsub_wx_w, 8)
10763a6f8f68SLIU Zhiwei 
/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */

/*
 * Per-element ops: C is the carry/borrow-in bit (0 or 1) read from mask
 * register v0 by the GEN_VEXT_VADC_* helpers below.  Arguments are fully
 * parenthesized so the macros stay correct for any argument expression
 * (e.g. the casted scalar passed by GEN_VEXT_VADC_VXM).
 */
#define DO_VADC(N, M, C) ((N) + (M) + (C))
#define DO_VSBC(N, M, C) ((N) - (M) - (C))
10803a6f8f68SLIU Zhiwei 
/*
 * GEN_VEXT_VADC_VVM -- emit the helper for vadc.vvm / vsbc.vvm:
 * vd[i] = DO_OP(vs2[i], vs1[i], carry) for i in [vstart, vl), where the
 * per-element carry/borrow-in is bit i of mask register v0.  After the
 * body loop, vstart is reset and the tail elements are set to all-1s
 * when the tail-agnostic bit (vta) is set.
 */
#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP)              \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t esz = sizeof(ETYPE);                             \
    uint32_t total_elems =                                    \
        vext_get_total_elems(env, desc, esz);                 \
    uint32_t vta = vext_vta(desc);                            \
    uint32_t i;                                               \
                                                              \
    VSTART_CHECK_EARLY_EXIT(env, vl);                         \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = vext_elem_mask(v0, i);                  \
                                                              \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry);         \
    }                                                         \
    env->vstart = 0;                                          \
    /* set tail elements to 1s */                             \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);  \
}
11053a6f8f68SLIU Zhiwei 
/* vadc.vvm: vd[i] = vs2[i] + vs1[i] + v0.mask[i] */
GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)

/* vsbc.vvm: vd[i] = vs2[i] - vs1[i] - v0.mask[i] */
GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
11153a6f8f68SLIU Zhiwei 
/*
 * GEN_VEXT_VADC_VXM -- vector-scalar variant of GEN_VEXT_VADC_VVM:
 * vd[i] = DO_OP(vs2[i], rs1, carry) with the scalar rs1 converted to
 * the element type via (ETYPE)(target_long), i.e. sign-extended to
 * target_long first and then truncated.  Carry-in is bit i of v0;
 * tail elements are set to all-1s when vta is set.
 */
#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP)                         \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
                  CPURISCVState *env, uint32_t desc)                     \
{                                                                        \
    uint32_t vl = env->vl;                                               \
    uint32_t esz = sizeof(ETYPE);                                        \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);         \
    uint32_t vta = vext_vta(desc);                                       \
    uint32_t i;                                                          \
                                                                         \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                    \
                                                                         \
    for (i = env->vstart; i < vl; i++) {                                 \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                               \
        ETYPE carry = vext_elem_mask(v0, i);                             \
                                                                         \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
    }                                                                    \
    env->vstart = 0;                                                     \
    /* set tail elements to 1s */                                        \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);             \
}
11383a6f8f68SLIU Zhiwei 
/* vadc.vxm: vd[i] = vs2[i] + rs1 + v0.mask[i] */
GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)

/* vsbc.vxm: vd[i] = vs2[i] - rs1 - v0.mask[i] */
GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
11483a6f8f68SLIU Zhiwei 
/*
 * Carry/borrow-out predicates for vmadc/vmsbc.  The instantiations below
 * use unsigned element types, so overflow detection relies on defined
 * unsigned wraparound:
 * DO_MADC: carry-out of N + M + C -- the truncated sum is smaller than
 * (or, with carry-in, not larger than) an addend iff the add wrapped.
 * DO_MSBC: borrow-out of N - M - C.
 * Arguments are parenthesized for macro hygiene.
 */
#define DO_MADC(N, M, C) ((C) ? (__typeof(N))((N) + (M) + 1) <= (N) :   \
                          (__typeof(N))((N) + (M)) < (N))
#define DO_MSBC(N, M, C) ((C) ? (N) <= (M) : (N) < (M))
11523a6f8f68SLIU Zhiwei 
/*
 * GEN_VEXT_VMADC_VVM -- emit the helper for vmadc[.vvm] / vmsbc[.vvm]:
 * writes the carry/borrow-out predicate DO_OP(vs2[i], vs1[i], carry)
 * into bit i of mask register vd.  When vm is set (unmasked form) the
 * carry-in is forced to 0; otherwise it is bit i of v0.  total_elems is
 * the number of mask bits in one vector register (vlenb * 8).
 */
#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;    \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t i;                                               \
                                                              \
    VSTART_CHECK_EARLY_EXIT(env, vl);                         \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = !vm && vext_elem_mask(v0, i);           \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry));      \
    }                                                         \
    env->vstart = 0;                                          \
    /*
     * mask destination registers are always tail-agnostic;
     * set tail elements to 1s
     */                                                       \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}
11823a6f8f68SLIU Zhiwei 
/* vmadc[.vvm]: vd.mask[i] = carry-out of vs2[i] + vs1[i] (+ carry-in) */
GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)

/* vmsbc[.vvm]: vd.mask[i] = borrow-out of vs2[i] - vs1[i] (- borrow-in) */
GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
11923a6f8f68SLIU Zhiwei 
/*
 * GEN_VEXT_VMADC_VXM -- vector-scalar variant of GEN_VEXT_VMADC_VVM:
 * vd.mask[i] = DO_OP(vs2[i], rs1, carry) with rs1 converted to the
 * element type via (ETYPE)(target_long).  Carry-in is forced to 0
 * when vm is set (unmasked form).
 */
#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{                                                               \
    uint32_t vl = env->vl;                                      \
    uint32_t vm = vext_vm(desc);                                \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;      \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);                \
    uint32_t i;                                                 \
                                                                \
    VSTART_CHECK_EARLY_EXIT(env, vl);                           \
                                                                \
    for (i = env->vstart; i < vl; i++) {                        \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                      \
        ETYPE carry = !vm && vext_elem_mask(v0, i);             \
        vext_set_elem_mask(vd, i,                               \
                DO_OP(s2, (ETYPE)(target_long)s1, carry));      \
    }                                                           \
    env->vstart = 0;                                            \
    /*
     * mask destination registers are always tail-agnostic;
     * set tail elements to 1s
     */                                                         \
    if (vta_all_1s) {                                           \
        for (; i < total_elems; i++) {                          \
            vext_set_elem_mask(vd, i, 1);                       \
        }                                                       \
    }                                                           \
}
12223a6f8f68SLIU Zhiwei 
/* vmadc[.vxm]: vd.mask[i] = carry-out of vs2[i] + rs1 (+ carry-in) */
GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)

/* vmsbc[.vxm]: vd.mask[i] = borrow-out of vs2[i] - rs1 (- borrow-in) */
GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
1232d3842924SLIU Zhiwei 
/* Vector Bitwise Logical Instructions */
/*
 * vand/vor/vxor, vector-vector forms.  Signedness is irrelevant for
 * bitwise ops, so the OP_SSS_* type macros are used throughout.
 */
RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
GEN_VEXT_VV(vand_vv_b, 1)
GEN_VEXT_VV(vand_vv_h, 2)
GEN_VEXT_VV(vand_vv_w, 4)
GEN_VEXT_VV(vand_vv_d, 8)
GEN_VEXT_VV(vor_vv_b, 1)
GEN_VEXT_VV(vor_vv_h, 2)
GEN_VEXT_VV(vor_vv_w, 4)
GEN_VEXT_VV(vor_vv_d, 8)
GEN_VEXT_VV(vxor_vv_b, 1)
GEN_VEXT_VV(vxor_vv_h, 2)
GEN_VEXT_VV(vxor_vv_w, 4)
GEN_VEXT_VV(vxor_vv_d, 8)
1258d3842924SLIU Zhiwei 
/* vand/vor/vxor, vector-scalar (vx) forms */
RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
GEN_VEXT_VX(vand_vx_b, 1)
GEN_VEXT_VX(vand_vx_h, 2)
GEN_VEXT_VX(vand_vx_w, 4)
GEN_VEXT_VX(vand_vx_d, 8)
GEN_VEXT_VX(vor_vx_b, 1)
GEN_VEXT_VX(vor_vx_h, 2)
GEN_VEXT_VX(vor_vx_w, 4)
GEN_VEXT_VX(vor_vx_d, 8)
GEN_VEXT_VX(vxor_vx_b, 1)
GEN_VEXT_VX(vxor_vx_h, 2)
GEN_VEXT_VX(vxor_vx_w, 4)
GEN_VEXT_VX(vxor_vx_d, 8)
12833277d955SLIU Zhiwei 
/* Vector Single-Width Bit Shift Instructions */

/*
 * Per-element shift ops.  The shift amount M is masked to the element
 * width by the GEN_VEXT_SHIFT_* callers ("& MASK"), so it is never out
 * of range.  Both operands are parenthesized for macro hygiene (the
 * original left N bare, which would mis-bind for a compound argument).
 */
#define DO_SLL(N, M)  ((N) << (M))
#define DO_SRL(N, M)  ((N) >> (M))
12873277d955SLIU Zhiwei 
/*
 * Generate the helpers for shift instructions with two vector operands.
 * TS2 is the type of the shifted value (vs2) -- a signed TS2 makes
 * DO_SRL an arithmetic right shift (vsra); TS1 is the destination and
 * shift-amount (vs1) type.  MASK clamps the shift amount to the element
 * width.  The narrowing right shifts below reuse this macro with TS2
 * twice as wide as TS1.  Masked-off elements are set to 1s when the
 * mask-agnostic bit (vma) is set; tail elements when vta is set.
 */
#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK)             \
void HELPER(NAME)(void *vd, void *v0, void *vs1,                          \
                  void *vs2, CPURISCVState *env, uint32_t desc)           \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(TS1);                                           \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        TS1 s1 = *((TS1 *)vs1 + HS1(i));                                  \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                                  \
        *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);                        \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}
13173277d955SLIU Zhiwei 
/* vsll.vv: logical left shift */
GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t,  uint8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)

/* vsrl.vv: logical right shift (unsigned TS2) */
GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

/*
 * vsra.vv: arithmetic right shift -- same DO_SRL, but a signed TS2
 * makes ">>" sign-propagating on the compilers QEMU supports.
 */
GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t,  int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
13323277d955SLIU Zhiwei 
/*
 * Generate the helpers for shift instructions with one vector and one
 * scalar operand.  TS2 is the type of the shifted value (vs2); TD is
 * the destination type.  The scalar shift amount s1 is masked to the
 * element width with MASK.  Masked-off elements are set to 1s when the
 * mask-agnostic bit (vma) is set; tail elements when vta is set.
 */
#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,      \
                  void *vs2, CPURISCVState *env,            \
                  uint32_t desc)                            \
{                                                           \
    uint32_t vm = vext_vm(desc);                            \
    uint32_t vl = env->vl;                                  \
    uint32_t esz = sizeof(TD);                              \
    uint32_t total_elems =                                  \
        vext_get_total_elems(env, desc, esz);               \
    uint32_t vta = vext_vta(desc);                          \
    uint32_t vma = vext_vma(desc);                          \
    uint32_t i;                                             \
                                                            \
    VSTART_CHECK_EARLY_EXIT(env, vl);                       \
                                                            \
    for (i = env->vstart; i < vl; i++) {                    \
        if (!vm && !vext_elem_mask(v0, i)) {                \
            /* set masked-off elements to 1s */             \
            vext_set_elems_1s(vd, vma, i * esz,             \
                              (i + 1) * esz);               \
            continue;                                       \
        }                                                   \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                    \
        *((TD *)vd + HD(i)) = OP(s2, s1 & MASK);            \
    }                                                       \
    env->vstart = 0;                                        \
    /* set tail elements to 1s */                           \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\
}
13663277d955SLIU Zhiwei 
/* vsll.vx / vsrl.vx / vsra.vx: vector-scalar single-width shifts */
GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

/* signed TS2 gives the arithmetic variant of DO_SRL */
GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/*
 * Vector Narrowing Integer Right Shift Instructions: reuse the shift
 * generators with a source (TS2) twice as wide as the destination, and
 * MASK sized for the *wide* source element.
 */
GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t,  uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t,  int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
13951366fc79SLIU Zhiwei 
/* Vector Integer Comparison Instructions */
/*
 * Per-element compare predicates for the mask-producing vms* helpers.
 * Arguments are fully parenthesized for macro hygiene.
 */
#define DO_MSEQ(N, M) ((N) == (M))
#define DO_MSNE(N, M) ((N) != (M))
#define DO_MSLT(N, M) ((N) < (M))
#define DO_MSLE(N, M) ((N) <= (M))
#define DO_MSGT(N, M) ((N) > (M))
14021366fc79SLIU Zhiwei 
/*
 * GEN_VEXT_CMP_VV -- emit a mask-producing compare helper:
 * vd.mask[i] = DO_OP(vs2[i], vs1[i]).  Masked-off elements are set to
 * 1 when the mask-agnostic bit (vma) is set; the tail of the (always
 * tail-agnostic) mask destination is filled with 1s when vta_all_1s is
 * set.  total_elems is the number of mask bits in one vector register.
 */
#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t vl = env->vl;                                    \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;    \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t vma = vext_vma(desc);                            \
    uint32_t i;                                               \
                                                              \
    VSTART_CHECK_EARLY_EXIT(env, vl);                         \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        if (!vm && !vext_elem_mask(v0, i)) {                  \
            /* set masked-off elements to 1s */               \
            if (vma) {                                        \
                vext_set_elem_mask(vd, i, 1);                 \
            }                                                 \
            continue;                                         \
        }                                                     \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1));             \
    }                                                         \
    env->vstart = 0;                                          \
    /*
     * mask destination registers are always tail-agnostic;
     * set tail elements to 1s
     */                                                       \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}
14391366fc79SLIU Zhiwei 
14401366fc79SLIU Zhiwei GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
14411366fc79SLIU Zhiwei GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
14421366fc79SLIU Zhiwei GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
14431366fc79SLIU Zhiwei GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
14441366fc79SLIU Zhiwei 
14451366fc79SLIU Zhiwei GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t,  H1, DO_MSNE)
14461366fc79SLIU Zhiwei GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
14471366fc79SLIU Zhiwei GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
14481366fc79SLIU Zhiwei GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
14491366fc79SLIU Zhiwei 
14501366fc79SLIU Zhiwei GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t,  H1, DO_MSLT)
14511366fc79SLIU Zhiwei GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
14521366fc79SLIU Zhiwei GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
14531366fc79SLIU Zhiwei GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
14541366fc79SLIU Zhiwei 
14551366fc79SLIU Zhiwei GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t,  H1, DO_MSLT)
14561366fc79SLIU Zhiwei GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
14571366fc79SLIU Zhiwei GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
14581366fc79SLIU Zhiwei GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
14591366fc79SLIU Zhiwei 
14601366fc79SLIU Zhiwei GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t,  H1, DO_MSLE)
14611366fc79SLIU Zhiwei GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
14621366fc79SLIU Zhiwei GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
14631366fc79SLIU Zhiwei GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
14641366fc79SLIU Zhiwei 
14651366fc79SLIU Zhiwei GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t,  H1, DO_MSLE)
14661366fc79SLIU Zhiwei GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
14671366fc79SLIU Zhiwei GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
14681366fc79SLIU Zhiwei GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
14691366fc79SLIU Zhiwei 
14701366fc79SLIU Zhiwei #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP)                      \
14711366fc79SLIU Zhiwei void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
14721366fc79SLIU Zhiwei                   CPURISCVState *env, uint32_t desc)                \
14731366fc79SLIU Zhiwei {                                                                   \
14741366fc79SLIU Zhiwei     uint32_t vm = vext_vm(desc);                                    \
14751366fc79SLIU Zhiwei     uint32_t vl = env->vl;                                          \
147658bc9063SDaniel Henrique Barboza     uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;          \
147738581e5cSeopXD     uint32_t vta_all_1s = vext_vta_all_1s(desc);                    \
14786e11d7eaSYueh-Ting (eop) Chen     uint32_t vma = vext_vma(desc);                                  \
14791366fc79SLIU Zhiwei     uint32_t i;                                                     \
14801366fc79SLIU Zhiwei                                                                     \
14817f5f3e5aSChao Liu     VSTART_CHECK_EARLY_EXIT(env, vl);                               \
1482df4252b2SDaniel Henrique Barboza                                                                     \
1483f714361eSFrank Chang     for (i = env->vstart; i < vl; i++) {                            \
14841366fc79SLIU Zhiwei         ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
1485f9298de5SFrank Chang         if (!vm && !vext_elem_mask(v0, i)) {                        \
14866e11d7eaSYueh-Ting (eop) Chen             /* set masked-off elements to 1s */                     \
14876e11d7eaSYueh-Ting (eop) Chen             if (vma) {                                              \
14886e11d7eaSYueh-Ting (eop) Chen                 vext_set_elem_mask(vd, i, 1);                       \
14896e11d7eaSYueh-Ting (eop) Chen             }                                                       \
14901366fc79SLIU Zhiwei             continue;                                               \
14911366fc79SLIU Zhiwei         }                                                           \
1492f9298de5SFrank Chang         vext_set_elem_mask(vd, i,                                   \
14931366fc79SLIU Zhiwei                 DO_OP(s2, (ETYPE)(target_long)s1));                 \
14941366fc79SLIU Zhiwei     }                                                               \
1495f714361eSFrank Chang     env->vstart = 0;                                                \
14963b57254dSWeiwei Li     /*
14973b57254dSWeiwei Li      * mask destination register are always tail-agnostic
14983b57254dSWeiwei Li      * set tail elements to 1s
14993b57254dSWeiwei Li      */                                                             \
150038581e5cSeopXD     if (vta_all_1s) {                                               \
150138581e5cSeopXD         for (; i < total_elems; i++) {                              \
150238581e5cSeopXD             vext_set_elem_mask(vd, i, 1);                           \
150338581e5cSeopXD         }                                                           \
150438581e5cSeopXD     }                                                               \
15051366fc79SLIU Zhiwei }
15061366fc79SLIU Zhiwei 
15071366fc79SLIU Zhiwei GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t,  H1, DO_MSEQ)
15081366fc79SLIU Zhiwei GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
15091366fc79SLIU Zhiwei GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
15101366fc79SLIU Zhiwei GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
15111366fc79SLIU Zhiwei 
15121366fc79SLIU Zhiwei GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t,  H1, DO_MSNE)
15131366fc79SLIU Zhiwei GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
15141366fc79SLIU Zhiwei GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
15151366fc79SLIU Zhiwei GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
15161366fc79SLIU Zhiwei 
15171366fc79SLIU Zhiwei GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t,  H1, DO_MSLT)
15181366fc79SLIU Zhiwei GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
15191366fc79SLIU Zhiwei GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
15201366fc79SLIU Zhiwei GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
15211366fc79SLIU Zhiwei 
15221366fc79SLIU Zhiwei GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t,  H1, DO_MSLT)
15231366fc79SLIU Zhiwei GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
15241366fc79SLIU Zhiwei GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
15251366fc79SLIU Zhiwei GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
15261366fc79SLIU Zhiwei 
15271366fc79SLIU Zhiwei GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t,  H1, DO_MSLE)
15281366fc79SLIU Zhiwei GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
15291366fc79SLIU Zhiwei GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
15301366fc79SLIU Zhiwei GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
15311366fc79SLIU Zhiwei 
15321366fc79SLIU Zhiwei GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t,  H1, DO_MSLE)
15331366fc79SLIU Zhiwei GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
15341366fc79SLIU Zhiwei GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
15351366fc79SLIU Zhiwei GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
15361366fc79SLIU Zhiwei 
15371366fc79SLIU Zhiwei GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t,  H1, DO_MSGT)
15381366fc79SLIU Zhiwei GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
15391366fc79SLIU Zhiwei GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
15401366fc79SLIU Zhiwei GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
15411366fc79SLIU Zhiwei 
15421366fc79SLIU Zhiwei GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t,  H1, DO_MSGT)
15431366fc79SLIU Zhiwei GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
15441366fc79SLIU Zhiwei GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
15451366fc79SLIU Zhiwei GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
1546558fa779SLIU Zhiwei 
1547558fa779SLIU Zhiwei /* Vector Integer Min/Max Instructions */
1548558fa779SLIU Zhiwei RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1549558fa779SLIU Zhiwei RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1550558fa779SLIU Zhiwei RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1551558fa779SLIU Zhiwei RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1552558fa779SLIU Zhiwei RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1553558fa779SLIU Zhiwei RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1554558fa779SLIU Zhiwei RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1555558fa779SLIU Zhiwei RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1556558fa779SLIU Zhiwei RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1557558fa779SLIU Zhiwei RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1558558fa779SLIU Zhiwei RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1559558fa779SLIU Zhiwei RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1560558fa779SLIU Zhiwei RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1561558fa779SLIU Zhiwei RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1562558fa779SLIU Zhiwei RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1563558fa779SLIU Zhiwei RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
1564f1eed927SeopXD GEN_VEXT_VV(vminu_vv_b, 1)
1565f1eed927SeopXD GEN_VEXT_VV(vminu_vv_h, 2)
1566f1eed927SeopXD GEN_VEXT_VV(vminu_vv_w, 4)
1567f1eed927SeopXD GEN_VEXT_VV(vminu_vv_d, 8)
1568f1eed927SeopXD GEN_VEXT_VV(vmin_vv_b, 1)
1569f1eed927SeopXD GEN_VEXT_VV(vmin_vv_h, 2)
1570f1eed927SeopXD GEN_VEXT_VV(vmin_vv_w, 4)
1571f1eed927SeopXD GEN_VEXT_VV(vmin_vv_d, 8)
1572f1eed927SeopXD GEN_VEXT_VV(vmaxu_vv_b, 1)
1573f1eed927SeopXD GEN_VEXT_VV(vmaxu_vv_h, 2)
1574f1eed927SeopXD GEN_VEXT_VV(vmaxu_vv_w, 4)
1575f1eed927SeopXD GEN_VEXT_VV(vmaxu_vv_d, 8)
1576f1eed927SeopXD GEN_VEXT_VV(vmax_vv_b, 1)
1577f1eed927SeopXD GEN_VEXT_VV(vmax_vv_h, 2)
1578f1eed927SeopXD GEN_VEXT_VV(vmax_vv_w, 4)
1579f1eed927SeopXD GEN_VEXT_VV(vmax_vv_d, 8)
1580558fa779SLIU Zhiwei 
1581558fa779SLIU Zhiwei RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1582558fa779SLIU Zhiwei RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1583558fa779SLIU Zhiwei RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1584558fa779SLIU Zhiwei RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1585558fa779SLIU Zhiwei RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1586558fa779SLIU Zhiwei RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1587558fa779SLIU Zhiwei RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1588558fa779SLIU Zhiwei RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1589558fa779SLIU Zhiwei RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1590558fa779SLIU Zhiwei RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1591558fa779SLIU Zhiwei RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1592558fa779SLIU Zhiwei RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1593558fa779SLIU Zhiwei RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1594558fa779SLIU Zhiwei RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1595558fa779SLIU Zhiwei RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1596558fa779SLIU Zhiwei RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
15975c19fc15SeopXD GEN_VEXT_VX(vminu_vx_b, 1)
15985c19fc15SeopXD GEN_VEXT_VX(vminu_vx_h, 2)
15995c19fc15SeopXD GEN_VEXT_VX(vminu_vx_w, 4)
16005c19fc15SeopXD GEN_VEXT_VX(vminu_vx_d, 8)
16015c19fc15SeopXD GEN_VEXT_VX(vmin_vx_b, 1)
16025c19fc15SeopXD GEN_VEXT_VX(vmin_vx_h, 2)
16035c19fc15SeopXD GEN_VEXT_VX(vmin_vx_w, 4)
16045c19fc15SeopXD GEN_VEXT_VX(vmin_vx_d, 8)
16055c19fc15SeopXD GEN_VEXT_VX(vmaxu_vx_b, 1)
16065c19fc15SeopXD GEN_VEXT_VX(vmaxu_vx_h, 2)
16075c19fc15SeopXD GEN_VEXT_VX(vmaxu_vx_w, 4)
16085c19fc15SeopXD GEN_VEXT_VX(vmaxu_vx_d, 8)
16095c19fc15SeopXD GEN_VEXT_VX(vmax_vx_b, 1)
16105c19fc15SeopXD GEN_VEXT_VX(vmax_vx_h, 2)
16115c19fc15SeopXD GEN_VEXT_VX(vmax_vx_w, 4)
16125c19fc15SeopXD GEN_VEXT_VX(vmax_vx_d, 8)
1613958b85f3SLIU Zhiwei 
1614958b85f3SLIU Zhiwei /* Vector Single-Width Integer Multiply Instructions */
1615958b85f3SLIU Zhiwei #define DO_MUL(N, M) (N * M)
1616958b85f3SLIU Zhiwei RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1617958b85f3SLIU Zhiwei RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1618958b85f3SLIU Zhiwei RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1619958b85f3SLIU Zhiwei RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
1620f1eed927SeopXD GEN_VEXT_VV(vmul_vv_b, 1)
1621f1eed927SeopXD GEN_VEXT_VV(vmul_vv_h, 2)
1622f1eed927SeopXD GEN_VEXT_VV(vmul_vv_w, 4)
1623f1eed927SeopXD GEN_VEXT_VV(vmul_vv_d, 8)
1624958b85f3SLIU Zhiwei 
1625958b85f3SLIU Zhiwei static int8_t do_mulh_b(int8_t s2, int8_t s1)
1626958b85f3SLIU Zhiwei {
1627958b85f3SLIU Zhiwei     return (int16_t)s2 * (int16_t)s1 >> 8;
1628958b85f3SLIU Zhiwei }
1629958b85f3SLIU Zhiwei 
1630958b85f3SLIU Zhiwei static int16_t do_mulh_h(int16_t s2, int16_t s1)
1631958b85f3SLIU Zhiwei {
1632958b85f3SLIU Zhiwei     return (int32_t)s2 * (int32_t)s1 >> 16;
do_mulh_h(int16_t s2,int16_t s1)1633958b85f3SLIU Zhiwei }
1634958b85f3SLIU Zhiwei 
1635958b85f3SLIU Zhiwei static int32_t do_mulh_w(int32_t s2, int32_t s1)
1636958b85f3SLIU Zhiwei {
1637958b85f3SLIU Zhiwei     return (int64_t)s2 * (int64_t)s1 >> 32;
1638958b85f3SLIU Zhiwei }
1639958b85f3SLIU Zhiwei 
1640958b85f3SLIU Zhiwei static int64_t do_mulh_d(int64_t s2, int64_t s1)
1641958b85f3SLIU Zhiwei {
1642958b85f3SLIU Zhiwei     uint64_t hi_64, lo_64;
do_mulh_d(int64_t s2,int64_t s1)1643958b85f3SLIU Zhiwei 
1644958b85f3SLIU Zhiwei     muls64(&lo_64, &hi_64, s1, s2);
1645958b85f3SLIU Zhiwei     return hi_64;
1646958b85f3SLIU Zhiwei }
1647958b85f3SLIU Zhiwei 
1648958b85f3SLIU Zhiwei static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1649958b85f3SLIU Zhiwei {
1650958b85f3SLIU Zhiwei     return (uint16_t)s2 * (uint16_t)s1 >> 8;
do_mulhu_b(uint8_t s2,uint8_t s1)1651958b85f3SLIU Zhiwei }
1652958b85f3SLIU Zhiwei 
1653958b85f3SLIU Zhiwei static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1654958b85f3SLIU Zhiwei {
1655958b85f3SLIU Zhiwei     return (uint32_t)s2 * (uint32_t)s1 >> 16;
1656958b85f3SLIU Zhiwei }
1657958b85f3SLIU Zhiwei 
1658958b85f3SLIU Zhiwei static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1659958b85f3SLIU Zhiwei {
1660958b85f3SLIU Zhiwei     return (uint64_t)s2 * (uint64_t)s1 >> 32;
do_mulhu_w(uint32_t s2,uint32_t s1)1661958b85f3SLIU Zhiwei }
1662958b85f3SLIU Zhiwei 
1663958b85f3SLIU Zhiwei static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1664958b85f3SLIU Zhiwei {
1665958b85f3SLIU Zhiwei     uint64_t hi_64, lo_64;
1666958b85f3SLIU Zhiwei 
1667958b85f3SLIU Zhiwei     mulu64(&lo_64, &hi_64, s2, s1);
1668958b85f3SLIU Zhiwei     return hi_64;
1669958b85f3SLIU Zhiwei }
1670958b85f3SLIU Zhiwei 
1671958b85f3SLIU Zhiwei static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1672958b85f3SLIU Zhiwei {
1673958b85f3SLIU Zhiwei     return (int16_t)s2 * (uint16_t)s1 >> 8;
do_mulhsu_b(int8_t s2,uint8_t s1)1674958b85f3SLIU Zhiwei }
1675958b85f3SLIU Zhiwei 
1676958b85f3SLIU Zhiwei static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1677958b85f3SLIU Zhiwei {
1678958b85f3SLIU Zhiwei     return (int32_t)s2 * (uint32_t)s1 >> 16;
1679958b85f3SLIU Zhiwei }
1680958b85f3SLIU Zhiwei 
1681958b85f3SLIU Zhiwei static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1682958b85f3SLIU Zhiwei {
1683958b85f3SLIU Zhiwei     return (int64_t)s2 * (uint64_t)s1 >> 32;
do_mulhsu_w(int32_t s2,uint32_t s1)1684958b85f3SLIU Zhiwei }
1685958b85f3SLIU Zhiwei 
1686958b85f3SLIU Zhiwei /*
1687958b85f3SLIU Zhiwei  * Let  A = signed operand,
1688958b85f3SLIU Zhiwei  *      B = unsigned operand
1689958b85f3SLIU Zhiwei  *      P = mulu64(A, B), unsigned product
1690958b85f3SLIU Zhiwei  *
1691958b85f3SLIU Zhiwei  * LET  X = 2 ** 64  - A, 2's complement of A
1692958b85f3SLIU Zhiwei  *      SP = signed product
1693958b85f3SLIU Zhiwei  * THEN
1694958b85f3SLIU Zhiwei  *      IF A < 0
1695958b85f3SLIU Zhiwei  *          SP = -X * B
1696958b85f3SLIU Zhiwei  *             = -(2 ** 64 - A) * B
1697958b85f3SLIU Zhiwei  *             = A * B - 2 ** 64 * B
1698958b85f3SLIU Zhiwei  *             = P - 2 ** 64 * B
1699958b85f3SLIU Zhiwei  *      ELSE
1700958b85f3SLIU Zhiwei  *          SP = P
1701958b85f3SLIU Zhiwei  * THEN
1702958b85f3SLIU Zhiwei  *      HI_P -= (A < 0 ? B : 0)
1703958b85f3SLIU Zhiwei  */
1704958b85f3SLIU Zhiwei 
1705958b85f3SLIU Zhiwei static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1706958b85f3SLIU Zhiwei {
1707958b85f3SLIU Zhiwei     uint64_t hi_64, lo_64;
1708958b85f3SLIU Zhiwei 
1709958b85f3SLIU Zhiwei     mulu64(&lo_64, &hi_64, s2, s1);
1710958b85f3SLIU Zhiwei 
1711958b85f3SLIU Zhiwei     hi_64 -= s2 < 0 ? s1 : 0;
1712958b85f3SLIU Zhiwei     return hi_64;
1713958b85f3SLIU Zhiwei }
1714958b85f3SLIU Zhiwei 
1715958b85f3SLIU Zhiwei RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1716958b85f3SLIU Zhiwei RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1717958b85f3SLIU Zhiwei RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1718958b85f3SLIU Zhiwei RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1719958b85f3SLIU Zhiwei RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1720958b85f3SLIU Zhiwei RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1721958b85f3SLIU Zhiwei RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1722958b85f3SLIU Zhiwei RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1723958b85f3SLIU Zhiwei RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1724958b85f3SLIU Zhiwei RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1725958b85f3SLIU Zhiwei RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1726958b85f3SLIU Zhiwei RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
1727f1eed927SeopXD GEN_VEXT_VV(vmulh_vv_b, 1)
1728f1eed927SeopXD GEN_VEXT_VV(vmulh_vv_h, 2)
1729f1eed927SeopXD GEN_VEXT_VV(vmulh_vv_w, 4)
1730f1eed927SeopXD GEN_VEXT_VV(vmulh_vv_d, 8)
1731f1eed927SeopXD GEN_VEXT_VV(vmulhu_vv_b, 1)
1732f1eed927SeopXD GEN_VEXT_VV(vmulhu_vv_h, 2)
1733f1eed927SeopXD GEN_VEXT_VV(vmulhu_vv_w, 4)
1734f1eed927SeopXD GEN_VEXT_VV(vmulhu_vv_d, 8)
1735f1eed927SeopXD GEN_VEXT_VV(vmulhsu_vv_b, 1)
1736f1eed927SeopXD GEN_VEXT_VV(vmulhsu_vv_h, 2)
1737f1eed927SeopXD GEN_VEXT_VV(vmulhsu_vv_w, 4)
1738f1eed927SeopXD GEN_VEXT_VV(vmulhsu_vv_d, 8)
1739958b85f3SLIU Zhiwei 
1740958b85f3SLIU Zhiwei RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1741958b85f3SLIU Zhiwei RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1742958b85f3SLIU Zhiwei RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1743958b85f3SLIU Zhiwei RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1744958b85f3SLIU Zhiwei RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1745958b85f3SLIU Zhiwei RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1746958b85f3SLIU Zhiwei RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1747958b85f3SLIU Zhiwei RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1748958b85f3SLIU Zhiwei RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1749958b85f3SLIU Zhiwei RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1750958b85f3SLIU Zhiwei RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1751958b85f3SLIU Zhiwei RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1752958b85f3SLIU Zhiwei RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1753958b85f3SLIU Zhiwei RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1754958b85f3SLIU Zhiwei RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1755958b85f3SLIU Zhiwei RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
17565c19fc15SeopXD GEN_VEXT_VX(vmul_vx_b, 1)
17575c19fc15SeopXD GEN_VEXT_VX(vmul_vx_h, 2)
17585c19fc15SeopXD GEN_VEXT_VX(vmul_vx_w, 4)
17595c19fc15SeopXD GEN_VEXT_VX(vmul_vx_d, 8)
17605c19fc15SeopXD GEN_VEXT_VX(vmulh_vx_b, 1)
17615c19fc15SeopXD GEN_VEXT_VX(vmulh_vx_h, 2)
17625c19fc15SeopXD GEN_VEXT_VX(vmulh_vx_w, 4)
17635c19fc15SeopXD GEN_VEXT_VX(vmulh_vx_d, 8)
17645c19fc15SeopXD GEN_VEXT_VX(vmulhu_vx_b, 1)
17655c19fc15SeopXD GEN_VEXT_VX(vmulhu_vx_h, 2)
17665c19fc15SeopXD GEN_VEXT_VX(vmulhu_vx_w, 4)
17675c19fc15SeopXD GEN_VEXT_VX(vmulhu_vx_d, 8)
17685c19fc15SeopXD GEN_VEXT_VX(vmulhsu_vx_b, 1)
17695c19fc15SeopXD GEN_VEXT_VX(vmulhsu_vx_h, 2)
17705c19fc15SeopXD GEN_VEXT_VX(vmulhsu_vx_w, 4)
17715c19fc15SeopXD GEN_VEXT_VX(vmulhsu_vx_d, 8)
177285e6658cSLIU Zhiwei 
177385e6658cSLIU Zhiwei /* Vector Integer Divide Instructions */
177485e6658cSLIU Zhiwei #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
177585e6658cSLIU Zhiwei #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
177685e6658cSLIU Zhiwei #define DO_DIV(N, M)  (unlikely(M == 0) ? (__typeof(N))(-1) : \
177785e6658cSLIU Zhiwei         unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
177885e6658cSLIU Zhiwei #define DO_REM(N, M)  (unlikely(M == 0) ? N : \
177985e6658cSLIU Zhiwei         unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
178085e6658cSLIU Zhiwei 
178185e6658cSLIU Zhiwei RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
178285e6658cSLIU Zhiwei RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
178385e6658cSLIU Zhiwei RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
178485e6658cSLIU Zhiwei RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
178585e6658cSLIU Zhiwei RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
178685e6658cSLIU Zhiwei RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
178785e6658cSLIU Zhiwei RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
178885e6658cSLIU Zhiwei RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
178985e6658cSLIU Zhiwei RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
179085e6658cSLIU Zhiwei RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
179185e6658cSLIU Zhiwei RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
179285e6658cSLIU Zhiwei RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
179385e6658cSLIU Zhiwei RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
179485e6658cSLIU Zhiwei RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
179585e6658cSLIU Zhiwei RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
179685e6658cSLIU Zhiwei RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
1797f1eed927SeopXD GEN_VEXT_VV(vdivu_vv_b, 1)
1798f1eed927SeopXD GEN_VEXT_VV(vdivu_vv_h, 2)
1799f1eed927SeopXD GEN_VEXT_VV(vdivu_vv_w, 4)
1800f1eed927SeopXD GEN_VEXT_VV(vdivu_vv_d, 8)
1801f1eed927SeopXD GEN_VEXT_VV(vdiv_vv_b, 1)
1802f1eed927SeopXD GEN_VEXT_VV(vdiv_vv_h, 2)
1803f1eed927SeopXD GEN_VEXT_VV(vdiv_vv_w, 4)
1804f1eed927SeopXD GEN_VEXT_VV(vdiv_vv_d, 8)
1805f1eed927SeopXD GEN_VEXT_VV(vremu_vv_b, 1)
1806f1eed927SeopXD GEN_VEXT_VV(vremu_vv_h, 2)
1807f1eed927SeopXD GEN_VEXT_VV(vremu_vv_w, 4)
1808f1eed927SeopXD GEN_VEXT_VV(vremu_vv_d, 8)
1809f1eed927SeopXD GEN_VEXT_VV(vrem_vv_b, 1)
1810f1eed927SeopXD GEN_VEXT_VV(vrem_vv_h, 2)
1811f1eed927SeopXD GEN_VEXT_VV(vrem_vv_w, 4)
1812f1eed927SeopXD GEN_VEXT_VV(vrem_vv_d, 8)
181385e6658cSLIU Zhiwei 
181485e6658cSLIU Zhiwei RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
181585e6658cSLIU Zhiwei RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
181685e6658cSLIU Zhiwei RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
181785e6658cSLIU Zhiwei RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
181885e6658cSLIU Zhiwei RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
181985e6658cSLIU Zhiwei RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
182085e6658cSLIU Zhiwei RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
182185e6658cSLIU Zhiwei RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
182285e6658cSLIU Zhiwei RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
182385e6658cSLIU Zhiwei RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
182485e6658cSLIU Zhiwei RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
182585e6658cSLIU Zhiwei RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
182685e6658cSLIU Zhiwei RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
182785e6658cSLIU Zhiwei RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
182885e6658cSLIU Zhiwei RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
182985e6658cSLIU Zhiwei RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
18305c19fc15SeopXD GEN_VEXT_VX(vdivu_vx_b, 1)
18315c19fc15SeopXD GEN_VEXT_VX(vdivu_vx_h, 2)
18325c19fc15SeopXD GEN_VEXT_VX(vdivu_vx_w, 4)
18335c19fc15SeopXD GEN_VEXT_VX(vdivu_vx_d, 8)
18345c19fc15SeopXD GEN_VEXT_VX(vdiv_vx_b, 1)
18355c19fc15SeopXD GEN_VEXT_VX(vdiv_vx_h, 2)
18365c19fc15SeopXD GEN_VEXT_VX(vdiv_vx_w, 4)
18375c19fc15SeopXD GEN_VEXT_VX(vdiv_vx_d, 8)
18385c19fc15SeopXD GEN_VEXT_VX(vremu_vx_b, 1)
18395c19fc15SeopXD GEN_VEXT_VX(vremu_vx_h, 2)
18405c19fc15SeopXD GEN_VEXT_VX(vremu_vx_w, 4)
18415c19fc15SeopXD GEN_VEXT_VX(vremu_vx_d, 8)
18425c19fc15SeopXD GEN_VEXT_VX(vrem_vx_b, 1)
18435c19fc15SeopXD GEN_VEXT_VX(vrem_vx_h, 2)
18445c19fc15SeopXD GEN_VEXT_VX(vrem_vx_w, 4)
18455c19fc15SeopXD GEN_VEXT_VX(vrem_vx_d, 8)
184697b1cba3SLIU Zhiwei 
184797b1cba3SLIU Zhiwei /* Vector Widening Integer Multiply Instructions */
184897b1cba3SLIU Zhiwei RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
184997b1cba3SLIU Zhiwei RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
185097b1cba3SLIU Zhiwei RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
185197b1cba3SLIU Zhiwei RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
185297b1cba3SLIU Zhiwei RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
185397b1cba3SLIU Zhiwei RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
185497b1cba3SLIU Zhiwei RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
185597b1cba3SLIU Zhiwei RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
185697b1cba3SLIU Zhiwei RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
1857f1eed927SeopXD GEN_VEXT_VV(vwmul_vv_b, 2)
1858f1eed927SeopXD GEN_VEXT_VV(vwmul_vv_h, 4)
1859f1eed927SeopXD GEN_VEXT_VV(vwmul_vv_w, 8)
1860f1eed927SeopXD GEN_VEXT_VV(vwmulu_vv_b, 2)
1861f1eed927SeopXD GEN_VEXT_VV(vwmulu_vv_h, 4)
1862f1eed927SeopXD GEN_VEXT_VV(vwmulu_vv_w, 8)
1863f1eed927SeopXD GEN_VEXT_VV(vwmulsu_vv_b, 2)
1864f1eed927SeopXD GEN_VEXT_VV(vwmulsu_vv_h, 4)
1865f1eed927SeopXD GEN_VEXT_VV(vwmulsu_vv_w, 8)
186697b1cba3SLIU Zhiwei 
186797b1cba3SLIU Zhiwei RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
186897b1cba3SLIU Zhiwei RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
186997b1cba3SLIU Zhiwei RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
187097b1cba3SLIU Zhiwei RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
187197b1cba3SLIU Zhiwei RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
187297b1cba3SLIU Zhiwei RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
187397b1cba3SLIU Zhiwei RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
187497b1cba3SLIU Zhiwei RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
187597b1cba3SLIU Zhiwei RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
18765c19fc15SeopXD GEN_VEXT_VX(vwmul_vx_b, 2)
18775c19fc15SeopXD GEN_VEXT_VX(vwmul_vx_h, 4)
18785c19fc15SeopXD GEN_VEXT_VX(vwmul_vx_w, 8)
18795c19fc15SeopXD GEN_VEXT_VX(vwmulu_vx_b, 2)
18805c19fc15SeopXD GEN_VEXT_VX(vwmulu_vx_h, 4)
18815c19fc15SeopXD GEN_VEXT_VX(vwmulu_vx_w, 8)
18825c19fc15SeopXD GEN_VEXT_VX(vwmulsu_vx_b, 2)
18835c19fc15SeopXD GEN_VEXT_VX(vwmulsu_vx_h, 4)
18845c19fc15SeopXD GEN_VEXT_VX(vwmulsu_vx_w, 8)
188554df813aSLIU Zhiwei 
188654df813aSLIU Zhiwei /* Vector Single-Width Integer Multiply-Add Instructions */
188754df813aSLIU Zhiwei #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)       \
188854df813aSLIU Zhiwei static void do_##NAME(void *vd, void *vs1, void *vs2, int i)       \
188954df813aSLIU Zhiwei {                                                                  \
189054df813aSLIU Zhiwei     TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
189154df813aSLIU Zhiwei     TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
189254df813aSLIU Zhiwei     TD d = *((TD *)vd + HD(i));                                    \
189354df813aSLIU Zhiwei     *((TD *)vd + HD(i)) = OP(s2, s1, d);                           \
189454df813aSLIU Zhiwei }
189554df813aSLIU Zhiwei 
189654df813aSLIU Zhiwei #define DO_MACC(N, M, D) (M * N + D)
189754df813aSLIU Zhiwei #define DO_NMSAC(N, M, D) (-(M * N) + D)
189854df813aSLIU Zhiwei #define DO_MADD(N, M, D) (M * D + N)
189954df813aSLIU Zhiwei #define DO_NMSUB(N, M, D) (-(M * D) + N)
190054df813aSLIU Zhiwei RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
190154df813aSLIU Zhiwei RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
190254df813aSLIU Zhiwei RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
190354df813aSLIU Zhiwei RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
190454df813aSLIU Zhiwei RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
190554df813aSLIU Zhiwei RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
190654df813aSLIU Zhiwei RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
190754df813aSLIU Zhiwei RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
190854df813aSLIU Zhiwei RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
190954df813aSLIU Zhiwei RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
191054df813aSLIU Zhiwei RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
191154df813aSLIU Zhiwei RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
191254df813aSLIU Zhiwei RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
191354df813aSLIU Zhiwei RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
191454df813aSLIU Zhiwei RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
191554df813aSLIU Zhiwei RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
1916f1eed927SeopXD GEN_VEXT_VV(vmacc_vv_b, 1)
1917f1eed927SeopXD GEN_VEXT_VV(vmacc_vv_h, 2)
1918f1eed927SeopXD GEN_VEXT_VV(vmacc_vv_w, 4)
1919f1eed927SeopXD GEN_VEXT_VV(vmacc_vv_d, 8)
1920f1eed927SeopXD GEN_VEXT_VV(vnmsac_vv_b, 1)
1921f1eed927SeopXD GEN_VEXT_VV(vnmsac_vv_h, 2)
1922f1eed927SeopXD GEN_VEXT_VV(vnmsac_vv_w, 4)
1923f1eed927SeopXD GEN_VEXT_VV(vnmsac_vv_d, 8)
1924f1eed927SeopXD GEN_VEXT_VV(vmadd_vv_b, 1)
1925f1eed927SeopXD GEN_VEXT_VV(vmadd_vv_h, 2)
1926f1eed927SeopXD GEN_VEXT_VV(vmadd_vv_w, 4)
1927f1eed927SeopXD GEN_VEXT_VV(vmadd_vv_d, 8)
1928f1eed927SeopXD GEN_VEXT_VV(vnmsub_vv_b, 1)
1929f1eed927SeopXD GEN_VEXT_VV(vnmsub_vv_h, 2)
1930f1eed927SeopXD GEN_VEXT_VV(vnmsub_vv_w, 4)
1931f1eed927SeopXD GEN_VEXT_VV(vnmsub_vv_d, 8)
193254df813aSLIU Zhiwei 
193354df813aSLIU Zhiwei #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
193454df813aSLIU Zhiwei static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
193554df813aSLIU Zhiwei {                                                                   \
193654df813aSLIU Zhiwei     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
193754df813aSLIU Zhiwei     TD d = *((TD *)vd + HD(i));                                     \
193854df813aSLIU Zhiwei     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d);                   \
193954df813aSLIU Zhiwei }
194054df813aSLIU Zhiwei 
RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
/* Emit the public vector-scalar helpers; second argument is SEW in bytes. */
GEN_VEXT_VX(vmacc_vx_b, 1)
GEN_VEXT_VX(vmacc_vx_h, 2)
GEN_VEXT_VX(vmacc_vx_w, 4)
GEN_VEXT_VX(vmacc_vx_d, 8)
GEN_VEXT_VX(vnmsac_vx_b, 1)
GEN_VEXT_VX(vnmsac_vx_h, 2)
GEN_VEXT_VX(vnmsac_vx_w, 4)
GEN_VEXT_VX(vnmsac_vx_d, 8)
GEN_VEXT_VX(vmadd_vx_b, 1)
GEN_VEXT_VX(vmadd_vx_h, 2)
GEN_VEXT_VX(vmadd_vx_w, 4)
GEN_VEXT_VX(vmadd_vx_d, 8)
GEN_VEXT_VX(vnmsub_vx_b, 1)
GEN_VEXT_VX(vnmsub_vx_h, 2)
GEN_VEXT_VX(vnmsub_vx_w, 4)
GEN_VEXT_VX(vnmsub_vx_d, 8)
19732b587b33SLIU Zhiwei 
/* Vector Widening Integer Multiply-Add Instructions */
/* WOP_* type lists make the destination 2*SEW wide (note the wider HD). */
RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
GEN_VEXT_VV(vwmaccu_vv_b, 2)
GEN_VEXT_VV(vwmaccu_vv_h, 4)
GEN_VEXT_VV(vwmaccu_vv_w, 8)
GEN_VEXT_VV(vwmacc_vv_b, 2)
GEN_VEXT_VV(vwmacc_vv_h, 4)
GEN_VEXT_VV(vwmacc_vv_w, 8)
GEN_VEXT_VV(vwmaccsu_vv_b, 2)
GEN_VEXT_VV(vwmaccsu_vv_h, 4)
GEN_VEXT_VV(vwmaccsu_vv_w, 8)

RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
/* vwmaccus has no .vv form; only the vector-scalar variant exists. */
RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
GEN_VEXT_VX(vwmaccu_vx_b, 2)
GEN_VEXT_VX(vwmaccu_vx_h, 4)
GEN_VEXT_VX(vwmaccu_vx_w, 8)
GEN_VEXT_VX(vwmacc_vx_b, 2)
GEN_VEXT_VX(vwmacc_vx_h, 4)
GEN_VEXT_VX(vwmacc_vx_w, 8)
GEN_VEXT_VX(vwmaccsu_vx_b, 2)
GEN_VEXT_VX(vwmaccsu_vx_h, 4)
GEN_VEXT_VX(vwmaccsu_vx_w, 8)
GEN_VEXT_VX(vwmaccus_vx_b, 2)
GEN_VEXT_VX(vwmaccus_vx_h, 4)
GEN_VEXT_VX(vwmaccus_vx_w, 8)
2018f020a7a1SLIU Zhiwei 
/* Vector Integer Merge and Move Instructions */

/*
 * vmv.v.v: unmasked element copy from vs1 into vd over the body
 * [vstart, vl); tail elements [vl, total_elems) are then written to
 * all-1s per the tail-agnostic (vta) policy.
 */
#define GEN_VEXT_VMV_VV(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env,           \
                  uint32_t desc)                                     \
{                                                                    \
    uint32_t vl = env->vl;                                           \
    uint32_t esz = sizeof(ETYPE);                                    \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);     \
    uint32_t vta = vext_vta(desc);                                   \
    uint32_t i;                                                      \
                                                                     \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                \
                                                                     \
    for (i = env->vstart; i < vl; i++) {                             \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                           \
        *((ETYPE *)vd + H(i)) = s1;                                  \
    }                                                                \
    env->vstart = 0;                                                 \
    /* set tail elements to 1s */                                    \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);         \
}

GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t,  H1)
GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
2045f020a7a1SLIU Zhiwei 
/*
 * vmv.v.x: splat the scalar s1 (truncated to ETYPE) into every body
 * element of vd; tail handled per the vta policy.
 */
#define GEN_VEXT_VMV_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env,         \
                  uint32_t desc)                                     \
{                                                                    \
    uint32_t vl = env->vl;                                           \
    uint32_t esz = sizeof(ETYPE);                                    \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);     \
    uint32_t vta = vext_vta(desc);                                   \
    uint32_t i;                                                      \
                                                                     \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                \
                                                                     \
    for (i = env->vstart; i < vl; i++) {                             \
        *((ETYPE *)vd + H(i)) = (ETYPE)s1;                           \
    }                                                                \
    env->vstart = 0;                                                 \
    /* set tail elements to 1s */                                    \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);         \
}

GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t,  H1)
GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
2070f020a7a1SLIU Zhiwei 
/*
 * vmerge.vvm: for each body element, select vs1[i] where the v0 mask
 * bit is set and vs2[i] where it is clear; tail handled per vta policy.
 */
#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H)                           \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,          \
                  CPURISCVState *env, uint32_t desc)                 \
{                                                                    \
    uint32_t vl = env->vl;                                           \
    uint32_t esz = sizeof(ETYPE);                                    \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);     \
    uint32_t vta = vext_vta(desc);                                   \
    uint32_t i;                                                      \
                                                                     \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                \
                                                                     \
    for (i = env->vstart; i < vl; i++) {                             \
        ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1);            \
        *((ETYPE *)vd + H(i)) = *(vt + H(i));                        \
    }                                                                \
    env->vstart = 0;                                                 \
    /* set tail elements to 1s */                                    \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);         \
}

GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t,  H1)
GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
2096f020a7a1SLIU Zhiwei 
/*
 * vmerge.vxm: per body element choose the scalar s1 (sign-extended via
 * target_long, then truncated to ETYPE) where the v0 mask bit is set,
 * otherwise keep vs2[i]; tail handled per vta policy.
 */
#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H)                           \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,               \
                  void *vs2, CPURISCVState *env, uint32_t desc)      \
{                                                                    \
    uint32_t vl = env->vl;                                           \
    uint32_t esz = sizeof(ETYPE);                                    \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);     \
    uint32_t vta = vext_vta(desc);                                   \
    uint32_t i;                                                      \
                                                                     \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                \
                                                                     \
    for (i = env->vstart; i < vl; i++) {                             \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                           \
        ETYPE d = (!vext_elem_mask(v0, i) ? s2 :                     \
                   (ETYPE)(target_long)s1);                          \
        *((ETYPE *)vd + H(i)) = d;                                   \
    }                                                                \
    env->vstart = 0;                                                 \
    /* set tail elements to 1s */                                    \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);         \
}

GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t,  H1)
GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
2124eb2650e3SLIU Zhiwei 
/*
 * Vector Fixed-Point Arithmetic Instructions
 */

/* Vector Single-Width Saturating Add and Subtract */

/*
 * As fixed point instructions probably have round mode and saturation,
 * define common macros for fixed point here.
 */
/* Per-element op: may consult the rounding mode and set env->vxsat. */
typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
                          CPURISCVState *env, int vxrm);

/*
 * Expand the per-element body of a vector-vector fixed-point op:
 * vd[i] = OP(env, vxrm, vs2[i], vs1[i]).
 */
#define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)     \
static inline void                                                  \
do_##NAME(void *vd, void *vs1, void *vs2, int i,                    \
          CPURISCVState *env, int vxrm)                             \
{                                                                   \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                                 \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
    *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1);                    \
}
2147eb2650e3SLIU Zhiwei 
2148eb2650e3SLIU Zhiwei static inline void
2149eb2650e3SLIU Zhiwei vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
2150eb2650e3SLIU Zhiwei              CPURISCVState *env,
2151f9298de5SFrank Chang              uint32_t vl, uint32_t vm, int vxrm,
vext_vv_rm_1(void * vd,void * v0,void * vs1,void * vs2,CPURISCVState * env,uint32_t vl,uint32_t vm,int vxrm,opivv2_rm_fn * fn,uint32_t vma,uint32_t esz)215272e17a9fSYueh-Ting (eop) Chen              opivv2_rm_fn *fn, uint32_t vma, uint32_t esz)
2153eb2650e3SLIU Zhiwei {
2154f714361eSFrank Chang     for (uint32_t i = env->vstart; i < vl; i++) {
2155f9298de5SFrank Chang         if (!vm && !vext_elem_mask(v0, i)) {
215672e17a9fSYueh-Ting (eop) Chen             /* set masked-off elements to 1s */
215772e17a9fSYueh-Ting (eop) Chen             vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
2158eb2650e3SLIU Zhiwei             continue;
2159eb2650e3SLIU Zhiwei         }
2160eb2650e3SLIU Zhiwei         fn(vd, vs1, vs2, i, env, vxrm);
2161eb2650e3SLIU Zhiwei     }
2162f714361eSFrank Chang     env->vstart = 0;
2163eb2650e3SLIU Zhiwei }
2164eb2650e3SLIU Zhiwei 
/*
 * Decode the descriptor, dispatch on the fixed-point rounding mode and
 * run the masked body loop, then apply the tail-agnostic policy over
 * [vl, total_elems).
 */
static inline void
vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
             CPURISCVState *env,
             uint32_t desc,
             opivv2_rm_fn *fn, uint32_t esz)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);
    uint32_t vma = vext_vma(desc);

    VSTART_CHECK_EARLY_EXIT(env, vl);

    /*
     * Switch once on env->vxrm so each call below passes the rounding
     * mode as a literal constant that can be folded into the inlined
     * element loop, rather than reloading it per element.
     */
    switch (env->vxrm) {
    case 0: /* rnu */
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 0, fn, vma, esz);
        break;
    case 1: /* rne */
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 1, fn, vma, esz);
        break;
    case 2: /* rdn */
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 2, fn, vma, esz);
        break;
    default: /* rod */
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 3, fn, vma, esz);
        break;
    }
    /* set tail elements to 1s */
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
}
2200eb2650e3SLIU Zhiwei 
/* generate helpers for fixed point instructions with OPIVV format */
/* ESZ is the element size (SEW) in bytes. */
#define GEN_VEXT_VV_RM(NAME, ESZ)                               \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,     \
                  CPURISCVState *env, uint32_t desc)            \
{                                                               \
    vext_vv_rm_2(vd, v0, vs1, vs2, env, desc,                   \
                 do_##NAME, ESZ);                               \
}
2209eb2650e3SLIU Zhiwei 
2210246f8796SWeiwei Li static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a,
2211246f8796SWeiwei Li                              uint8_t b)
2212eb2650e3SLIU Zhiwei {
saddu8(CPURISCVState * env,int vxrm,uint8_t a,uint8_t b)2213eb2650e3SLIU Zhiwei     uint8_t res = a + b;
2214eb2650e3SLIU Zhiwei     if (res < a) {
2215eb2650e3SLIU Zhiwei         res = UINT8_MAX;
2216eb2650e3SLIU Zhiwei         env->vxsat = 0x1;
2217eb2650e3SLIU Zhiwei     }
2218eb2650e3SLIU Zhiwei     return res;
2219eb2650e3SLIU Zhiwei }
2220eb2650e3SLIU Zhiwei 
2221eb2650e3SLIU Zhiwei static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
2222eb2650e3SLIU Zhiwei                                uint16_t b)
2223eb2650e3SLIU Zhiwei {
saddu16(CPURISCVState * env,int vxrm,uint16_t a,uint16_t b)2224eb2650e3SLIU Zhiwei     uint16_t res = a + b;
2225eb2650e3SLIU Zhiwei     if (res < a) {
2226eb2650e3SLIU Zhiwei         res = UINT16_MAX;
2227eb2650e3SLIU Zhiwei         env->vxsat = 0x1;
2228eb2650e3SLIU Zhiwei     }
2229eb2650e3SLIU Zhiwei     return res;
2230eb2650e3SLIU Zhiwei }
2231eb2650e3SLIU Zhiwei 
2232eb2650e3SLIU Zhiwei static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
2233eb2650e3SLIU Zhiwei                                uint32_t b)
2234eb2650e3SLIU Zhiwei {
saddu32(CPURISCVState * env,int vxrm,uint32_t a,uint32_t b)2235eb2650e3SLIU Zhiwei     uint32_t res = a + b;
2236eb2650e3SLIU Zhiwei     if (res < a) {
2237eb2650e3SLIU Zhiwei         res = UINT32_MAX;
2238eb2650e3SLIU Zhiwei         env->vxsat = 0x1;
2239eb2650e3SLIU Zhiwei     }
2240eb2650e3SLIU Zhiwei     return res;
2241eb2650e3SLIU Zhiwei }
2242eb2650e3SLIU Zhiwei 
2243eb2650e3SLIU Zhiwei static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
2244eb2650e3SLIU Zhiwei                                uint64_t b)
2245eb2650e3SLIU Zhiwei {
saddu64(CPURISCVState * env,int vxrm,uint64_t a,uint64_t b)2246eb2650e3SLIU Zhiwei     uint64_t res = a + b;
2247eb2650e3SLIU Zhiwei     if (res < a) {
2248eb2650e3SLIU Zhiwei         res = UINT64_MAX;
2249eb2650e3SLIU Zhiwei         env->vxsat = 0x1;
2250eb2650e3SLIU Zhiwei     }
2251eb2650e3SLIU Zhiwei     return res;
2252eb2650e3SLIU Zhiwei }
2253eb2650e3SLIU Zhiwei 
/* vsaddu.vv: saturating unsigned add for all SEWs */
RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
GEN_VEXT_VV_RM(vsaddu_vv_b, 1)
GEN_VEXT_VV_RM(vsaddu_vv_h, 2)
GEN_VEXT_VV_RM(vsaddu_vv_w, 4)
GEN_VEXT_VV_RM(vsaddu_vv_d, 8)
2262eb2650e3SLIU Zhiwei 
2263eb2650e3SLIU Zhiwei typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
2264eb2650e3SLIU Zhiwei                           CPURISCVState *env, int vxrm);
2265eb2650e3SLIU Zhiwei 
2266eb2650e3SLIU Zhiwei #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)          \
2267eb2650e3SLIU Zhiwei static inline void                                                  \
2268eb2650e3SLIU Zhiwei do_##NAME(void *vd, target_long s1, void *vs2, int i,               \
2269eb2650e3SLIU Zhiwei           CPURISCVState *env, int vxrm)                             \
2270eb2650e3SLIU Zhiwei {                                                                   \
2271eb2650e3SLIU Zhiwei     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
2272eb2650e3SLIU Zhiwei     *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1);           \
2273eb2650e3SLIU Zhiwei }
2274eb2650e3SLIU Zhiwei 
2275eb2650e3SLIU Zhiwei static inline void
2276eb2650e3SLIU Zhiwei vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
2277eb2650e3SLIU Zhiwei              CPURISCVState *env,
2278f9298de5SFrank Chang              uint32_t vl, uint32_t vm, int vxrm,
vext_vx_rm_1(void * vd,void * v0,target_long s1,void * vs2,CPURISCVState * env,uint32_t vl,uint32_t vm,int vxrm,opivx2_rm_fn * fn,uint32_t vma,uint32_t esz)227972e17a9fSYueh-Ting (eop) Chen              opivx2_rm_fn *fn, uint32_t vma, uint32_t esz)
2280eb2650e3SLIU Zhiwei {
2281f714361eSFrank Chang     for (uint32_t i = env->vstart; i < vl; i++) {
2282f9298de5SFrank Chang         if (!vm && !vext_elem_mask(v0, i)) {
228372e17a9fSYueh-Ting (eop) Chen             /* set masked-off elements to 1s */
228472e17a9fSYueh-Ting (eop) Chen             vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
2285eb2650e3SLIU Zhiwei             continue;
2286eb2650e3SLIU Zhiwei         }
2287eb2650e3SLIU Zhiwei         fn(vd, s1, vs2, i, env, vxrm);
2288eb2650e3SLIU Zhiwei     }
2289f714361eSFrank Chang     env->vstart = 0;
2290eb2650e3SLIU Zhiwei }
2291eb2650e3SLIU Zhiwei 
/*
 * Decode the descriptor, dispatch on the fixed-point rounding mode and
 * run the masked body loop, then apply the tail-agnostic policy over
 * [vl, total_elems).
 */
static inline void
vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
             CPURISCVState *env,
             uint32_t desc,
             opivx2_rm_fn *fn, uint32_t esz)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);
    uint32_t vma = vext_vma(desc);

    VSTART_CHECK_EARLY_EXIT(env, vl);

    /*
     * Switch once on env->vxrm so each call below passes the rounding
     * mode as a literal constant that can be folded into the inlined
     * element loop, rather than reloading it per element.
     */
    switch (env->vxrm) {
    case 0: /* rnu */
        vext_vx_rm_1(vd, v0, s1, vs2,
                     env, vl, vm, 0, fn, vma, esz);
        break;
    case 1: /* rne */
        vext_vx_rm_1(vd, v0, s1, vs2,
                     env, vl, vm, 1, fn, vma, esz);
        break;
    case 2: /* rdn */
        vext_vx_rm_1(vd, v0, s1, vs2,
                     env, vl, vm, 2, fn, vma, esz);
        break;
    default: /* rod */
        vext_vx_rm_1(vd, v0, s1, vs2,
                     env, vl, vm, 3, fn, vma, esz);
        break;
    }
    /* set tail elements to 1s */
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
}
2327eb2650e3SLIU Zhiwei 
/* generate helpers for fixed point instructions with OPIVX format */
/* ESZ is the element size (SEW) in bytes. */
#define GEN_VEXT_VX_RM(NAME, ESZ)                         \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    vext_vx_rm_2(vd, v0, s1, vs2, env, desc,              \
                 do_##NAME, ESZ);                         \
}
2337eb2650e3SLIU Zhiwei 
/* vsaddu.vx: saturating unsigned add, scalar operand, all SEWs */
RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
GEN_VEXT_VX_RM(vsaddu_vx_b, 1)
GEN_VEXT_VX_RM(vsaddu_vx_h, 2)
GEN_VEXT_VX_RM(vsaddu_vx_w, 4)
GEN_VEXT_VX_RM(vsaddu_vx_d, 8)
2346eb2650e3SLIU Zhiwei 
2347eb2650e3SLIU Zhiwei static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2348eb2650e3SLIU Zhiwei {
2349eb2650e3SLIU Zhiwei     int8_t res = a + b;
2350eb2650e3SLIU Zhiwei     if ((res ^ a) & (res ^ b) & INT8_MIN) {
2351eb2650e3SLIU Zhiwei         res = a > 0 ? INT8_MAX : INT8_MIN;
2352eb2650e3SLIU Zhiwei         env->vxsat = 0x1;
2353eb2650e3SLIU Zhiwei     }
2354eb2650e3SLIU Zhiwei     return res;
2355eb2650e3SLIU Zhiwei }
2356eb2650e3SLIU Zhiwei 
2357246f8796SWeiwei Li static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a,
2358246f8796SWeiwei Li                              int16_t b)
2359eb2650e3SLIU Zhiwei {
sadd16(CPURISCVState * env,int vxrm,int16_t a,int16_t b)2360eb2650e3SLIU Zhiwei     int16_t res = a + b;
2361eb2650e3SLIU Zhiwei     if ((res ^ a) & (res ^ b) & INT16_MIN) {
2362eb2650e3SLIU Zhiwei         res = a > 0 ? INT16_MAX : INT16_MIN;
2363eb2650e3SLIU Zhiwei         env->vxsat = 0x1;
2364eb2650e3SLIU Zhiwei     }
2365eb2650e3SLIU Zhiwei     return res;
2366eb2650e3SLIU Zhiwei }
2367eb2650e3SLIU Zhiwei 
2368246f8796SWeiwei Li static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a,
2369246f8796SWeiwei Li                              int32_t b)
2370eb2650e3SLIU Zhiwei {
sadd32(CPURISCVState * env,int vxrm,int32_t a,int32_t b)2371eb2650e3SLIU Zhiwei     int32_t res = a + b;
2372eb2650e3SLIU Zhiwei     if ((res ^ a) & (res ^ b) & INT32_MIN) {
2373eb2650e3SLIU Zhiwei         res = a > 0 ? INT32_MAX : INT32_MIN;
2374eb2650e3SLIU Zhiwei         env->vxsat = 0x1;
2375eb2650e3SLIU Zhiwei     }
2376eb2650e3SLIU Zhiwei     return res;
2377eb2650e3SLIU Zhiwei }
2378eb2650e3SLIU Zhiwei 
2379246f8796SWeiwei Li static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a,
2380246f8796SWeiwei Li                              int64_t b)
2381eb2650e3SLIU Zhiwei {
sadd64(CPURISCVState * env,int vxrm,int64_t a,int64_t b)2382eb2650e3SLIU Zhiwei     int64_t res = a + b;
2383eb2650e3SLIU Zhiwei     if ((res ^ a) & (res ^ b) & INT64_MIN) {
2384eb2650e3SLIU Zhiwei         res = a > 0 ? INT64_MAX : INT64_MIN;
2385eb2650e3SLIU Zhiwei         env->vxsat = 0x1;
2386eb2650e3SLIU Zhiwei     }
2387eb2650e3SLIU Zhiwei     return res;
2388eb2650e3SLIU Zhiwei }
2389eb2650e3SLIU Zhiwei 
/* vsadd.vv / vsadd.vx: saturating signed add for all SEWs */
RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
GEN_VEXT_VV_RM(vsadd_vv_b, 1)
GEN_VEXT_VV_RM(vsadd_vv_h, 2)
GEN_VEXT_VV_RM(vsadd_vv_w, 4)
GEN_VEXT_VV_RM(vsadd_vv_d, 8)

RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
GEN_VEXT_VX_RM(vsadd_vx_b, 1)
GEN_VEXT_VX_RM(vsadd_vx_h, 2)
GEN_VEXT_VX_RM(vsadd_vx_w, 4)
GEN_VEXT_VX_RM(vsadd_vx_d, 8)
2407eb2650e3SLIU Zhiwei 
2408246f8796SWeiwei Li static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a,
2409246f8796SWeiwei Li                              uint8_t b)
2410eb2650e3SLIU Zhiwei {
2411eb2650e3SLIU Zhiwei     uint8_t res = a - b;
2412eb2650e3SLIU Zhiwei     if (res > a) {
2413eb2650e3SLIU Zhiwei         res = 0;
2414eb2650e3SLIU Zhiwei         env->vxsat = 0x1;
2415eb2650e3SLIU Zhiwei     }
2416eb2650e3SLIU Zhiwei     return res;
2417eb2650e3SLIU Zhiwei }
2418eb2650e3SLIU Zhiwei 
2419eb2650e3SLIU Zhiwei static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2420eb2650e3SLIU Zhiwei                                uint16_t b)
2421eb2650e3SLIU Zhiwei {
ssubu16(CPURISCVState * env,int vxrm,uint16_t a,uint16_t b)2422eb2650e3SLIU Zhiwei     uint16_t res = a - b;
2423eb2650e3SLIU Zhiwei     if (res > a) {
2424eb2650e3SLIU Zhiwei         res = 0;
2425eb2650e3SLIU Zhiwei         env->vxsat = 0x1;
2426eb2650e3SLIU Zhiwei     }
2427eb2650e3SLIU Zhiwei     return res;
2428eb2650e3SLIU Zhiwei }
2429eb2650e3SLIU Zhiwei 
2430eb2650e3SLIU Zhiwei static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2431eb2650e3SLIU Zhiwei                                uint32_t b)
2432eb2650e3SLIU Zhiwei {
ssubu32(CPURISCVState * env,int vxrm,uint32_t a,uint32_t b)2433eb2650e3SLIU Zhiwei     uint32_t res = a - b;
2434eb2650e3SLIU Zhiwei     if (res > a) {
2435eb2650e3SLIU Zhiwei         res = 0;
2436eb2650e3SLIU Zhiwei         env->vxsat = 0x1;
2437eb2650e3SLIU Zhiwei     }
2438eb2650e3SLIU Zhiwei     return res;
2439eb2650e3SLIU Zhiwei }
2440eb2650e3SLIU Zhiwei 
2441eb2650e3SLIU Zhiwei static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2442eb2650e3SLIU Zhiwei                                uint64_t b)
2443eb2650e3SLIU Zhiwei {
ssubu64(CPURISCVState * env,int vxrm,uint64_t a,uint64_t b)2444eb2650e3SLIU Zhiwei     uint64_t res = a - b;
2445eb2650e3SLIU Zhiwei     if (res > a) {
2446eb2650e3SLIU Zhiwei         res = 0;
2447eb2650e3SLIU Zhiwei         env->vxsat = 0x1;
2448eb2650e3SLIU Zhiwei     }
2449eb2650e3SLIU Zhiwei     return res;
2450eb2650e3SLIU Zhiwei }
2451eb2650e3SLIU Zhiwei 
/* vssubu.{vv,vx}: saturating unsigned subtract, expanded per SEW. */
RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
GEN_VEXT_VV_RM(vssubu_vv_b, 1)
GEN_VEXT_VV_RM(vssubu_vv_h, 2)
GEN_VEXT_VV_RM(vssubu_vv_w, 4)
GEN_VEXT_VV_RM(vssubu_vv_d, 8)

RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
GEN_VEXT_VX_RM(vssubu_vx_b, 1)
GEN_VEXT_VX_RM(vssubu_vx_h, 2)
GEN_VEXT_VX_RM(vssubu_vx_w, 4)
GEN_VEXT_VX_RM(vssubu_vx_d, 8)
2469eb2650e3SLIU Zhiwei 
2470eb2650e3SLIU Zhiwei static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2471eb2650e3SLIU Zhiwei {
2472eb2650e3SLIU Zhiwei     int8_t res = a - b;
2473eb2650e3SLIU Zhiwei     if ((res ^ a) & (a ^ b) & INT8_MIN) {
247465606f21SLIU Zhiwei         res = a >= 0 ? INT8_MAX : INT8_MIN;
2475eb2650e3SLIU Zhiwei         env->vxsat = 0x1;
2476eb2650e3SLIU Zhiwei     }
2477eb2650e3SLIU Zhiwei     return res;
2478eb2650e3SLIU Zhiwei }
2479eb2650e3SLIU Zhiwei 
2480246f8796SWeiwei Li static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a,
2481246f8796SWeiwei Li                              int16_t b)
2482eb2650e3SLIU Zhiwei {
ssub16(CPURISCVState * env,int vxrm,int16_t a,int16_t b)2483eb2650e3SLIU Zhiwei     int16_t res = a - b;
2484eb2650e3SLIU Zhiwei     if ((res ^ a) & (a ^ b) & INT16_MIN) {
248565606f21SLIU Zhiwei         res = a >= 0 ? INT16_MAX : INT16_MIN;
2486eb2650e3SLIU Zhiwei         env->vxsat = 0x1;
2487eb2650e3SLIU Zhiwei     }
2488eb2650e3SLIU Zhiwei     return res;
2489eb2650e3SLIU Zhiwei }
2490eb2650e3SLIU Zhiwei 
2491246f8796SWeiwei Li static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a,
2492246f8796SWeiwei Li                              int32_t b)
2493eb2650e3SLIU Zhiwei {
ssub32(CPURISCVState * env,int vxrm,int32_t a,int32_t b)2494eb2650e3SLIU Zhiwei     int32_t res = a - b;
2495eb2650e3SLIU Zhiwei     if ((res ^ a) & (a ^ b) & INT32_MIN) {
249665606f21SLIU Zhiwei         res = a >= 0 ? INT32_MAX : INT32_MIN;
2497eb2650e3SLIU Zhiwei         env->vxsat = 0x1;
2498eb2650e3SLIU Zhiwei     }
2499eb2650e3SLIU Zhiwei     return res;
2500eb2650e3SLIU Zhiwei }
2501eb2650e3SLIU Zhiwei 
2502246f8796SWeiwei Li static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a,
2503246f8796SWeiwei Li                              int64_t b)
2504eb2650e3SLIU Zhiwei {
ssub64(CPURISCVState * env,int vxrm,int64_t a,int64_t b)2505eb2650e3SLIU Zhiwei     int64_t res = a - b;
2506eb2650e3SLIU Zhiwei     if ((res ^ a) & (a ^ b) & INT64_MIN) {
250765606f21SLIU Zhiwei         res = a >= 0 ? INT64_MAX : INT64_MIN;
2508eb2650e3SLIU Zhiwei         env->vxsat = 0x1;
2509eb2650e3SLIU Zhiwei     }
2510eb2650e3SLIU Zhiwei     return res;
2511eb2650e3SLIU Zhiwei }
2512eb2650e3SLIU Zhiwei 
/* vssub.{vv,vx}: saturating signed subtract, expanded per SEW. */
RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
GEN_VEXT_VV_RM(vssub_vv_b, 1)
GEN_VEXT_VV_RM(vssub_vv_h, 2)
GEN_VEXT_VV_RM(vssub_vv_w, 4)
GEN_VEXT_VV_RM(vssub_vv_d, 8)

RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
GEN_VEXT_VX_RM(vssub_vx_b, 1)
GEN_VEXT_VX_RM(vssub_vx_h, 2)
GEN_VEXT_VX_RM(vssub_vx_w, 4)
GEN_VEXT_VX_RM(vssub_vx_d, 8)
2530b7aee481SLIU Zhiwei 
2531b7aee481SLIU Zhiwei /* Vector Single-Width Averaging Add and Subtract */
/*
 * Compute the rounding increment (0 or 1) for discarding the low @shift
 * bits of @v, per the fixed-point rounding mode @vxrm:
 *   0 (rnu): round-to-nearest-up   -- add the most significant discarded bit
 *   1 (rne): round-to-nearest-even -- as rnu, but break exact ties to even
 *   2 (rdn): round-down (truncate) -- always 0
 *   3 (rod): round-to-odd ("jam")  -- 1 iff result LSB is 0 and any
 *                                     discarded bit is set
 * The caller adds the returned value to (v >> shift).
 */
static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
{
    uint8_t d, d1;
    uint64_t D1, D2;

    if (shift == 0 || shift > 64) {
        return 0;
    }

    /*
     * Sample the result's LSB only after validating @shift: the previous
     * code called extract64(v, shift, 1) before the guard, so for
     * shift == 64 (which the guard permits) it tripped extract64()'s
     * pos + len <= 64 assertion.  Bit 64 of a 64-bit value is 0.
     */
    d = (shift < 64) ? extract64(v, shift, 1) : 0;
    d1 = extract64(v, shift - 1, 1);   /* most significant discarded bit */
    D1 = extract64(v, 0, shift);       /* all discarded bits */
    if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
        return d1;
    } else if (vxrm == 1) { /* round-to-nearest-even */
        if (shift > 1) {
            D2 = extract64(v, 0, shift - 1);
            return d1 & ((D2 != 0) | d);
        } else {
            return d1 & d;
        }
    } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
        return !d & (D1 != 0);
    }
    return 0; /* round-down (truncate) */
}
2558b7aee481SLIU Zhiwei 
2559246f8796SWeiwei Li static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a,
2560246f8796SWeiwei Li                              int32_t b)
2561b7aee481SLIU Zhiwei {
aadd32(CPURISCVState * env,int vxrm,int32_t a,int32_t b)2562b7aee481SLIU Zhiwei     int64_t res = (int64_t)a + b;
2563b7aee481SLIU Zhiwei     uint8_t round = get_round(vxrm, res, 1);
2564b7aee481SLIU Zhiwei 
2565b7aee481SLIU Zhiwei     return (res >> 1) + round;
2566b7aee481SLIU Zhiwei }
2567b7aee481SLIU Zhiwei 
/*
 * Averaging signed 64-bit add: (a + b) >> 1, rounded per vxrm.
 * The sum may overflow 64 bits, so the true bit 64 is reconstructed
 * from the overflow test below.
 */
static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a,
                             int64_t b)
{
    int64_t res = a + b;
    uint8_t round = get_round(vxrm, res, 1);
    /* Sign bit set iff a and b agree in sign but the wrapped sum does not. */
    int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;

    /* With signed overflow, bit 64 is inverse of bit 63. */
    return ((res >> 1) ^ over) + round;
}
2578b7aee481SLIU Zhiwei 
/* vaadd.{vv,vx}: averaging signed add, expanded per SEW. */
RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
GEN_VEXT_VV_RM(vaadd_vv_b, 1)
GEN_VEXT_VV_RM(vaadd_vv_h, 2)
GEN_VEXT_VV_RM(vaadd_vv_w, 4)
GEN_VEXT_VV_RM(vaadd_vv_d, 8)

RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
GEN_VEXT_VX_RM(vaadd_vx_b, 1)
GEN_VEXT_VX_RM(vaadd_vx_h, 2)
GEN_VEXT_VX_RM(vaadd_vx_w, 4)
GEN_VEXT_VX_RM(vaadd_vx_d, 8)
2596b7aee481SLIU Zhiwei 
25978b99a110SFrank Chang static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
25988b99a110SFrank Chang                                uint32_t a, uint32_t b)
25998b99a110SFrank Chang {
26008b99a110SFrank Chang     uint64_t res = (uint64_t)a + b;
26018b99a110SFrank Chang     uint8_t round = get_round(vxrm, res, 1);
26028b99a110SFrank Chang 
26038b99a110SFrank Chang     return (res >> 1) + round;
26048b99a110SFrank Chang }
26058b99a110SFrank Chang 
/*
 * Averaging unsigned 64-bit add: (a + b) >> 1, rounded per vxrm.
 * The carry out of bit 63 is re-inserted as the top bit of the
 * halved result.
 */
static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
                               uint64_t a, uint64_t b)
{
    uint64_t res = a + b;
    uint8_t round = get_round(vxrm, res, 1);
    /* Unsigned overflow occurred iff the wrapped sum is below an operand. */
    uint64_t over = (uint64_t)(res < a) << 63;

    return ((res >> 1) | over) + round;
}
26158b99a110SFrank Chang 
/* vaaddu.{vv,vx}: averaging unsigned add, expanded per SEW. */
RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
GEN_VEXT_VV_RM(vaaddu_vv_b, 1)
GEN_VEXT_VV_RM(vaaddu_vv_h, 2)
GEN_VEXT_VV_RM(vaaddu_vv_w, 4)
GEN_VEXT_VV_RM(vaaddu_vv_d, 8)

RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
GEN_VEXT_VX_RM(vaaddu_vx_b, 1)
GEN_VEXT_VX_RM(vaaddu_vx_h, 2)
GEN_VEXT_VX_RM(vaaddu_vx_w, 4)
GEN_VEXT_VX_RM(vaaddu_vx_d, 8)
26338b99a110SFrank Chang 
2634246f8796SWeiwei Li static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a,
2635246f8796SWeiwei Li                              int32_t b)
2636b7aee481SLIU Zhiwei {
2637b7aee481SLIU Zhiwei     int64_t res = (int64_t)a - b;
2638b7aee481SLIU Zhiwei     uint8_t round = get_round(vxrm, res, 1);
2639b7aee481SLIU Zhiwei 
2640b7aee481SLIU Zhiwei     return (res >> 1) + round;
2641b7aee481SLIU Zhiwei }
2642b7aee481SLIU Zhiwei 
/*
 * Averaging signed 64-bit subtract: (a - b) >> 1, rounded per vxrm.
 * The difference may overflow 64 bits, so the true bit 64 is
 * reconstructed from the overflow test below.
 */
static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a,
                             int64_t b)
{
    int64_t res = (int64_t)a - b;
    uint8_t round = get_round(vxrm, res, 1);
    /* Sign bit set iff the operands differ in sign and res lost a's sign. */
    int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;

    /* With signed overflow, bit 64 is inverse of bit 63. */
    return ((res >> 1) ^ over) + round;
}
2653b7aee481SLIU Zhiwei 
/* vasub.{vv,vx}: averaging signed subtract, expanded per SEW. */
RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
GEN_VEXT_VV_RM(vasub_vv_b, 1)
GEN_VEXT_VV_RM(vasub_vv_h, 2)
GEN_VEXT_VV_RM(vasub_vv_w, 4)
GEN_VEXT_VV_RM(vasub_vv_d, 8)

RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
GEN_VEXT_VX_RM(vasub_vx_b, 1)
GEN_VEXT_VX_RM(vasub_vx_h, 2)
GEN_VEXT_VX_RM(vasub_vx_w, 4)
GEN_VEXT_VX_RM(vasub_vx_d, 8)
26719f0ff9e5SLIU Zhiwei 
26728b99a110SFrank Chang static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
26738b99a110SFrank Chang                                uint32_t a, uint32_t b)
26748b99a110SFrank Chang {
26758b99a110SFrank Chang     int64_t res = (int64_t)a - b;
26768b99a110SFrank Chang     uint8_t round = get_round(vxrm, res, 1);
26778b99a110SFrank Chang 
26788b99a110SFrank Chang     return (res >> 1) + round;
26798b99a110SFrank Chang }
26808b99a110SFrank Chang 
/*
 * Averaging unsigned 64-bit subtract: (a - b) >> 1, rounded per vxrm.
 * The borrow out of bit 63 is re-inserted as the top bit of the
 * halved result.
 */
static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
                               uint64_t a, uint64_t b)
{
    uint64_t res = (uint64_t)a - b;
    uint8_t round = get_round(vxrm, res, 1);
    /* Borrow occurred iff the wrapped difference exceeds the minuend. */
    uint64_t over = (uint64_t)(res > a) << 63;

    return ((res >> 1) | over) + round;
}
26908b99a110SFrank Chang 
/* vasubu.{vv,vx}: averaging unsigned subtract, expanded per SEW. */
RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
GEN_VEXT_VV_RM(vasubu_vv_b, 1)
GEN_VEXT_VV_RM(vasubu_vv_h, 2)
GEN_VEXT_VV_RM(vasubu_vv_w, 4)
GEN_VEXT_VV_RM(vasubu_vv_d, 8)

RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
GEN_VEXT_VX_RM(vasubu_vx_b, 1)
GEN_VEXT_VX_RM(vasubu_vx_h, 2)
GEN_VEXT_VX_RM(vasubu_vx_w, 4)
GEN_VEXT_VX_RM(vasubu_vx_d, 8)
27088b99a110SFrank Chang 
27099f0ff9e5SLIU Zhiwei /* Vector Single-Width Fractional Multiply with Rounding and Saturation */
27109f0ff9e5SLIU Zhiwei static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
27119f0ff9e5SLIU Zhiwei {
27129f0ff9e5SLIU Zhiwei     uint8_t round;
27139f0ff9e5SLIU Zhiwei     int16_t res;
27149f0ff9e5SLIU Zhiwei 
27159f0ff9e5SLIU Zhiwei     res = (int16_t)a * (int16_t)b;
27169f0ff9e5SLIU Zhiwei     round = get_round(vxrm, res, 7);
27179f0ff9e5SLIU Zhiwei     res = (res >> 7) + round;
27189f0ff9e5SLIU Zhiwei 
27199f0ff9e5SLIU Zhiwei     if (res > INT8_MAX) {
27209f0ff9e5SLIU Zhiwei         env->vxsat = 0x1;
27219f0ff9e5SLIU Zhiwei         return INT8_MAX;
27229f0ff9e5SLIU Zhiwei     } else if (res < INT8_MIN) {
27239f0ff9e5SLIU Zhiwei         env->vxsat = 0x1;
27249f0ff9e5SLIU Zhiwei         return INT8_MIN;
27259f0ff9e5SLIU Zhiwei     } else {
27269f0ff9e5SLIU Zhiwei         return res;
27279f0ff9e5SLIU Zhiwei     }
27289f0ff9e5SLIU Zhiwei }
27299f0ff9e5SLIU Zhiwei 
27309f0ff9e5SLIU Zhiwei static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
27319f0ff9e5SLIU Zhiwei {
27329f0ff9e5SLIU Zhiwei     uint8_t round;
vsmul16(CPURISCVState * env,int vxrm,int16_t a,int16_t b)27339f0ff9e5SLIU Zhiwei     int32_t res;
27349f0ff9e5SLIU Zhiwei 
27359f0ff9e5SLIU Zhiwei     res = (int32_t)a * (int32_t)b;
27369f0ff9e5SLIU Zhiwei     round = get_round(vxrm, res, 15);
27379f0ff9e5SLIU Zhiwei     res = (res >> 15) + round;
27389f0ff9e5SLIU Zhiwei 
27399f0ff9e5SLIU Zhiwei     if (res > INT16_MAX) {
27409f0ff9e5SLIU Zhiwei         env->vxsat = 0x1;
27419f0ff9e5SLIU Zhiwei         return INT16_MAX;
27429f0ff9e5SLIU Zhiwei     } else if (res < INT16_MIN) {
27439f0ff9e5SLIU Zhiwei         env->vxsat = 0x1;
27449f0ff9e5SLIU Zhiwei         return INT16_MIN;
27459f0ff9e5SLIU Zhiwei     } else {
27469f0ff9e5SLIU Zhiwei         return res;
27479f0ff9e5SLIU Zhiwei     }
27489f0ff9e5SLIU Zhiwei }
27499f0ff9e5SLIU Zhiwei 
27509f0ff9e5SLIU Zhiwei static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
27519f0ff9e5SLIU Zhiwei {
27529f0ff9e5SLIU Zhiwei     uint8_t round;
vsmul32(CPURISCVState * env,int vxrm,int32_t a,int32_t b)27539f0ff9e5SLIU Zhiwei     int64_t res;
27549f0ff9e5SLIU Zhiwei 
27559f0ff9e5SLIU Zhiwei     res = (int64_t)a * (int64_t)b;
27569f0ff9e5SLIU Zhiwei     round = get_round(vxrm, res, 31);
27579f0ff9e5SLIU Zhiwei     res = (res >> 31) + round;
27589f0ff9e5SLIU Zhiwei 
27599f0ff9e5SLIU Zhiwei     if (res > INT32_MAX) {
27609f0ff9e5SLIU Zhiwei         env->vxsat = 0x1;
27619f0ff9e5SLIU Zhiwei         return INT32_MAX;
27629f0ff9e5SLIU Zhiwei     } else if (res < INT32_MIN) {
27639f0ff9e5SLIU Zhiwei         env->vxsat = 0x1;
27649f0ff9e5SLIU Zhiwei         return INT32_MIN;
27659f0ff9e5SLIU Zhiwei     } else {
27669f0ff9e5SLIU Zhiwei         return res;
27679f0ff9e5SLIU Zhiwei     }
27689f0ff9e5SLIU Zhiwei }
27699f0ff9e5SLIU Zhiwei 
/*
 * Saturating 64-bit fixed-point multiply: (a * b) >> 63, rounded per vxrm
 * and clamped; sets env->vxsat on saturation.  The 128-bit product comes
 * from muls64(); only INT64_MIN * INT64_MIN and the rounding carry at
 * INT64_MAX need explicit clamping.
 */
static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
{
    uint8_t round;
    uint64_t hi_64, lo_64;
    int64_t res;

    if (a == INT64_MIN && b == INT64_MIN) {
        /* The lone product (2^126) whose >>63 exceeds INT64_MAX. */
        env->vxsat = 1;
        return INT64_MAX;
    }

    muls64(&lo_64, &hi_64, a, b);
    round = get_round(vxrm, lo_64, 63);
    /*
     * Cannot overflow, as there are always
     * 2 sign bits after multiply.
     */
    res = (hi_64 << 1) | (lo_64 >> 63);
    if (round) {
        if (res == INT64_MAX) {
            /* Rounding up would overflow: saturate instead. */
            env->vxsat = 1;
        } else {
            res += 1;
        }
    }
    return res;
}
27979f0ff9e5SLIU Zhiwei 
/* vsmul.{vv,vx}: saturating fixed-point multiply, expanded per SEW. */
RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
GEN_VEXT_VV_RM(vsmul_vv_b, 1)
GEN_VEXT_VV_RM(vsmul_vv_h, 2)
GEN_VEXT_VV_RM(vsmul_vv_w, 4)
GEN_VEXT_VV_RM(vsmul_vv_d, 8)

RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
GEN_VEXT_VX_RM(vsmul_vx_b, 1)
GEN_VEXT_VX_RM(vsmul_vx_h, 2)
GEN_VEXT_VX_RM(vsmul_vx_w, 4)
GEN_VEXT_VX_RM(vsmul_vx_d, 8)
28150a1eaf00SLIU Zhiwei 
281604a61406SLIU Zhiwei /* Vector Single-Width Scaling Shift Instructions */
281704a61406SLIU Zhiwei static inline uint8_t
281804a61406SLIU Zhiwei vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
281904a61406SLIU Zhiwei {
282004a61406SLIU Zhiwei     uint8_t round, shift = b & 0x7;
282104a61406SLIU Zhiwei     uint8_t res;
282204a61406SLIU Zhiwei 
282304a61406SLIU Zhiwei     round = get_round(vxrm, a, shift);
282404a61406SLIU Zhiwei     res = (a >> shift) + round;
282504a61406SLIU Zhiwei     return res;
282604a61406SLIU Zhiwei }
282704a61406SLIU Zhiwei static inline uint16_t
282804a61406SLIU Zhiwei vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
282904a61406SLIU Zhiwei {
283004a61406SLIU Zhiwei     uint8_t round, shift = b & 0xf;
vssrl16(CPURISCVState * env,int vxrm,uint16_t a,uint16_t b)283104a61406SLIU Zhiwei 
283204a61406SLIU Zhiwei     round = get_round(vxrm, a, shift);
283366997c42SMarkus Armbruster     return (a >> shift) + round;
283404a61406SLIU Zhiwei }
283504a61406SLIU Zhiwei static inline uint32_t
283604a61406SLIU Zhiwei vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
283704a61406SLIU Zhiwei {
283804a61406SLIU Zhiwei     uint8_t round, shift = b & 0x1f;
vssrl32(CPURISCVState * env,int vxrm,uint32_t a,uint32_t b)283904a61406SLIU Zhiwei 
284004a61406SLIU Zhiwei     round = get_round(vxrm, a, shift);
284166997c42SMarkus Armbruster     return (a >> shift) + round;
284204a61406SLIU Zhiwei }
284304a61406SLIU Zhiwei static inline uint64_t
284404a61406SLIU Zhiwei vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
284504a61406SLIU Zhiwei {
284604a61406SLIU Zhiwei     uint8_t round, shift = b & 0x3f;
vssrl64(CPURISCVState * env,int vxrm,uint64_t a,uint64_t b)284704a61406SLIU Zhiwei 
284804a61406SLIU Zhiwei     round = get_round(vxrm, a, shift);
284966997c42SMarkus Armbruster     return (a >> shift) + round;
285004a61406SLIU Zhiwei }
/* vssrl.{vv,vx}: scaling logical right shift, expanded per SEW. */
RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
GEN_VEXT_VV_RM(vssrl_vv_b, 1)
GEN_VEXT_VV_RM(vssrl_vv_h, 2)
GEN_VEXT_VV_RM(vssrl_vv_w, 4)
GEN_VEXT_VV_RM(vssrl_vv_d, 8)

RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
GEN_VEXT_VX_RM(vssrl_vx_b, 1)
GEN_VEXT_VX_RM(vssrl_vx_h, 2)
GEN_VEXT_VX_RM(vssrl_vx_w, 4)
GEN_VEXT_VX_RM(vssrl_vx_d, 8)
286804a61406SLIU Zhiwei 
286904a61406SLIU Zhiwei static inline int8_t
287004a61406SLIU Zhiwei vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
287104a61406SLIU Zhiwei {
287204a61406SLIU Zhiwei     uint8_t round, shift = b & 0x7;
287304a61406SLIU Zhiwei 
287404a61406SLIU Zhiwei     round = get_round(vxrm, a, shift);
287566997c42SMarkus Armbruster     return (a >> shift) + round;
287604a61406SLIU Zhiwei }
287704a61406SLIU Zhiwei static inline int16_t
287804a61406SLIU Zhiwei vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
287904a61406SLIU Zhiwei {
288004a61406SLIU Zhiwei     uint8_t round, shift = b & 0xf;
vssra16(CPURISCVState * env,int vxrm,int16_t a,int16_t b)288104a61406SLIU Zhiwei 
288204a61406SLIU Zhiwei     round = get_round(vxrm, a, shift);
288366997c42SMarkus Armbruster     return (a >> shift) + round;
288404a61406SLIU Zhiwei }
288504a61406SLIU Zhiwei static inline int32_t
288604a61406SLIU Zhiwei vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
288704a61406SLIU Zhiwei {
288804a61406SLIU Zhiwei     uint8_t round, shift = b & 0x1f;
vssra32(CPURISCVState * env,int vxrm,int32_t a,int32_t b)288904a61406SLIU Zhiwei 
289004a61406SLIU Zhiwei     round = get_round(vxrm, a, shift);
289166997c42SMarkus Armbruster     return (a >> shift) + round;
289204a61406SLIU Zhiwei }
289304a61406SLIU Zhiwei static inline int64_t
289404a61406SLIU Zhiwei vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
289504a61406SLIU Zhiwei {
289604a61406SLIU Zhiwei     uint8_t round, shift = b & 0x3f;
vssra64(CPURISCVState * env,int vxrm,int64_t a,int64_t b)289704a61406SLIU Zhiwei 
289804a61406SLIU Zhiwei     round = get_round(vxrm, a, shift);
289966997c42SMarkus Armbruster     return (a >> shift) + round;
290004a61406SLIU Zhiwei }
29019ff3d287SLIU Zhiwei 
/* vssra.{vv,vx}: scaling arithmetic right shift, expanded per SEW. */
RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
GEN_VEXT_VV_RM(vssra_vv_b, 1)
GEN_VEXT_VV_RM(vssra_vv_h, 2)
GEN_VEXT_VV_RM(vssra_vv_w, 4)
GEN_VEXT_VV_RM(vssra_vv_d, 8)

RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
GEN_VEXT_VX_RM(vssra_vx_b, 1)
GEN_VEXT_VX_RM(vssra_vx_h, 2)
GEN_VEXT_VX_RM(vssra_vx_w, 4)
GEN_VEXT_VX_RM(vssra_vx_d, 8)
29199ff3d287SLIU Zhiwei 
29209ff3d287SLIU Zhiwei /* Vector Narrowing Fixed-Point Clip Instructions */
29219ff3d287SLIU Zhiwei static inline int8_t
29229ff3d287SLIU Zhiwei vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
29239ff3d287SLIU Zhiwei {
29249ff3d287SLIU Zhiwei     uint8_t round, shift = b & 0xf;
29259ff3d287SLIU Zhiwei     int16_t res;
29269ff3d287SLIU Zhiwei 
29279ff3d287SLIU Zhiwei     round = get_round(vxrm, a, shift);
29289ff3d287SLIU Zhiwei     res = (a >> shift) + round;
29299ff3d287SLIU Zhiwei     if (res > INT8_MAX) {
29309ff3d287SLIU Zhiwei         env->vxsat = 0x1;
29319ff3d287SLIU Zhiwei         return INT8_MAX;
29329ff3d287SLIU Zhiwei     } else if (res < INT8_MIN) {
29339ff3d287SLIU Zhiwei         env->vxsat = 0x1;
29349ff3d287SLIU Zhiwei         return INT8_MIN;
29359ff3d287SLIU Zhiwei     } else {
29369ff3d287SLIU Zhiwei         return res;
29379ff3d287SLIU Zhiwei     }
29389ff3d287SLIU Zhiwei }
29399ff3d287SLIU Zhiwei 
29409ff3d287SLIU Zhiwei static inline int16_t
29419ff3d287SLIU Zhiwei vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
29429ff3d287SLIU Zhiwei {
29439ff3d287SLIU Zhiwei     uint8_t round, shift = b & 0x1f;
vnclip16(CPURISCVState * env,int vxrm,int32_t a,int16_t b)29449ff3d287SLIU Zhiwei     int32_t res;
29459ff3d287SLIU Zhiwei 
29469ff3d287SLIU Zhiwei     round = get_round(vxrm, a, shift);
29479ff3d287SLIU Zhiwei     res = (a >> shift) + round;
29489ff3d287SLIU Zhiwei     if (res > INT16_MAX) {
29499ff3d287SLIU Zhiwei         env->vxsat = 0x1;
29509ff3d287SLIU Zhiwei         return INT16_MAX;
29519ff3d287SLIU Zhiwei     } else if (res < INT16_MIN) {
29529ff3d287SLIU Zhiwei         env->vxsat = 0x1;
29539ff3d287SLIU Zhiwei         return INT16_MIN;
29549ff3d287SLIU Zhiwei     } else {
29559ff3d287SLIU Zhiwei         return res;
29569ff3d287SLIU Zhiwei     }
29579ff3d287SLIU Zhiwei }
29589ff3d287SLIU Zhiwei 
29599ff3d287SLIU Zhiwei static inline int32_t
29609ff3d287SLIU Zhiwei vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
29619ff3d287SLIU Zhiwei {
29629ff3d287SLIU Zhiwei     uint8_t round, shift = b & 0x3f;
vnclip32(CPURISCVState * env,int vxrm,int64_t a,int32_t b)29639ff3d287SLIU Zhiwei     int64_t res;
29649ff3d287SLIU Zhiwei 
29659ff3d287SLIU Zhiwei     round = get_round(vxrm, a, shift);
29669ff3d287SLIU Zhiwei     res = (a >> shift) + round;
29679ff3d287SLIU Zhiwei     if (res > INT32_MAX) {
29689ff3d287SLIU Zhiwei         env->vxsat = 0x1;
29699ff3d287SLIU Zhiwei         return INT32_MAX;
29709ff3d287SLIU Zhiwei     } else if (res < INT32_MIN) {
29719ff3d287SLIU Zhiwei         env->vxsat = 0x1;
29729ff3d287SLIU Zhiwei         return INT32_MIN;
29739ff3d287SLIU Zhiwei     } else {
29749ff3d287SLIU Zhiwei         return res;
29759ff3d287SLIU Zhiwei     }
29769ff3d287SLIU Zhiwei }
29779ff3d287SLIU Zhiwei 
/* vnclip.{wv,wx}: signed narrowing fixed-point clip, expanded per SEW. */
RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
GEN_VEXT_VV_RM(vnclip_wv_b, 1)
GEN_VEXT_VV_RM(vnclip_wv_h, 2)
GEN_VEXT_VV_RM(vnclip_wv_w, 4)

RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
GEN_VEXT_VX_RM(vnclip_wx_b, 1)
GEN_VEXT_VX_RM(vnclip_wx_h, 2)
GEN_VEXT_VX_RM(vnclip_wx_w, 4)
29919ff3d287SLIU Zhiwei 
29929ff3d287SLIU Zhiwei static inline uint8_t
29939ff3d287SLIU Zhiwei vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
29949ff3d287SLIU Zhiwei {
29959ff3d287SLIU Zhiwei     uint8_t round, shift = b & 0xf;
29969ff3d287SLIU Zhiwei     uint16_t res;
29979ff3d287SLIU Zhiwei 
29989ff3d287SLIU Zhiwei     round = get_round(vxrm, a, shift);
29999ff3d287SLIU Zhiwei     res = (a >> shift) + round;
30009ff3d287SLIU Zhiwei     if (res > UINT8_MAX) {
30019ff3d287SLIU Zhiwei         env->vxsat = 0x1;
30029ff3d287SLIU Zhiwei         return UINT8_MAX;
30039ff3d287SLIU Zhiwei     } else {
30049ff3d287SLIU Zhiwei         return res;
30059ff3d287SLIU Zhiwei     }
30069ff3d287SLIU Zhiwei }
30079ff3d287SLIU Zhiwei 
30089ff3d287SLIU Zhiwei static inline uint16_t
30099ff3d287SLIU Zhiwei vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
30109ff3d287SLIU Zhiwei {
30119ff3d287SLIU Zhiwei     uint8_t round, shift = b & 0x1f;
vnclipu16(CPURISCVState * env,int vxrm,uint32_t a,uint16_t b)30129ff3d287SLIU Zhiwei     uint32_t res;
30139ff3d287SLIU Zhiwei 
30149ff3d287SLIU Zhiwei     round = get_round(vxrm, a, shift);
30159ff3d287SLIU Zhiwei     res = (a >> shift) + round;
30169ff3d287SLIU Zhiwei     if (res > UINT16_MAX) {
30179ff3d287SLIU Zhiwei         env->vxsat = 0x1;
30189ff3d287SLIU Zhiwei         return UINT16_MAX;
30199ff3d287SLIU Zhiwei     } else {
30209ff3d287SLIU Zhiwei         return res;
30219ff3d287SLIU Zhiwei     }
30229ff3d287SLIU Zhiwei }
30239ff3d287SLIU Zhiwei 
30249ff3d287SLIU Zhiwei static inline uint32_t
30259ff3d287SLIU Zhiwei vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
30269ff3d287SLIU Zhiwei {
30279ff3d287SLIU Zhiwei     uint8_t round, shift = b & 0x3f;
vnclipu32(CPURISCVState * env,int vxrm,uint64_t a,uint32_t b)3028a70b3a73SFrank Chang     uint64_t res;
30299ff3d287SLIU Zhiwei 
30309ff3d287SLIU Zhiwei     round = get_round(vxrm, a, shift);
30319ff3d287SLIU Zhiwei     res = (a >> shift) + round;
30329ff3d287SLIU Zhiwei     if (res > UINT32_MAX) {
30339ff3d287SLIU Zhiwei         env->vxsat = 0x1;
30349ff3d287SLIU Zhiwei         return UINT32_MAX;
30359ff3d287SLIU Zhiwei     } else {
30369ff3d287SLIU Zhiwei         return res;
30379ff3d287SLIU Zhiwei     }
30389ff3d287SLIU Zhiwei }
30399ff3d287SLIU Zhiwei 
/* vnclipu.wv: unsigned saturating narrowing clip (2*SEW source -> SEW). */
RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
GEN_VEXT_VV_RM(vnclipu_wv_b, 1)
GEN_VEXT_VV_RM(vnclipu_wv_h, 2)
GEN_VEXT_VV_RM(vnclipu_wv_w, 4)

/* vnclipu.wx: same operation with a scalar shift amount. */
RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
GEN_VEXT_VX_RM(vnclipu_wx_b, 1)
GEN_VEXT_VX_RM(vnclipu_wx_h, 2)
GEN_VEXT_VX_RM(vnclipu_wx_w, 4)
3053ce2a0343SLIU Zhiwei 
3054ce2a0343SLIU Zhiwei /*
 * Vector Floating-Point Arithmetic Instructions
3056ce2a0343SLIU Zhiwei  */
3057ce2a0343SLIU Zhiwei /* Vector Single-Width Floating-Point Add/Subtract Instructions */
/*
 * OPFVV2: generate the per-element body do_NAME() for a two-operand
 * vector-vector FP instruction.  Element i of vs1/vs2 is fetched with the
 * host-endian index helpers HS1/HS2, OP is applied with the guest FP
 * status (so FP flags accumulate in env->fp_status), and the result is
 * stored into vd.  Note the operand order: OP(vs2, vs1, ...).
 */
#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
                      CPURISCVState *env)                      \
{                                                              \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                            \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
    *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status);         \
}
3066ce2a0343SLIU Zhiwei 
/*
 * GEN_VEXT_VV_ENV: generate the helper that loops a do_NAME() element body
 * over the active elements [vstart, vl) of a vector-vector FP op.
 * Masked-off elements (vm == 0 and mask bit clear) and tail elements are
 * written to all-1s per vma/vta (the "agnostic" fill policy); vstart is
 * reset to 0 on completion.  ESZ is the element size in bytes.
 */
#define GEN_VEXT_VV_ENV(NAME, ESZ)                        \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vm = vext_vm(desc);                          \
    uint32_t vl = env->vl;                                \
    uint32_t total_elems =                                \
        vext_get_total_elems(env, desc, ESZ);             \
    uint32_t vta = vext_vta(desc);                        \
    uint32_t vma = vext_vma(desc);                        \
    uint32_t i;                                           \
                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                     \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        if (!vm && !vext_elem_mask(v0, i)) {              \
            /* set masked-off elements to 1s */           \
            vext_set_elems_1s(vd, vma, i * ESZ,           \
                              (i + 1) * ESZ);             \
            continue;                                     \
        }                                                 \
        do_##NAME(vd, vs1, vs2, i, env);                  \
    }                                                     \
    env->vstart = 0;                                      \
    /* set tail elements to 1s */                         \
    vext_set_elems_1s(vd, vta, vl * ESZ,                  \
                      total_elems * ESZ);                 \
}
3096ce2a0343SLIU Zhiwei 
/* vfadd.vv for SEW = 16/32/64 (second macro argument is ESZ in bytes). */
RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
GEN_VEXT_VV_ENV(vfadd_vv_h, 2)
GEN_VEXT_VV_ENV(vfadd_vv_w, 4)
GEN_VEXT_VV_ENV(vfadd_vv_d, 8)
3103ce2a0343SLIU Zhiwei 
/*
 * OPFVF2: generate the per-element body do_NAME() for a vector-scalar FP
 * instruction.  s1 is the 64-bit scalar source, reinterpreted as the
 * element type via the (TX1)(T1) cast; OP is applied as OP(vs2[i], s1).
 */
#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)        \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
                      CPURISCVState *env)                      \
{                                                              \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
}
3111ce2a0343SLIU Zhiwei 
/*
 * GEN_VEXT_VF: generate the helper that loops a do_NAME() element body
 * over the active elements [vstart, vl) of a vector-scalar FP op.
 * Identical structure to GEN_VEXT_VV_ENV except that the scalar operand
 * s1 is forwarded to every element; masked-off and tail elements are
 * filled with 1s per vma/vta, and vstart is reset on completion.
 */
#define GEN_VEXT_VF(NAME, ESZ)                            \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1,        \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vm = vext_vm(desc);                          \
    uint32_t vl = env->vl;                                \
    uint32_t total_elems =                                \
        vext_get_total_elems(env, desc, ESZ);             \
    uint32_t vta = vext_vta(desc);                        \
    uint32_t vma = vext_vma(desc);                        \
    uint32_t i;                                           \
                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                     \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        if (!vm && !vext_elem_mask(v0, i)) {              \
            /* set masked-off elements to 1s */           \
            vext_set_elems_1s(vd, vma, i * ESZ,           \
                              (i + 1) * ESZ);             \
            continue;                                     \
        }                                                 \
        do_##NAME(vd, s1, vs2, i, env);                   \
    }                                                     \
    env->vstart = 0;                                      \
    /* set tail elements to 1s */                         \
    vext_set_elems_1s(vd, vta, vl * ESZ,                  \
                      total_elems * ESZ);                 \
}
3141ce2a0343SLIU Zhiwei 
/* vfadd.vf: vector + scalar for SEW = 16/32/64. */
RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
GEN_VEXT_VF(vfadd_vf_h, 2)
GEN_VEXT_VF(vfadd_vf_w, 4)
GEN_VEXT_VF(vfadd_vf_d, 8)

/* vfsub.vv and vfsub.vf for SEW = 16/32/64. */
RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
GEN_VEXT_VV_ENV(vfsub_vv_h, 2)
GEN_VEXT_VV_ENV(vfsub_vv_w, 4)
GEN_VEXT_VV_ENV(vfsub_vv_d, 8)
RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
GEN_VEXT_VF(vfsub_vf_h, 2)
GEN_VEXT_VF(vfsub_vf_w, 4)
GEN_VEXT_VF(vfsub_vf_d, 8)
3161ce2a0343SLIU Zhiwei 
3162ce2a0343SLIU Zhiwei static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
3163ce2a0343SLIU Zhiwei {
3164ce2a0343SLIU Zhiwei     return float16_sub(b, a, s);
3165ce2a0343SLIU Zhiwei }
3166ce2a0343SLIU Zhiwei 
3167ce2a0343SLIU Zhiwei static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
3168ce2a0343SLIU Zhiwei {
3169ce2a0343SLIU Zhiwei     return float32_sub(b, a, s);
float32_rsub(uint32_t a,uint32_t b,float_status * s)3170ce2a0343SLIU Zhiwei }
3171ce2a0343SLIU Zhiwei 
3172ce2a0343SLIU Zhiwei static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
3173ce2a0343SLIU Zhiwei {
3174ce2a0343SLIU Zhiwei     return float64_sub(b, a, s);
3175ce2a0343SLIU Zhiwei }
3176ce2a0343SLIU Zhiwei 
/* vfrsub.vf: reverse subtract (scalar - vector) for SEW = 16/32/64. */
RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
GEN_VEXT_VF(vfrsub_vf_h, 2)
GEN_VEXT_VF(vfrsub_vf_w, 4)
GEN_VEXT_VF(vfrsub_vf_d, 8)
3183eeffab2eSLIU Zhiwei 
3184eeffab2eSLIU Zhiwei /* Vector Widening Floating-Point Add/Subtract Instructions */
3185eeffab2eSLIU Zhiwei static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
3186eeffab2eSLIU Zhiwei {
3187eeffab2eSLIU Zhiwei     return float32_add(float16_to_float32(a, true, s),
3188eeffab2eSLIU Zhiwei                        float16_to_float32(b, true, s), s);
3189eeffab2eSLIU Zhiwei }
3190eeffab2eSLIU Zhiwei 
3191eeffab2eSLIU Zhiwei static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
3192eeffab2eSLIU Zhiwei {
3193eeffab2eSLIU Zhiwei     return float64_add(float32_to_float64(a, s),
vfwadd32(uint32_t a,uint32_t b,float_status * s)3194eeffab2eSLIU Zhiwei                        float32_to_float64(b, s), s);
3195eeffab2eSLIU Zhiwei 
3196eeffab2eSLIU Zhiwei }
3197eeffab2eSLIU Zhiwei 
/* vfwadd.vv / vfwadd.vf: widening add, result is 2*SEW wide. */
RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
GEN_VEXT_VV_ENV(vfwadd_vv_h, 4)
GEN_VEXT_VV_ENV(vfwadd_vv_w, 8)
RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
GEN_VEXT_VF(vfwadd_vf_h, 4)
GEN_VEXT_VF(vfwadd_vf_w, 8)
3206eeffab2eSLIU Zhiwei 
3207eeffab2eSLIU Zhiwei static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
3208eeffab2eSLIU Zhiwei {
3209eeffab2eSLIU Zhiwei     return float32_sub(float16_to_float32(a, true, s),
3210eeffab2eSLIU Zhiwei                        float16_to_float32(b, true, s), s);
3211eeffab2eSLIU Zhiwei }
3212eeffab2eSLIU Zhiwei 
3213eeffab2eSLIU Zhiwei static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
3214eeffab2eSLIU Zhiwei {
3215eeffab2eSLIU Zhiwei     return float64_sub(float32_to_float64(a, s),
vfwsub32(uint32_t a,uint32_t b,float_status * s)3216eeffab2eSLIU Zhiwei                        float32_to_float64(b, s), s);
3217eeffab2eSLIU Zhiwei 
3218eeffab2eSLIU Zhiwei }
3219eeffab2eSLIU Zhiwei 
/* vfwsub.vv / vfwsub.vf: widening subtract, result is 2*SEW wide. */
RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
GEN_VEXT_VV_ENV(vfwsub_vv_h, 4)
GEN_VEXT_VV_ENV(vfwsub_vv_w, 8)
RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
GEN_VEXT_VF(vfwsub_vf_h, 4)
GEN_VEXT_VF(vfwsub_vf_w, 8)
3228eeffab2eSLIU Zhiwei 
3229eeffab2eSLIU Zhiwei static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
3230eeffab2eSLIU Zhiwei {
3231eeffab2eSLIU Zhiwei     return float32_add(a, float16_to_float32(b, true, s), s);
3232eeffab2eSLIU Zhiwei }
3233eeffab2eSLIU Zhiwei 
3234eeffab2eSLIU Zhiwei static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
3235eeffab2eSLIU Zhiwei {
3236eeffab2eSLIU Zhiwei     return float64_add(a, float32_to_float64(b, s), s);
vfwaddw32(uint64_t a,uint32_t b,float_status * s)3237eeffab2eSLIU Zhiwei }
3238eeffab2eSLIU Zhiwei 
/* vfwadd.wv / vfwadd.wf: first source is already 2*SEW wide. */
RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
GEN_VEXT_VV_ENV(vfwadd_wv_h, 4)
GEN_VEXT_VV_ENV(vfwadd_wv_w, 8)
RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
GEN_VEXT_VF(vfwadd_wf_h, 4)
GEN_VEXT_VF(vfwadd_wf_w, 8)
3247eeffab2eSLIU Zhiwei 
3248eeffab2eSLIU Zhiwei static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
3249eeffab2eSLIU Zhiwei {
3250eeffab2eSLIU Zhiwei     return float32_sub(a, float16_to_float32(b, true, s), s);
3251eeffab2eSLIU Zhiwei }
3252eeffab2eSLIU Zhiwei 
3253eeffab2eSLIU Zhiwei static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
3254eeffab2eSLIU Zhiwei {
3255eeffab2eSLIU Zhiwei     return float64_sub(a, float32_to_float64(b, s), s);
vfwsubw32(uint64_t a,uint32_t b,float_status * s)3256eeffab2eSLIU Zhiwei }
3257eeffab2eSLIU Zhiwei 
/* vfwsub.wv / vfwsub.wf: first source is already 2*SEW wide. */
RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
GEN_VEXT_VV_ENV(vfwsub_wv_h, 4)
GEN_VEXT_VV_ENV(vfwsub_wv_w, 8)
RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
GEN_VEXT_VF(vfwsub_wf_h, 4)
GEN_VEXT_VF(vfwsub_wf_w, 8)
32660e0057cbSLIU Zhiwei 
/* Vector Single-Width Floating-Point Multiply/Divide Instructions */
/* vfmul.vv / vfmul.vf for SEW = 16/32/64. */
RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
GEN_VEXT_VV_ENV(vfmul_vv_h, 2)
GEN_VEXT_VV_ENV(vfmul_vv_w, 4)
GEN_VEXT_VV_ENV(vfmul_vv_d, 8)
RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
GEN_VEXT_VF(vfmul_vf_h, 2)
GEN_VEXT_VF(vfmul_vf_w, 4)
GEN_VEXT_VF(vfmul_vf_d, 8)

/* vfdiv.vv / vfdiv.vf for SEW = 16/32/64. */
RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
GEN_VEXT_VV_ENV(vfdiv_vv_h, 2)
GEN_VEXT_VV_ENV(vfdiv_vv_w, 4)
GEN_VEXT_VV_ENV(vfdiv_vv_d, 8)
RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
GEN_VEXT_VF(vfdiv_vf_h, 2)
GEN_VEXT_VF(vfdiv_vf_w, 4)
GEN_VEXT_VF(vfdiv_vf_d, 8)
32930e0057cbSLIU Zhiwei 
32940e0057cbSLIU Zhiwei static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
32950e0057cbSLIU Zhiwei {
32960e0057cbSLIU Zhiwei     return float16_div(b, a, s);
32970e0057cbSLIU Zhiwei }
32980e0057cbSLIU Zhiwei 
32990e0057cbSLIU Zhiwei static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
33000e0057cbSLIU Zhiwei {
33010e0057cbSLIU Zhiwei     return float32_div(b, a, s);
float32_rdiv(uint32_t a,uint32_t b,float_status * s)33020e0057cbSLIU Zhiwei }
33030e0057cbSLIU Zhiwei 
33040e0057cbSLIU Zhiwei static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
33050e0057cbSLIU Zhiwei {
33060e0057cbSLIU Zhiwei     return float64_div(b, a, s);
33070e0057cbSLIU Zhiwei }
33080e0057cbSLIU Zhiwei 
/* vfrdiv.vf: reverse divide (scalar / vector) for SEW = 16/32/64. */
RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
GEN_VEXT_VF(vfrdiv_vf_h, 2)
GEN_VEXT_VF(vfrdiv_vf_w, 4)
GEN_VEXT_VF(vfrdiv_vf_d, 8)
3315f7c7b7cdSLIU Zhiwei 
3316f7c7b7cdSLIU Zhiwei /* Vector Widening Floating-Point Multiply */
3317f7c7b7cdSLIU Zhiwei static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3318f7c7b7cdSLIU Zhiwei {
3319f7c7b7cdSLIU Zhiwei     return float32_mul(float16_to_float32(a, true, s),
3320f7c7b7cdSLIU Zhiwei                        float16_to_float32(b, true, s), s);
3321f7c7b7cdSLIU Zhiwei }
3322f7c7b7cdSLIU Zhiwei 
3323f7c7b7cdSLIU Zhiwei static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3324f7c7b7cdSLIU Zhiwei {
3325f7c7b7cdSLIU Zhiwei     return float64_mul(float32_to_float64(a, s),
vfwmul32(uint32_t a,uint32_t b,float_status * s)3326f7c7b7cdSLIU Zhiwei                        float32_to_float64(b, s), s);
3327f7c7b7cdSLIU Zhiwei 
3328f7c7b7cdSLIU Zhiwei }
/* vfwmul.vv / vfwmul.vf: widening multiply, result is 2*SEW wide. */
RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
GEN_VEXT_VV_ENV(vfwmul_vv_h, 4)
GEN_VEXT_VV_ENV(vfwmul_vv_w, 8)
RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
GEN_VEXT_VF(vfwmul_vf_h, 4)
GEN_VEXT_VF(vfwmul_vf_w, 8)
33374aa5a8feSLIU Zhiwei 
33384aa5a8feSLIU Zhiwei /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
/*
 * OPFVV3: generate the per-element body do_NAME() for a three-operand
 * (fused multiply-add) vector-vector FP instruction.  In addition to the
 * two source elements, the current destination element d is read and
 * passed to OP as the addend: OP(vs2[i], vs1[i], vd[i]).
 */
#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)       \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i,       \
                      CPURISCVState *env)                          \
{                                                                  \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
    TD d = *((TD *)vd + HD(i));                                    \
    *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status);          \
}
33484aa5a8feSLIU Zhiwei 
33494aa5a8feSLIU Zhiwei static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
33504aa5a8feSLIU Zhiwei {
33514aa5a8feSLIU Zhiwei     return float16_muladd(a, b, d, 0, s);
33524aa5a8feSLIU Zhiwei }
33534aa5a8feSLIU Zhiwei 
33544aa5a8feSLIU Zhiwei static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
33554aa5a8feSLIU Zhiwei {
33564aa5a8feSLIU Zhiwei     return float32_muladd(a, b, d, 0, s);
fmacc32(uint32_t a,uint32_t b,uint32_t d,float_status * s)33574aa5a8feSLIU Zhiwei }
33584aa5a8feSLIU Zhiwei 
33594aa5a8feSLIU Zhiwei static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
33604aa5a8feSLIU Zhiwei {
33614aa5a8feSLIU Zhiwei     return float64_muladd(a, b, d, 0, s);
33624aa5a8feSLIU Zhiwei }
33634aa5a8feSLIU Zhiwei 
/* vfmacc.vv: vd = +(vs1 * vs2) + vd, for SEW = 16/32/64. */
RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
GEN_VEXT_VV_ENV(vfmacc_vv_h, 2)
GEN_VEXT_VV_ENV(vfmacc_vv_w, 4)
GEN_VEXT_VV_ENV(vfmacc_vv_d, 8)
33704aa5a8feSLIU Zhiwei 
/*
 * OPFVF3: generate the per-element body do_NAME() for a three-operand
 * (fused multiply-add) vector-scalar FP instruction.  The scalar s1 is
 * reinterpreted as the element type and the current destination element
 * d is read as the addend: OP(vs2[i], s1, vd[i]).
 */
#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)           \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i,    \
                      CPURISCVState *env)                         \
{                                                                 \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                               \
    TD d = *((TD *)vd + HD(i));                                   \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
}
33794aa5a8feSLIU Zhiwei 
/* vfmacc.vf: vd = +(f[rs1] * vs2) + vd, for SEW = 16/32/64. */
RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
GEN_VEXT_VF(vfmacc_vf_h, 2)
GEN_VEXT_VF(vfmacc_vf_w, 4)
GEN_VEXT_VF(vfmacc_vf_d, 8)
33864aa5a8feSLIU Zhiwei 
33874aa5a8feSLIU Zhiwei static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
33884aa5a8feSLIU Zhiwei {
3389c45eff30SWeiwei Li     return float16_muladd(a, b, d, float_muladd_negate_c |
3390c45eff30SWeiwei Li                                    float_muladd_negate_product, s);
33914aa5a8feSLIU Zhiwei }
33924aa5a8feSLIU Zhiwei 
33934aa5a8feSLIU Zhiwei static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
33944aa5a8feSLIU Zhiwei {
3395c45eff30SWeiwei Li     return float32_muladd(a, b, d, float_muladd_negate_c |
fnmacc32(uint32_t a,uint32_t b,uint32_t d,float_status * s)3396c45eff30SWeiwei Li                                    float_muladd_negate_product, s);
33974aa5a8feSLIU Zhiwei }
33984aa5a8feSLIU Zhiwei 
33994aa5a8feSLIU Zhiwei static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
34004aa5a8feSLIU Zhiwei {
3401c45eff30SWeiwei Li     return float64_muladd(a, b, d, float_muladd_negate_c |
fnmacc64(uint64_t a,uint64_t b,uint64_t d,float_status * s)3402c45eff30SWeiwei Li                                    float_muladd_negate_product, s);
34034aa5a8feSLIU Zhiwei }
34044aa5a8feSLIU Zhiwei 
/* vfnmacc.vv / vfnmacc.vf: vd = -(src1 * vs2) - vd, for SEW = 16/32/64. */
RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2)
GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4)
GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8)
RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
GEN_VEXT_VF(vfnmacc_vf_h, 2)
GEN_VEXT_VF(vfnmacc_vf_w, 4)
GEN_VEXT_VF(vfnmacc_vf_d, 8)
34174aa5a8feSLIU Zhiwei 
34184aa5a8feSLIU Zhiwei static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
34194aa5a8feSLIU Zhiwei {
34204aa5a8feSLIU Zhiwei     return float16_muladd(a, b, d, float_muladd_negate_c, s);
34214aa5a8feSLIU Zhiwei }
34224aa5a8feSLIU Zhiwei 
34234aa5a8feSLIU Zhiwei static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
34244aa5a8feSLIU Zhiwei {
34254aa5a8feSLIU Zhiwei     return float32_muladd(a, b, d, float_muladd_negate_c, s);
fmsac32(uint32_t a,uint32_t b,uint32_t d,float_status * s)34264aa5a8feSLIU Zhiwei }
34274aa5a8feSLIU Zhiwei 
34284aa5a8feSLIU Zhiwei static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
34294aa5a8feSLIU Zhiwei {
34304aa5a8feSLIU Zhiwei     return float64_muladd(a, b, d, float_muladd_negate_c, s);
34314aa5a8feSLIU Zhiwei }
34324aa5a8feSLIU Zhiwei 
/* vfmsac.vv / vfmsac.vf: vd = +(src1 * vs2) - vd, for SEW = 16/32/64. */
RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
GEN_VEXT_VV_ENV(vfmsac_vv_h, 2)
GEN_VEXT_VV_ENV(vfmsac_vv_w, 4)
GEN_VEXT_VV_ENV(vfmsac_vv_d, 8)
RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
GEN_VEXT_VF(vfmsac_vf_h, 2)
GEN_VEXT_VF(vfmsac_vf_w, 4)
GEN_VEXT_VF(vfmsac_vf_d, 8)
34454aa5a8feSLIU Zhiwei 
34464aa5a8feSLIU Zhiwei static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
34474aa5a8feSLIU Zhiwei {
34484aa5a8feSLIU Zhiwei     return float16_muladd(a, b, d, float_muladd_negate_product, s);
34494aa5a8feSLIU Zhiwei }
34504aa5a8feSLIU Zhiwei 
34514aa5a8feSLIU Zhiwei static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
34524aa5a8feSLIU Zhiwei {
34534aa5a8feSLIU Zhiwei     return float32_muladd(a, b, d, float_muladd_negate_product, s);
fnmsac32(uint32_t a,uint32_t b,uint32_t d,float_status * s)34544aa5a8feSLIU Zhiwei }
34554aa5a8feSLIU Zhiwei 
34564aa5a8feSLIU Zhiwei static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
34574aa5a8feSLIU Zhiwei {
34584aa5a8feSLIU Zhiwei     return float64_muladd(a, b, d, float_muladd_negate_product, s);
34594aa5a8feSLIU Zhiwei }
34604aa5a8feSLIU Zhiwei 
/* vfnmsac.vv / vfnmsac.vf: vd = -(src1 * vs2) + vd, for SEW = 16/32/64. */
RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2)
GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4)
GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8)
RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
GEN_VEXT_VF(vfnmsac_vf_h, 2)
GEN_VEXT_VF(vfnmsac_vf_w, 4)
GEN_VEXT_VF(vfnmsac_vf_d, 8)
34734aa5a8feSLIU Zhiwei 
34744aa5a8feSLIU Zhiwei static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
34754aa5a8feSLIU Zhiwei {
34764aa5a8feSLIU Zhiwei     return float16_muladd(d, b, a, 0, s);
34774aa5a8feSLIU Zhiwei }
34784aa5a8feSLIU Zhiwei 
34794aa5a8feSLIU Zhiwei static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
34804aa5a8feSLIU Zhiwei {
34814aa5a8feSLIU Zhiwei     return float32_muladd(d, b, a, 0, s);
fmadd32(uint32_t a,uint32_t b,uint32_t d,float_status * s)34824aa5a8feSLIU Zhiwei }
34834aa5a8feSLIU Zhiwei 
34844aa5a8feSLIU Zhiwei static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
34854aa5a8feSLIU Zhiwei {
34864aa5a8feSLIU Zhiwei     return float64_muladd(d, b, a, 0, s);
34874aa5a8feSLIU Zhiwei }
34884aa5a8feSLIU Zhiwei 
/* vfmadd.vv / vfmadd.vf: vd = +(vd * src1) + vs2, for SEW = 16/32/64. */
RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
GEN_VEXT_VV_ENV(vfmadd_vv_h, 2)
GEN_VEXT_VV_ENV(vfmadd_vv_w, 4)
GEN_VEXT_VV_ENV(vfmadd_vv_d, 8)
RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
GEN_VEXT_VF(vfmadd_vf_h, 2)
GEN_VEXT_VF(vfmadd_vf_w, 4)
GEN_VEXT_VF(vfmadd_vf_d, 8)
35014aa5a8feSLIU Zhiwei 
35024aa5a8feSLIU Zhiwei static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
35034aa5a8feSLIU Zhiwei {
3504c45eff30SWeiwei Li     return float16_muladd(d, b, a, float_muladd_negate_c |
3505c45eff30SWeiwei Li                                    float_muladd_negate_product, s);
35064aa5a8feSLIU Zhiwei }
35074aa5a8feSLIU Zhiwei 
35084aa5a8feSLIU Zhiwei static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
35094aa5a8feSLIU Zhiwei {
3510c45eff30SWeiwei Li     return float32_muladd(d, b, a, float_muladd_negate_c |
fnmadd32(uint32_t a,uint32_t b,uint32_t d,float_status * s)3511c45eff30SWeiwei Li                                    float_muladd_negate_product, s);
35124aa5a8feSLIU Zhiwei }
35134aa5a8feSLIU Zhiwei 
35144aa5a8feSLIU Zhiwei static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
35154aa5a8feSLIU Zhiwei {
3516c45eff30SWeiwei Li     return float64_muladd(d, b, a, float_muladd_negate_c |
fnmadd64(uint64_t a,uint64_t b,uint64_t d,float_status * s)3517c45eff30SWeiwei Li                                    float_muladd_negate_product, s);
35184aa5a8feSLIU Zhiwei }
35194aa5a8feSLIU Zhiwei 
/* vfnmadd.{vv,vf}: per-element ops fnmadd16/32/64 plus the SEW=16/32/64
 * vector loop helpers (ESZ = 2/4/8 bytes). */
RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2)
GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4)
GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8)
RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
GEN_VEXT_VF(vfnmadd_vf_h, 2)
GEN_VEXT_VF(vfnmadd_vf_w, 4)
GEN_VEXT_VF(vfnmadd_vf_d, 8)
35324aa5a8feSLIU Zhiwei 
35334aa5a8feSLIU Zhiwei static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
35344aa5a8feSLIU Zhiwei {
35354aa5a8feSLIU Zhiwei     return float16_muladd(d, b, a, float_muladd_negate_c, s);
35364aa5a8feSLIU Zhiwei }
35374aa5a8feSLIU Zhiwei 
35384aa5a8feSLIU Zhiwei static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
35394aa5a8feSLIU Zhiwei {
35404aa5a8feSLIU Zhiwei     return float32_muladd(d, b, a, float_muladd_negate_c, s);
fmsub32(uint32_t a,uint32_t b,uint32_t d,float_status * s)35414aa5a8feSLIU Zhiwei }
35424aa5a8feSLIU Zhiwei 
35434aa5a8feSLIU Zhiwei static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
35444aa5a8feSLIU Zhiwei {
35454aa5a8feSLIU Zhiwei     return float64_muladd(d, b, a, float_muladd_negate_c, s);
35464aa5a8feSLIU Zhiwei }
35474aa5a8feSLIU Zhiwei 
/* vfmsub.{vv,vf}: per-element ops fmsub16/32/64 plus the SEW=16/32/64
 * vector loop helpers (ESZ = 2/4/8 bytes). */
RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
GEN_VEXT_VV_ENV(vfmsub_vv_h, 2)
GEN_VEXT_VV_ENV(vfmsub_vv_w, 4)
GEN_VEXT_VV_ENV(vfmsub_vv_d, 8)
RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
GEN_VEXT_VF(vfmsub_vf_h, 2)
GEN_VEXT_VF(vfmsub_vf_w, 4)
GEN_VEXT_VF(vfmsub_vf_d, 8)
35604aa5a8feSLIU Zhiwei 
35614aa5a8feSLIU Zhiwei static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
35624aa5a8feSLIU Zhiwei {
35634aa5a8feSLIU Zhiwei     return float16_muladd(d, b, a, float_muladd_negate_product, s);
35644aa5a8feSLIU Zhiwei }
35654aa5a8feSLIU Zhiwei 
35664aa5a8feSLIU Zhiwei static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
35674aa5a8feSLIU Zhiwei {
35684aa5a8feSLIU Zhiwei     return float32_muladd(d, b, a, float_muladd_negate_product, s);
fnmsub32(uint32_t a,uint32_t b,uint32_t d,float_status * s)35694aa5a8feSLIU Zhiwei }
35704aa5a8feSLIU Zhiwei 
35714aa5a8feSLIU Zhiwei static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
35724aa5a8feSLIU Zhiwei {
35734aa5a8feSLIU Zhiwei     return float64_muladd(d, b, a, float_muladd_negate_product, s);
35744aa5a8feSLIU Zhiwei }
35754aa5a8feSLIU Zhiwei 
/* vfnmsub.{vv,vf}: per-element ops fnmsub16/32/64 plus the SEW=16/32/64
 * vector loop helpers (ESZ = 2/4/8 bytes). */
RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2)
GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4)
GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8)
RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
GEN_VEXT_VF(vfnmsub_vf_h, 2)
GEN_VEXT_VF(vfnmsub_vf_w, 4)
GEN_VEXT_VF(vfnmsub_vf_d, 8)
35880dd50959SLIU Zhiwei 
35890dd50959SLIU Zhiwei /* Vector Widening Floating-Point Fused Multiply-Add Instructions */
35900dd50959SLIU Zhiwei static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
35910dd50959SLIU Zhiwei {
35920dd50959SLIU Zhiwei     return float32_muladd(float16_to_float32(a, true, s),
35930dd50959SLIU Zhiwei                           float16_to_float32(b, true, s), d, 0, s);
35940dd50959SLIU Zhiwei }
35950dd50959SLIU Zhiwei 
35960dd50959SLIU Zhiwei static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
35970dd50959SLIU Zhiwei {
35980dd50959SLIU Zhiwei     return float64_muladd(float32_to_float64(a, s),
fwmacc32(uint32_t a,uint32_t b,uint64_t d,float_status * s)35990dd50959SLIU Zhiwei                           float32_to_float64(b, s), d, 0, s);
36000dd50959SLIU Zhiwei }
36010dd50959SLIU Zhiwei 
/* vfwmacc.{vv,vf}: widening FMA — destination elements are 2*SEW, so the
 * generated helpers use ESZ = 4/8 bytes. */
RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4)
GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8)
RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
GEN_VEXT_VF(vfwmacc_vf_h, 4)
GEN_VEXT_VF(vfwmacc_vf_w, 8)
36100dd50959SLIU Zhiwei 
3611adf772b0SWeiwei Li static uint32_t fwmaccbf16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3612adf772b0SWeiwei Li {
3613adf772b0SWeiwei Li     return float32_muladd(bfloat16_to_float32(a, s),
3614adf772b0SWeiwei Li                           bfloat16_to_float32(b, s), d, 0, s);
3615adf772b0SWeiwei Li }
3616adf772b0SWeiwei Li 
/* vfwmaccbf16.{vv,vf}: BF16 widening FMA — destination SEW is 32 (ESZ 4). */
RVVCALL(OPFVV3, vfwmaccbf16_vv, WOP_UUU_H, H4, H2, H2, fwmaccbf16)
GEN_VEXT_VV_ENV(vfwmaccbf16_vv, 4)
RVVCALL(OPFVF3, vfwmaccbf16_vf, WOP_UUU_H, H4, H2, fwmaccbf16)
GEN_VEXT_VF(vfwmaccbf16_vf, 4)
3621adf772b0SWeiwei Li 
36220dd50959SLIU Zhiwei static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
36230dd50959SLIU Zhiwei {
36240dd50959SLIU Zhiwei     return float32_muladd(float16_to_float32(a, true, s),
36250dd50959SLIU Zhiwei                           float16_to_float32(b, true, s), d,
3626c45eff30SWeiwei Li                           float_muladd_negate_c | float_muladd_negate_product,
3627c45eff30SWeiwei Li                           s);
36280dd50959SLIU Zhiwei }
36290dd50959SLIU Zhiwei 
36300dd50959SLIU Zhiwei static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
36310dd50959SLIU Zhiwei {
3632c45eff30SWeiwei Li     return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s),
fwnmacc32(uint32_t a,uint32_t b,uint64_t d,float_status * s)3633c45eff30SWeiwei Li                           d, float_muladd_negate_c |
3634c45eff30SWeiwei Li                              float_muladd_negate_product, s);
36350dd50959SLIU Zhiwei }
36360dd50959SLIU Zhiwei 
/* vfwnmacc.{vv,vf}: widening negated FMA — destination is 2*SEW (ESZ 4/8). */
RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4)
GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8)
RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
GEN_VEXT_VF(vfwnmacc_vf_h, 4)
GEN_VEXT_VF(vfwnmacc_vf_w, 8)
36450dd50959SLIU Zhiwei 
36460dd50959SLIU Zhiwei static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
36470dd50959SLIU Zhiwei {
36480dd50959SLIU Zhiwei     return float32_muladd(float16_to_float32(a, true, s),
36490dd50959SLIU Zhiwei                           float16_to_float32(b, true, s), d,
36500dd50959SLIU Zhiwei                           float_muladd_negate_c, s);
36510dd50959SLIU Zhiwei }
36520dd50959SLIU Zhiwei 
36530dd50959SLIU Zhiwei static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
36540dd50959SLIU Zhiwei {
36550dd50959SLIU Zhiwei     return float64_muladd(float32_to_float64(a, s),
fwmsac32(uint32_t a,uint32_t b,uint64_t d,float_status * s)36560dd50959SLIU Zhiwei                           float32_to_float64(b, s), d,
36570dd50959SLIU Zhiwei                           float_muladd_negate_c, s);
36580dd50959SLIU Zhiwei }
36590dd50959SLIU Zhiwei 
/* vfwmsac.{vv,vf}: widening multiply-subtract — destination 2*SEW (ESZ 4/8). */
RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4)
GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8)
RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
GEN_VEXT_VF(vfwmsac_vf_h, 4)
GEN_VEXT_VF(vfwmsac_vf_w, 8)
36680dd50959SLIU Zhiwei 
36690dd50959SLIU Zhiwei static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
36700dd50959SLIU Zhiwei {
36710dd50959SLIU Zhiwei     return float32_muladd(float16_to_float32(a, true, s),
36720dd50959SLIU Zhiwei                           float16_to_float32(b, true, s), d,
36730dd50959SLIU Zhiwei                           float_muladd_negate_product, s);
36740dd50959SLIU Zhiwei }
36750dd50959SLIU Zhiwei 
36760dd50959SLIU Zhiwei static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
36770dd50959SLIU Zhiwei {
36780dd50959SLIU Zhiwei     return float64_muladd(float32_to_float64(a, s),
fwnmsac32(uint32_t a,uint32_t b,uint64_t d,float_status * s)36790dd50959SLIU Zhiwei                           float32_to_float64(b, s), d,
36800dd50959SLIU Zhiwei                           float_muladd_negate_product, s);
36810dd50959SLIU Zhiwei }
36820dd50959SLIU Zhiwei 
/* vfwnmsac.{vv,vf}: widening negated multiply-subtract — ESZ 4/8. */
RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4)
GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8)
RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
GEN_VEXT_VF(vfwnmsac_vf_h, 4)
GEN_VEXT_VF(vfwnmsac_vf_w, 8)
3691d9e4ce72SLIU Zhiwei 
/* Vector Floating-Point Square-Root Instruction */
/*
 * OPFVV1: define do_<NAME>, the per-element body of a unary FP op.
 * Loads element i of vs2 (type T2, index adjusted by HS2), applies OP
 * with the env's FP status, and stores the TD result into vd at HD(i).
 */
#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP)         \
static void do_##NAME(void *vd, void *vs2, int i,      \
                      CPURISCVState *env)              \
{                                                      \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                    \
    *((TD *)vd + HD(i)) = OP(s2, &env->fp_status);     \
}
3700d9e4ce72SLIU Zhiwei 
/*
 * GEN_VEXT_V_ENV: emit HELPER(NAME), the loop driver for a unary vector
 * FP helper.  Iterates elements [env->vstart, vl); inactive elements
 * (vm == 0 and mask bit clear) are skipped and, per vma, filled with
 * all-1s; active elements run do_##NAME.  Afterwards vstart is reset
 * and the tail [vl, total_elems) is set to 1s per vta.  ESZ is the
 * element size in bytes.
 * NOTE(review): the vl == 0 early return looks redundant with
 * VSTART_CHECK_EARLY_EXIT(env, vl) — confirm before removing either.
 */
#define GEN_VEXT_V_ENV(NAME, ESZ)                      \
void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
                  CPURISCVState *env, uint32_t desc)   \
{                                                      \
    uint32_t vm = vext_vm(desc);                       \
    uint32_t vl = env->vl;                             \
    uint32_t total_elems =                             \
        vext_get_total_elems(env, desc, ESZ);          \
    uint32_t vta = vext_vta(desc);                     \
    uint32_t vma = vext_vma(desc);                     \
    uint32_t i;                                        \
                                                       \
    VSTART_CHECK_EARLY_EXIT(env, vl);                  \
                                                       \
    if (vl == 0) {                                     \
        return;                                        \
    }                                                  \
    for (i = env->vstart; i < vl; i++) {               \
        if (!vm && !vext_elem_mask(v0, i)) {           \
            /* set masked-off elements to 1s */        \
            vext_set_elems_1s(vd, vma, i * ESZ,        \
                              (i + 1) * ESZ);          \
            continue;                                  \
        }                                              \
        do_##NAME(vd, vs2, i, env);                    \
    }                                                  \
    env->vstart = 0;                                   \
    vext_set_elems_1s(vd, vta, vl * ESZ,               \
                      total_elems * ESZ);              \
}
3731d9e4ce72SLIU Zhiwei 
/* vfsqrt.v: per-element square root via softfloat for SEW = 16/32/64. */
RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
GEN_VEXT_V_ENV(vfsqrt_v_h, 2)
GEN_VEXT_V_ENV(vfsqrt_v_w, 4)
GEN_VEXT_V_ENV(vfsqrt_v_d, 8)
3738230b53ddSLIU Zhiwei 
/*
 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
 *
 * Adapted from riscv-v-spec recip.c:
 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
 */
/*
 * 7-bit-precision 1/sqrt(f) estimate on the raw IEEE-754 encoding 'f';
 * exp_size/frac_size select the format.  The frsqrt7_[hsd] callers only
 * pass +normal / +subnormal values here — NaN, infinity, zero and
 * negative inputs are filtered out beforehand.
 */
static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
{
    uint64_t sign = extract64(f, frac_size + exp_size, 1);
    uint64_t exp = extract64(f, frac_size, exp_size);
    uint64_t frac = extract64(f, 0, frac_size);

    /* 128-entry estimate table from the riscv-v-spec reference model. */
    const uint8_t lookup_table[] = {
        52, 51, 50, 48, 47, 46, 44, 43,
        42, 41, 40, 39, 38, 36, 35, 34,
        33, 32, 31, 30, 30, 29, 28, 27,
        26, 25, 24, 23, 23, 22, 21, 20,
        19, 19, 18, 17, 16, 16, 15, 14,
        14, 13, 12, 12, 11, 10, 10, 9,
        9, 8, 7, 7, 6, 6, 5, 4,
        4, 3, 3, 2, 2, 1, 1, 0,
        127, 125, 123, 121, 119, 118, 116, 114,
        113, 111, 109, 108, 106, 105, 103, 102,
        100, 99, 97, 96, 95, 93, 92, 91,
        90, 88, 87, 86, 85, 84, 83, 82,
        80, 79, 78, 77, 76, 75, 74, 73,
        72, 71, 70, 70, 69, 68, 67, 66,
        65, 64, 63, 63, 62, 61, 60, 59,
        59, 58, 57, 56, 56, 55, 54, 53
    };
    const int precision = 7;

    if (exp == 0 && frac != 0) { /* subnormal */
        /*
         * Normalize the subnormal.  exp wraps below zero here; the
         * exponent arithmetic below is modulo 2^64, as in recip.c.
         */
        while (extract64(frac, frac_size - 1, 1) == 0) {
            exp--;
            frac <<= 1;
        }

        frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
    }

    /* Table index: low exponent bit plus the top 6 fraction bits. */
    int idx = ((exp & 1) << (precision - 1)) |
              (frac >> (frac_size - precision + 1));
    uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
                        (frac_size - precision);
    /* out_exp = (3 * bias - 1 - exp) / 2, using ~exp == -exp - 1. */
    uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;

    uint64_t val = 0;
    val = deposit64(val, 0, frac_size, out_frac);
    val = deposit64(val, frac_size, exp_size, out_exp);
    val = deposit64(val, frac_size + exp_size, 1, sign);
    return val;
}
3793e848a1e5SFrank Chang 
3794e848a1e5SFrank Chang static float16 frsqrt7_h(float16 f, float_status *s)
3795e848a1e5SFrank Chang {
3796e848a1e5SFrank Chang     int exp_size = 5, frac_size = 10;
frsqrt7_h(float16 f,float_status * s)3797e848a1e5SFrank Chang     bool sign = float16_is_neg(f);
3798e848a1e5SFrank Chang 
3799e848a1e5SFrank Chang     /*
3800e848a1e5SFrank Chang      * frsqrt7(sNaN) = canonical NaN
3801e848a1e5SFrank Chang      * frsqrt7(-inf) = canonical NaN
3802e848a1e5SFrank Chang      * frsqrt7(-normal) = canonical NaN
3803e848a1e5SFrank Chang      * frsqrt7(-subnormal) = canonical NaN
3804e848a1e5SFrank Chang      */
3805e848a1e5SFrank Chang     if (float16_is_signaling_nan(f, s) ||
3806e848a1e5SFrank Chang         (float16_is_infinity(f) && sign) ||
3807e848a1e5SFrank Chang         (float16_is_normal(f) && sign) ||
3808e848a1e5SFrank Chang         (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
3809e848a1e5SFrank Chang         s->float_exception_flags |= float_flag_invalid;
3810e848a1e5SFrank Chang         return float16_default_nan(s);
3811e848a1e5SFrank Chang     }
3812e848a1e5SFrank Chang 
3813e848a1e5SFrank Chang     /* frsqrt7(qNaN) = canonical NaN */
3814e848a1e5SFrank Chang     if (float16_is_quiet_nan(f, s)) {
3815e848a1e5SFrank Chang         return float16_default_nan(s);
3816e848a1e5SFrank Chang     }
3817e848a1e5SFrank Chang 
3818e848a1e5SFrank Chang     /* frsqrt7(+-0) = +-inf */
3819e848a1e5SFrank Chang     if (float16_is_zero(f)) {
3820e848a1e5SFrank Chang         s->float_exception_flags |= float_flag_divbyzero;
3821e848a1e5SFrank Chang         return float16_set_sign(float16_infinity, sign);
3822e848a1e5SFrank Chang     }
3823e848a1e5SFrank Chang 
3824e848a1e5SFrank Chang     /* frsqrt7(+inf) = +0 */
3825e848a1e5SFrank Chang     if (float16_is_infinity(f) && !sign) {
3826e848a1e5SFrank Chang         return float16_set_sign(float16_zero, sign);
3827e848a1e5SFrank Chang     }
3828e848a1e5SFrank Chang 
3829e848a1e5SFrank Chang     /* +normal, +subnormal */
3830e848a1e5SFrank Chang     uint64_t val = frsqrt7(f, exp_size, frac_size);
3831e848a1e5SFrank Chang     return make_float16(val);
3832e848a1e5SFrank Chang }
3833e848a1e5SFrank Chang 
3834e848a1e5SFrank Chang static float32 frsqrt7_s(float32 f, float_status *s)
3835e848a1e5SFrank Chang {
3836e848a1e5SFrank Chang     int exp_size = 8, frac_size = 23;
frsqrt7_s(float32 f,float_status * s)3837e848a1e5SFrank Chang     bool sign = float32_is_neg(f);
3838e848a1e5SFrank Chang 
3839e848a1e5SFrank Chang     /*
3840e848a1e5SFrank Chang      * frsqrt7(sNaN) = canonical NaN
3841e848a1e5SFrank Chang      * frsqrt7(-inf) = canonical NaN
3842e848a1e5SFrank Chang      * frsqrt7(-normal) = canonical NaN
3843e848a1e5SFrank Chang      * frsqrt7(-subnormal) = canonical NaN
3844e848a1e5SFrank Chang      */
3845e848a1e5SFrank Chang     if (float32_is_signaling_nan(f, s) ||
3846e848a1e5SFrank Chang         (float32_is_infinity(f) && sign) ||
3847e848a1e5SFrank Chang         (float32_is_normal(f) && sign) ||
3848e848a1e5SFrank Chang         (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
3849e848a1e5SFrank Chang         s->float_exception_flags |= float_flag_invalid;
3850e848a1e5SFrank Chang         return float32_default_nan(s);
3851e848a1e5SFrank Chang     }
3852e848a1e5SFrank Chang 
3853e848a1e5SFrank Chang     /* frsqrt7(qNaN) = canonical NaN */
3854e848a1e5SFrank Chang     if (float32_is_quiet_nan(f, s)) {
3855e848a1e5SFrank Chang         return float32_default_nan(s);
3856e848a1e5SFrank Chang     }
3857e848a1e5SFrank Chang 
3858e848a1e5SFrank Chang     /* frsqrt7(+-0) = +-inf */
3859e848a1e5SFrank Chang     if (float32_is_zero(f)) {
3860e848a1e5SFrank Chang         s->float_exception_flags |= float_flag_divbyzero;
3861e848a1e5SFrank Chang         return float32_set_sign(float32_infinity, sign);
3862e848a1e5SFrank Chang     }
3863e848a1e5SFrank Chang 
3864e848a1e5SFrank Chang     /* frsqrt7(+inf) = +0 */
3865e848a1e5SFrank Chang     if (float32_is_infinity(f) && !sign) {
3866e848a1e5SFrank Chang         return float32_set_sign(float32_zero, sign);
3867e848a1e5SFrank Chang     }
3868e848a1e5SFrank Chang 
3869e848a1e5SFrank Chang     /* +normal, +subnormal */
3870e848a1e5SFrank Chang     uint64_t val = frsqrt7(f, exp_size, frac_size);
3871e848a1e5SFrank Chang     return make_float32(val);
3872e848a1e5SFrank Chang }
3873e848a1e5SFrank Chang 
3874e848a1e5SFrank Chang static float64 frsqrt7_d(float64 f, float_status *s)
3875e848a1e5SFrank Chang {
3876e848a1e5SFrank Chang     int exp_size = 11, frac_size = 52;
frsqrt7_d(float64 f,float_status * s)3877e848a1e5SFrank Chang     bool sign = float64_is_neg(f);
3878e848a1e5SFrank Chang 
3879e848a1e5SFrank Chang     /*
3880e848a1e5SFrank Chang      * frsqrt7(sNaN) = canonical NaN
3881e848a1e5SFrank Chang      * frsqrt7(-inf) = canonical NaN
3882e848a1e5SFrank Chang      * frsqrt7(-normal) = canonical NaN
3883e848a1e5SFrank Chang      * frsqrt7(-subnormal) = canonical NaN
3884e848a1e5SFrank Chang      */
3885e848a1e5SFrank Chang     if (float64_is_signaling_nan(f, s) ||
3886e848a1e5SFrank Chang         (float64_is_infinity(f) && sign) ||
3887e848a1e5SFrank Chang         (float64_is_normal(f) && sign) ||
3888e848a1e5SFrank Chang         (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
3889e848a1e5SFrank Chang         s->float_exception_flags |= float_flag_invalid;
3890e848a1e5SFrank Chang         return float64_default_nan(s);
3891e848a1e5SFrank Chang     }
3892e848a1e5SFrank Chang 
3893e848a1e5SFrank Chang     /* frsqrt7(qNaN) = canonical NaN */
3894e848a1e5SFrank Chang     if (float64_is_quiet_nan(f, s)) {
3895e848a1e5SFrank Chang         return float64_default_nan(s);
3896e848a1e5SFrank Chang     }
3897e848a1e5SFrank Chang 
3898e848a1e5SFrank Chang     /* frsqrt7(+-0) = +-inf */
3899e848a1e5SFrank Chang     if (float64_is_zero(f)) {
3900e848a1e5SFrank Chang         s->float_exception_flags |= float_flag_divbyzero;
3901e848a1e5SFrank Chang         return float64_set_sign(float64_infinity, sign);
3902e848a1e5SFrank Chang     }
3903e848a1e5SFrank Chang 
3904e848a1e5SFrank Chang     /* frsqrt7(+inf) = +0 */
3905e848a1e5SFrank Chang     if (float64_is_infinity(f) && !sign) {
3906e848a1e5SFrank Chang         return float64_set_sign(float64_zero, sign);
3907e848a1e5SFrank Chang     }
3908e848a1e5SFrank Chang 
3909e848a1e5SFrank Chang     /* +normal, +subnormal */
3910e848a1e5SFrank Chang     uint64_t val = frsqrt7(f, exp_size, frac_size);
3911e848a1e5SFrank Chang     return make_float64(val);
3912e848a1e5SFrank Chang }
3913e848a1e5SFrank Chang 
/* vfrsqrt7.v: reciprocal square-root estimate for SEW = 16/32/64. */
RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2)
GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4)
GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8)
3920e848a1e5SFrank Chang 
/*
 * Vector Floating-Point Reciprocal Estimate Instruction
 *
 * Adapted from riscv-v-spec recip.c:
 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
 */
/*
 * 7-bit-precision 1/f estimate on the raw IEEE-754 encoding 'f'.
 * Unlike frsqrt7(), this needs the float_status: the reciprocal of a
 * deeply subnormal input overflows, and the returned value then
 * depends on the current rounding mode.
 */
static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
                      float_status *s)
{
    uint64_t sign = extract64(f, frac_size + exp_size, 1);
    uint64_t exp = extract64(f, frac_size, exp_size);
    uint64_t frac = extract64(f, 0, frac_size);

    /* 128-entry estimate table from the riscv-v-spec reference model. */
    const uint8_t lookup_table[] = {
        127, 125, 123, 121, 119, 117, 116, 114,
        112, 110, 109, 107, 105, 104, 102, 100,
        99, 97, 96, 94, 93, 91, 90, 88,
        87, 85, 84, 83, 81, 80, 79, 77,
        76, 75, 74, 72, 71, 70, 69, 68,
        66, 65, 64, 63, 62, 61, 60, 59,
        58, 57, 56, 55, 54, 53, 52, 51,
        50, 49, 48, 47, 46, 45, 44, 43,
        42, 41, 40, 40, 39, 38, 37, 36,
        35, 35, 34, 33, 32, 31, 31, 30,
        29, 28, 28, 27, 26, 25, 25, 24,
        23, 23, 22, 21, 21, 20, 19, 19,
        18, 17, 17, 16, 15, 15, 14, 14,
        13, 12, 12, 11, 11, 10, 9, 9,
        8, 8, 7, 7, 6, 5, 5, 4,
        4, 3, 3, 2, 2, 1, 1, 0
    };
    const int precision = 7;

    if (exp == 0 && frac != 0) { /* subnormal */
        /* Normalize the subnormal (exp wraps below zero, mod 2^64). */
        while (extract64(frac, frac_size - 1, 1) == 0) {
            exp--;
            frac <<= 1;
        }

        frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);

        /*
         * exp stays 0 or becomes UINT64_MAX when normalization took at
         * most one shift; any smaller exp means 1/f cannot be
         * represented and overflows the format.
         */
        if (exp != 0 && exp != UINT64_MAX) {
            /*
             * Overflow to inf or max value of same sign,
             * depending on sign and rounding mode.
             */
            s->float_exception_flags |= (float_flag_inexact |
                                         float_flag_overflow);

            if ((s->float_rounding_mode == float_round_to_zero) ||
                ((s->float_rounding_mode == float_round_down) && !sign) ||
                ((s->float_rounding_mode == float_round_up) && sign)) {
                /* Return greatest/negative finite value. */
                return (sign << (exp_size + frac_size)) |
                       (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
            } else {
                /* Return +-inf. */
                return (sign << (exp_size + frac_size)) |
                       MAKE_64BIT_MASK(frac_size, exp_size);
            }
        }
    }

    /* Table index: top 7 fraction bits. */
    int idx = frac >> (frac_size - precision);
    uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
                        (frac_size - precision);
    /* out_exp = 2 * bias - 1 - exp (mod 2^64), using ~exp == -exp - 1. */
    uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;

    if (out_exp == 0 || out_exp == UINT64_MAX) {
        /*
         * The result is subnormal, but don't raise the underflow exception,
         * because there's no additional loss of precision.
         */
        out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
        if (out_exp == UINT64_MAX) {
            out_frac >>= 1;
            out_exp = 0;
        }
    }

    uint64_t val = 0;
    val = deposit64(val, 0, frac_size, out_frac);
    val = deposit64(val, frac_size, exp_size, out_exp);
    val = deposit64(val, frac_size + exp_size, 1, sign);
    return val;
}
400855c35407SFrank Chang 
400955c35407SFrank Chang static float16 frec7_h(float16 f, float_status *s)
401055c35407SFrank Chang {
401155c35407SFrank Chang     int exp_size = 5, frac_size = 10;
frec7_h(float16 f,float_status * s)401255c35407SFrank Chang     bool sign = float16_is_neg(f);
401355c35407SFrank Chang 
401455c35407SFrank Chang     /* frec7(+-inf) = +-0 */
401555c35407SFrank Chang     if (float16_is_infinity(f)) {
401655c35407SFrank Chang         return float16_set_sign(float16_zero, sign);
401755c35407SFrank Chang     }
401855c35407SFrank Chang 
401955c35407SFrank Chang     /* frec7(+-0) = +-inf */
402055c35407SFrank Chang     if (float16_is_zero(f)) {
402155c35407SFrank Chang         s->float_exception_flags |= float_flag_divbyzero;
402255c35407SFrank Chang         return float16_set_sign(float16_infinity, sign);
402355c35407SFrank Chang     }
402455c35407SFrank Chang 
402555c35407SFrank Chang     /* frec7(sNaN) = canonical NaN */
402655c35407SFrank Chang     if (float16_is_signaling_nan(f, s)) {
402755c35407SFrank Chang         s->float_exception_flags |= float_flag_invalid;
402855c35407SFrank Chang         return float16_default_nan(s);
402955c35407SFrank Chang     }
403055c35407SFrank Chang 
403155c35407SFrank Chang     /* frec7(qNaN) = canonical NaN */
403255c35407SFrank Chang     if (float16_is_quiet_nan(f, s)) {
403355c35407SFrank Chang         return float16_default_nan(s);
403455c35407SFrank Chang     }
403555c35407SFrank Chang 
403655c35407SFrank Chang     /* +-normal, +-subnormal */
403755c35407SFrank Chang     uint64_t val = frec7(f, exp_size, frac_size, s);
403855c35407SFrank Chang     return make_float16(val);
403955c35407SFrank Chang }
404055c35407SFrank Chang 
404155c35407SFrank Chang static float32 frec7_s(float32 f, float_status *s)
404255c35407SFrank Chang {
404355c35407SFrank Chang     int exp_size = 8, frac_size = 23;
frec7_s(float32 f,float_status * s)404455c35407SFrank Chang     bool sign = float32_is_neg(f);
404555c35407SFrank Chang 
404655c35407SFrank Chang     /* frec7(+-inf) = +-0 */
404755c35407SFrank Chang     if (float32_is_infinity(f)) {
404855c35407SFrank Chang         return float32_set_sign(float32_zero, sign);
404955c35407SFrank Chang     }
405055c35407SFrank Chang 
405155c35407SFrank Chang     /* frec7(+-0) = +-inf */
405255c35407SFrank Chang     if (float32_is_zero(f)) {
405355c35407SFrank Chang         s->float_exception_flags |= float_flag_divbyzero;
405455c35407SFrank Chang         return float32_set_sign(float32_infinity, sign);
405555c35407SFrank Chang     }
405655c35407SFrank Chang 
405755c35407SFrank Chang     /* frec7(sNaN) = canonical NaN */
405855c35407SFrank Chang     if (float32_is_signaling_nan(f, s)) {
405955c35407SFrank Chang         s->float_exception_flags |= float_flag_invalid;
406055c35407SFrank Chang         return float32_default_nan(s);
406155c35407SFrank Chang     }
406255c35407SFrank Chang 
406355c35407SFrank Chang     /* frec7(qNaN) = canonical NaN */
406455c35407SFrank Chang     if (float32_is_quiet_nan(f, s)) {
406555c35407SFrank Chang         return float32_default_nan(s);
406655c35407SFrank Chang     }
406755c35407SFrank Chang 
406855c35407SFrank Chang     /* +-normal, +-subnormal */
406955c35407SFrank Chang     uint64_t val = frec7(f, exp_size, frac_size, s);
407055c35407SFrank Chang     return make_float32(val);
407155c35407SFrank Chang }
407255c35407SFrank Chang 
407355c35407SFrank Chang static float64 frec7_d(float64 f, float_status *s)
407455c35407SFrank Chang {
407555c35407SFrank Chang     int exp_size = 11, frac_size = 52;
frec7_d(float64 f,float_status * s)407655c35407SFrank Chang     bool sign = float64_is_neg(f);
407755c35407SFrank Chang 
407855c35407SFrank Chang     /* frec7(+-inf) = +-0 */
407955c35407SFrank Chang     if (float64_is_infinity(f)) {
408055c35407SFrank Chang         return float64_set_sign(float64_zero, sign);
408155c35407SFrank Chang     }
408255c35407SFrank Chang 
408355c35407SFrank Chang     /* frec7(+-0) = +-inf */
408455c35407SFrank Chang     if (float64_is_zero(f)) {
408555c35407SFrank Chang         s->float_exception_flags |= float_flag_divbyzero;
408655c35407SFrank Chang         return float64_set_sign(float64_infinity, sign);
408755c35407SFrank Chang     }
408855c35407SFrank Chang 
408955c35407SFrank Chang     /* frec7(sNaN) = canonical NaN */
409055c35407SFrank Chang     if (float64_is_signaling_nan(f, s)) {
409155c35407SFrank Chang         s->float_exception_flags |= float_flag_invalid;
409255c35407SFrank Chang         return float64_default_nan(s);
409355c35407SFrank Chang     }
409455c35407SFrank Chang 
409555c35407SFrank Chang     /* frec7(qNaN) = canonical NaN */
409655c35407SFrank Chang     if (float64_is_quiet_nan(f, s)) {
409755c35407SFrank Chang         return float64_default_nan(s);
409855c35407SFrank Chang     }
409955c35407SFrank Chang 
410055c35407SFrank Chang     /* +-normal, +-subnormal */
410155c35407SFrank Chang     uint64_t val = frec7(f, exp_size, frac_size, s);
410255c35407SFrank Chang     return make_float64(val);
410355c35407SFrank Chang }
410455c35407SFrank Chang 
/*
 * vfrec7.v: per-element reciprocal estimate.  RVVCALL instantiates the
 * per-element op; GEN_VEXT_V_ENV emits the vector helper (2nd arg is the
 * element size in bytes).
 */
RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
GEN_VEXT_V_ENV(vfrec7_v_h, 2)
GEN_VEXT_V_ENV(vfrec7_v_w, 4)
GEN_VEXT_V_ENV(vfrec7_v_d, 8)
411155c35407SFrank Chang 
/* Vector Floating-Point MIN/MAX Instructions */
/* vfmin: softfloat minimum_number op — a NaN operand yields the other operand */
RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
GEN_VEXT_VV_ENV(vfmin_vv_h, 2)
GEN_VEXT_VV_ENV(vfmin_vv_w, 4)
GEN_VEXT_VV_ENV(vfmin_vv_d, 8)
RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
GEN_VEXT_VF(vfmin_vf_h, 2)
GEN_VEXT_VF(vfmin_vf_w, 4)
GEN_VEXT_VF(vfmin_vf_d, 8)

/* vfmax: softfloat maximum_number op — a NaN operand yields the other operand */
RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
GEN_VEXT_VV_ENV(vfmax_vv_h, 2)
GEN_VEXT_VV_ENV(vfmax_vv_w, 4)
GEN_VEXT_VV_ENV(vfmax_vv_d, 8)
RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
GEN_VEXT_VF(vfmax_vf_h, 2)
GEN_VEXT_VF(vfmax_vf_w, 4)
GEN_VEXT_VF(vfmax_vf_d, 8)
41381d426b81SLIU Zhiwei 
41391d426b81SLIU Zhiwei /* Vector Floating-Point Sign-Injection Instructions */
41401d426b81SLIU Zhiwei static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
41411d426b81SLIU Zhiwei {
41421d426b81SLIU Zhiwei     return deposit64(b, 0, 15, a);
41431d426b81SLIU Zhiwei }
41441d426b81SLIU Zhiwei 
41451d426b81SLIU Zhiwei static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
41461d426b81SLIU Zhiwei {
41471d426b81SLIU Zhiwei     return deposit64(b, 0, 31, a);
fsgnj32(uint32_t a,uint32_t b,float_status * s)41481d426b81SLIU Zhiwei }
41491d426b81SLIU Zhiwei 
41501d426b81SLIU Zhiwei static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
41511d426b81SLIU Zhiwei {
41521d426b81SLIU Zhiwei     return deposit64(b, 0, 63, a);
41531d426b81SLIU Zhiwei }
41541d426b81SLIU Zhiwei 
/* vfsgnj: result keeps the first operand's magnitude, takes the second's sign */
RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2)
GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4)
GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8)
RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
GEN_VEXT_VF(vfsgnj_vf_h, 2)
GEN_VEXT_VF(vfsgnj_vf_w, 4)
GEN_VEXT_VF(vfsgnj_vf_d, 8)
41671d426b81SLIU Zhiwei 
41681d426b81SLIU Zhiwei static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
41691d426b81SLIU Zhiwei {
41701d426b81SLIU Zhiwei     return deposit64(~b, 0, 15, a);
41711d426b81SLIU Zhiwei }
41721d426b81SLIU Zhiwei 
41731d426b81SLIU Zhiwei static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
41741d426b81SLIU Zhiwei {
41751d426b81SLIU Zhiwei     return deposit64(~b, 0, 31, a);
fsgnjn32(uint32_t a,uint32_t b,float_status * s)41761d426b81SLIU Zhiwei }
41771d426b81SLIU Zhiwei 
41781d426b81SLIU Zhiwei static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
41791d426b81SLIU Zhiwei {
41801d426b81SLIU Zhiwei     return deposit64(~b, 0, 63, a);
41811d426b81SLIU Zhiwei }
41821d426b81SLIU Zhiwei 
/* vfsgnjn: result keeps the first operand's magnitude with the second's sign inverted */
RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2)
GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4)
GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8)
RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
GEN_VEXT_VF(vfsgnjn_vf_h, 2)
GEN_VEXT_VF(vfsgnjn_vf_w, 4)
GEN_VEXT_VF(vfsgnjn_vf_d, 8)
41951d426b81SLIU Zhiwei 
41961d426b81SLIU Zhiwei static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
41971d426b81SLIU Zhiwei {
41981d426b81SLIU Zhiwei     return deposit64(b ^ a, 0, 15, a);
41991d426b81SLIU Zhiwei }
42001d426b81SLIU Zhiwei 
42011d426b81SLIU Zhiwei static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
42021d426b81SLIU Zhiwei {
42031d426b81SLIU Zhiwei     return deposit64(b ^ a, 0, 31, a);
fsgnjx32(uint32_t a,uint32_t b,float_status * s)42041d426b81SLIU Zhiwei }
42051d426b81SLIU Zhiwei 
42061d426b81SLIU Zhiwei static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
42071d426b81SLIU Zhiwei {
42081d426b81SLIU Zhiwei     return deposit64(b ^ a, 0, 63, a);
42091d426b81SLIU Zhiwei }
42101d426b81SLIU Zhiwei 
/* vfsgnjx: result keeps the first operand's magnitude; signs are XORed */
RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2)
GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4)
GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8)
RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
GEN_VEXT_VF(vfsgnjx_vf_h, 2)
GEN_VEXT_VF(vfsgnjx_vf_w, 4)
GEN_VEXT_VF(vfsgnjx_vf_d, 8)
42232a68e9e5SLIU Zhiwei 
42242a68e9e5SLIU Zhiwei /* Vector Floating-Point Compare Instructions */
/*
 * Generate a helper for a masked vector-vector FP compare producing a mask:
 * bit i of vd = DO_OP(vs2[i], vs1[i]).  Masked-off bits are written as 1s
 * only when vma (mask-agnostic) is set; otherwise they are left untouched.
 * total_elems is the number of mask bits in one vector register
 * (vlenb bytes * 8).
 */
#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP)            \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t vl = env->vl;                                    \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;    \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t vma = vext_vma(desc);                            \
    uint32_t i;                                               \
                                                              \
    VSTART_CHECK_EARLY_EXIT(env, vl);                         \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        if (!vm && !vext_elem_mask(v0, i)) {                  \
            /* set masked-off elements to 1s */               \
            if (vma) {                                        \
                vext_set_elem_mask(vd, i, 1);                 \
            }                                                 \
            continue;                                         \
        }                                                     \
        vext_set_elem_mask(vd, i,                             \
                           DO_OP(s2, s1, &env->fp_status));   \
    }                                                         \
    env->vstart = 0;                                          \
    /*
     * mask destination registers are always tail-agnostic;
     * set tail elements to 1s
     */                                                       \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}
42622a68e9e5SLIU Zhiwei 
/* vmfeq.vv uses the quiet compare: qNaN operands do not raise invalid */
GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
42662a68e9e5SLIU Zhiwei 
/*
 * Generate a helper for a masked vector-scalar FP compare producing a mask:
 * bit i of vd = DO_OP(vs2[i], s1), where s1 is the scalar FP operand
 * narrowed to ETYPE.  Masked-off bits are written as 1s only when vma
 * (mask-agnostic) is set; otherwise they are left untouched.
 */
#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP)                      \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,       \
                  CPURISCVState *env, uint32_t desc)                \
{                                                                   \
    uint32_t vm = vext_vm(desc);                                    \
    uint32_t vl = env->vl;                                          \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;          \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);                    \
    uint32_t vma = vext_vma(desc);                                  \
    uint32_t i;                                                     \
                                                                    \
    VSTART_CHECK_EARLY_EXIT(env, vl);                               \
                                                                    \
    for (i = env->vstart; i < vl; i++) {                            \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
        if (!vm && !vext_elem_mask(v0, i)) {                        \
            /* set masked-off elements to 1s */                     \
            if (vma) {                                              \
                vext_set_elem_mask(vd, i, 1);                       \
            }                                                       \
            continue;                                               \
        }                                                           \
        vext_set_elem_mask(vd, i,                                   \
                           DO_OP(s2, (ETYPE)s1, &env->fp_status));  \
    }                                                               \
    env->vstart = 0;                                                \
    /*
     * mask destination registers are always tail-agnostic;
     * set tail elements to 1s
     */                                                             \
    if (vta_all_1s) {                                               \
        for (; i < total_elems; i++) {                              \
            vext_set_elem_mask(vd, i, 1);                           \
        }                                                           \
    }                                                               \
}
43032a68e9e5SLIU Zhiwei 
/* vmfeq.vf uses the quiet compare: qNaN operands do not raise invalid */
GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
43072a68e9e5SLIU Zhiwei 
43082a68e9e5SLIU Zhiwei static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
43092a68e9e5SLIU Zhiwei {
43102a68e9e5SLIU Zhiwei     FloatRelation compare = float16_compare_quiet(a, b, s);
43112a68e9e5SLIU Zhiwei     return compare != float_relation_equal;
43122a68e9e5SLIU Zhiwei }
43132a68e9e5SLIU Zhiwei 
43142a68e9e5SLIU Zhiwei static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
43152a68e9e5SLIU Zhiwei {
43162a68e9e5SLIU Zhiwei     FloatRelation compare = float32_compare_quiet(a, b, s);
vmfne32(uint32_t a,uint32_t b,float_status * s)43172a68e9e5SLIU Zhiwei     return compare != float_relation_equal;
43182a68e9e5SLIU Zhiwei }
43192a68e9e5SLIU Zhiwei 
43202a68e9e5SLIU Zhiwei static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
43212a68e9e5SLIU Zhiwei {
43222a68e9e5SLIU Zhiwei     FloatRelation compare = float64_compare_quiet(a, b, s);
vmfne64(uint64_t a,uint64_t b,float_status * s)43232a68e9e5SLIU Zhiwei     return compare != float_relation_equal;
43242a68e9e5SLIU Zhiwei }
43252a68e9e5SLIU Zhiwei 
GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)

/* vmflt/vmfle use the signaling compare ops (floatN_lt / floatN_le) */
GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)

GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
43462a68e9e5SLIU Zhiwei 
43472a68e9e5SLIU Zhiwei static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
43482a68e9e5SLIU Zhiwei {
43492a68e9e5SLIU Zhiwei     FloatRelation compare = float16_compare(a, b, s);
43502a68e9e5SLIU Zhiwei     return compare == float_relation_greater;
43512a68e9e5SLIU Zhiwei }
43522a68e9e5SLIU Zhiwei 
43532a68e9e5SLIU Zhiwei static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
43542a68e9e5SLIU Zhiwei {
43552a68e9e5SLIU Zhiwei     FloatRelation compare = float32_compare(a, b, s);
vmfgt32(uint32_t a,uint32_t b,float_status * s)43562a68e9e5SLIU Zhiwei     return compare == float_relation_greater;
43572a68e9e5SLIU Zhiwei }
43582a68e9e5SLIU Zhiwei 
43592a68e9e5SLIU Zhiwei static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
43602a68e9e5SLIU Zhiwei {
43612a68e9e5SLIU Zhiwei     FloatRelation compare = float64_compare(a, b, s);
vmfgt64(uint64_t a,uint64_t b,float_status * s)43622a68e9e5SLIU Zhiwei     return compare == float_relation_greater;
43632a68e9e5SLIU Zhiwei }
43642a68e9e5SLIU Zhiwei 
/* vmfgt.vf exists only in the vector-scalar form */
GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
GEN_VEXT_CMP_VF(vmfgt_vf_h,uint16_t,H2,vmfgt16)43682a68e9e5SLIU Zhiwei 
43692a68e9e5SLIU Zhiwei static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
43702a68e9e5SLIU Zhiwei {
43712a68e9e5SLIU Zhiwei     FloatRelation compare = float16_compare(a, b, s);
43722a68e9e5SLIU Zhiwei     return compare == float_relation_greater ||
43732a68e9e5SLIU Zhiwei            compare == float_relation_equal;
43742a68e9e5SLIU Zhiwei }
43752a68e9e5SLIU Zhiwei 
43762a68e9e5SLIU Zhiwei static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
43772a68e9e5SLIU Zhiwei {
43782a68e9e5SLIU Zhiwei     FloatRelation compare = float32_compare(a, b, s);
vmfge32(uint32_t a,uint32_t b,float_status * s)43792a68e9e5SLIU Zhiwei     return compare == float_relation_greater ||
43802a68e9e5SLIU Zhiwei            compare == float_relation_equal;
43812a68e9e5SLIU Zhiwei }
43822a68e9e5SLIU Zhiwei 
43832a68e9e5SLIU Zhiwei static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
43842a68e9e5SLIU Zhiwei {
43852a68e9e5SLIU Zhiwei     FloatRelation compare = float64_compare(a, b, s);
vmfge64(uint64_t a,uint64_t b,float_status * s)43862a68e9e5SLIU Zhiwei     return compare == float_relation_greater ||
43872a68e9e5SLIU Zhiwei            compare == float_relation_equal;
43882a68e9e5SLIU Zhiwei }
43892a68e9e5SLIU Zhiwei 
/* vmfge.vf exists only in the vector-scalar form */
GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
GEN_VEXT_CMP_VF(vmfge_vf_h,uint16_t,H2,vmfge16)43932a68e9e5SLIU Zhiwei 
4394121ddbb3SLIU Zhiwei /* Vector Floating-Point Classify Instruction */
/*
 * Classify a half-precision value into a one-hot fclass mask:
 * bit 0/7 = -/+ infinity, 1/6 = -/+ normal, 2/5 = -/+ subnormal,
 * 3/4 = -/+ zero, 8 = signaling NaN, 9 = quiet NaN.
 */
target_ulong fclass_h(uint64_t frs1)
{
    float16 f = frs1;
    bool sign = float16_is_neg(f);

    if (float16_is_infinity(f)) {
        return sign ? 1 << 0 : 1 << 7;
    } else if (float16_is_zero(f)) {
        return sign ? 1 << 3 : 1 << 4;
    } else if (float16_is_zero_or_denormal(f)) {
        /* zero was handled above, so this branch means subnormal */
        return sign ? 1 << 2 : 1 << 5;
    } else if (float16_is_any_nan(f)) {
        float_status s = { }; /* for snan_bit_is_one */
        return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
    } else {
        return sign ? 1 << 1 : 1 << 6;
    }
}
4413121ddbb3SLIU Zhiwei 
/*
 * Classify a single-precision value into a one-hot fclass mask:
 * bit 0/7 = -/+ infinity, 1/6 = -/+ normal, 2/5 = -/+ subnormal,
 * 3/4 = -/+ zero, 8 = signaling NaN, 9 = quiet NaN.
 */
target_ulong fclass_s(uint64_t frs1)
{
    float32 f = frs1;
    bool sign = float32_is_neg(f);

    if (float32_is_infinity(f)) {
        return sign ? 1 << 0 : 1 << 7;
    } else if (float32_is_zero(f)) {
        return sign ? 1 << 3 : 1 << 4;
    } else if (float32_is_zero_or_denormal(f)) {
        /* zero was handled above, so this branch means subnormal */
        return sign ? 1 << 2 : 1 << 5;
    } else if (float32_is_any_nan(f)) {
        float_status s = { }; /* for snan_bit_is_one */
        return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
    } else {
        return sign ? 1 << 1 : 1 << 6;
    }
}
4432121ddbb3SLIU Zhiwei 
/*
 * Classify a double-precision value into a one-hot fclass mask:
 * bit 0/7 = -/+ infinity, 1/6 = -/+ normal, 2/5 = -/+ subnormal,
 * 3/4 = -/+ zero, 8 = signaling NaN, 9 = quiet NaN.
 */
target_ulong fclass_d(uint64_t frs1)
{
    float64 f = frs1;
    bool sign = float64_is_neg(f);

    if (float64_is_infinity(f)) {
        return sign ? 1 << 0 : 1 << 7;
    } else if (float64_is_zero(f)) {
        return sign ? 1 << 3 : 1 << 4;
    } else if (float64_is_zero_or_denormal(f)) {
        /* zero was handled above, so this branch means subnormal */
        return sign ? 1 << 2 : 1 << 5;
    } else if (float64_is_any_nan(f)) {
        float_status s = { }; /* for snan_bit_is_one */
        return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
    } else {
        return sign ? 1 << 1 : 1 << 6;
    }
}
4451121ddbb3SLIU Zhiwei 
/*
 * vfclass.v: the fclass_* kernels take no float_status, so the non-_ENV
 * generator is used (no FP exception state is touched).
 */
RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
GEN_VEXT_V(vfclass_v_h, 2)
GEN_VEXT_V(vfclass_v_w, 4)
GEN_VEXT_V(vfclass_v_d, 8)
445864ab5846SLIU Zhiwei 
445964ab5846SLIU Zhiwei /* Vector Floating-Point Merge Instruction */
44605eacf7d8SeopXD 
/*
 * Generate a helper for vfmerge.vfm: for each active element,
 * vd[i] = v0.mask[i] ? s1 : vs2[i]; with vm set every element gets s1.
 * Tail elements are set to 1s per the tail-agnostic policy (vta).
 */
#define GEN_VFMERGE_VF(NAME, ETYPE, H)                        \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t vl = env->vl;                                    \
    uint32_t esz = sizeof(ETYPE);                             \
    uint32_t total_elems =                                    \
        vext_get_total_elems(env, desc, esz);                 \
    uint32_t vta = vext_vta(desc);                            \
    uint32_t i;                                               \
                                                              \
    VSTART_CHECK_EARLY_EXIT(env, vl);                         \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        *((ETYPE *)vd + H(i)) =                               \
            (!vm && !vext_elem_mask(v0, i) ? s2 : s1);        \
    }                                                         \
    env->vstart = 0;                                          \
    /* set tail elements to 1s */                             \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);  \
}
448464ab5846SLIU Zhiwei 
GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)

/* Single-Width Floating-Point/Integer Type-Convert Instructions */
/* Second argument of GEN_VEXT_V_ENV is the element size in bytes. */
/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2)
GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4)
GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8)

/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2)
GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4)
GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8)

/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2)
GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4)
GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8)

/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2)
GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4)
GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8)
45214514b7b1SLIU Zhiwei 
/* Widening Floating-Point/Integer Type-Convert Instructions */
/* (TD, T2, TX2): destination type is twice the width of the source type */
#define WOP_UU_B uint16_t, uint8_t,  uint8_t
#define WOP_UU_H uint32_t, uint16_t, uint16_t
#define WOP_UU_W uint64_t, uint32_t, uint32_t
/*
 * vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.
 */
RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4)
GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8)

/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4)
GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8)

/*
 * vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float.
 * The _b variants widen byte-sized integers to float16.
 */
RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8)

/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8)
45584514b7b1SLIU Zhiwei 
45594514b7b1SLIU Zhiwei /*
4560246f8796SWeiwei Li  * vfwcvt.f.f.v vd, vs2, vm # Convert single-width float to double-width float.
45614514b7b1SLIU Zhiwei  */
45624514b7b1SLIU Zhiwei static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
45634514b7b1SLIU Zhiwei {
45644514b7b1SLIU Zhiwei     return float16_to_float32(a, true, s);
45654514b7b1SLIU Zhiwei }
45664514b7b1SLIU Zhiwei 
RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4)
GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8)

/* Widening bfloat16 -> float32 convert (Zvfbfmin). */
RVVCALL(OPFVV1, vfwcvtbf16_f_f_v, WOP_UU_H, H4, H2, bfloat16_to_float32)
GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v, 4)
457487b27bfcSWeiwei Li 
/* Narrowing Floating-Point/Integer Type-Convert Instructions */
/* (TD, T2, TX2): destination type, source load type, source compute type.
 * The destination is half the width of the source element. */
#define NOP_UU_B uint8_t,  uint16_t, uint32_t
#define NOP_UU_H uint16_t, uint32_t, uint32_t
#define NOP_UU_W uint32_t, uint64_t, uint64_t
/* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4)

/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1)
GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2)
GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4)

/*
 * vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float.
 */
RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2)
GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4)

/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2)
GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4)
4609878d406eSLIU Zhiwei 
4610878d406eSLIU Zhiwei /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */
4611878d406eSLIU Zhiwei static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4612878d406eSLIU Zhiwei {
4613878d406eSLIU Zhiwei     return float32_to_float16(a, true, s);
4614878d406eSLIU Zhiwei }
4615878d406eSLIU Zhiwei 
RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2)
GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4)

/* Narrowing float32 -> bfloat16 convert (Zvfbfmin). */
RVVCALL(OPFVV1, vfncvtbf16_f_f_w, NOP_UU_H, H2, H4, float32_to_bfloat16)
GEN_VEXT_V_ENV(vfncvtbf16_f_f_w, 2)
462387b27bfcSWeiwei Li 
/*
 * Vector Reduction Operations
 */
/* Vector Single-Width Integer Reduction Instructions */
/*
 * Generate a reduction helper: vd[0] = OP(vs1[0], vs2[0], ..., vs2[vl-1])
 * over the active elements.
 *   TD / HD:   destination element type and host-endian index macro
 *   TS2 / HS2: source (vs2) element type and index macro
 *   OP:        binary accumulation operation
 * Masked-off elements are skipped.  When vl == 0 the destination is left
 * unmodified (note the vl > 0 guard).  Tail elements are set to all-1s
 * when the tail-agnostic policy requests it.
 */
#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP)          \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vm = vext_vm(desc);                          \
    uint32_t vl = env->vl;                                \
    uint32_t esz = sizeof(TD);                            \
    uint32_t vlenb = simd_maxsz(desc);                    \
    uint32_t vta = vext_vta(desc);                        \
    uint32_t i;                                           \
    TD s1 =  *((TD *)vs1 + HD(0));                        \
                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                     \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                  \
        if (!vm && !vext_elem_mask(v0, i)) {              \
            continue;                                     \
        }                                                 \
        s1 = OP(s1, (TD)s2);                              \
    }                                                     \
    if (vl > 0) {                                         \
        *((TD *)vd + HD(0)) = s1;                         \
    }                                                     \
    env->vstart = 0;                                      \
    /* set tail elements to 1s */                         \
    vext_set_elems_1s(vd, vta, esz, vlenb);               \
}
4657fe5c9ab1SLIU Zhiwei 
/* vd[0] = sum(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredsum_vs_b, int8_t,  int8_t,  H1, H1, DO_ADD)
GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)

/* vd[0] = maxu(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmaxu_vs_b, uint8_t,  uint8_t,  H1, H1, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)

/* vd[0] = max(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmax_vs_b, int8_t,  int8_t,  H1, H1, DO_MAX)
GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)

/* vd[0] = minu(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredminu_vs_b, uint8_t,  uint8_t,  H1, H1, DO_MIN)
GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)

/* vd[0] = min(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmin_vs_b, int8_t,  int8_t,  H1, H1, DO_MIN)
GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)

/* vd[0] = and(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredand_vs_b, int8_t,  int8_t,  H1, H1, DO_AND)
GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)

/* vd[0] = or(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredor_vs_b, int8_t,  int8_t,  H1, H1, DO_OR)
GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)

/* vd[0] = xor(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredxor_vs_b, int8_t,  int8_t,  H1, H1, DO_XOR)
GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)

/* Vector Widening Integer Reduction Instructions */
/* signed sum reduction into double-width accumulator */
GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t,  H2, H1, DO_ADD)
GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)

/* Unsigned sum reduction into double-width accumulator */
GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t,  H2, H1, DO_ADD)
GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
4716523547f1SLIU Zhiwei 
/* Vector Single-Width Floating-Point Reduction Instructions */
/*
 * Floating-point counterpart of GEN_VEXT_RED: OP additionally receives
 * &env->fp_status so the accumulation observes the current rounding mode
 * and records exception flags.
 */
#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP)          \
void HELPER(NAME)(void *vd, void *v0, void *vs1,           \
                  void *vs2, CPURISCVState *env,           \
                  uint32_t desc)                           \
{                                                          \
    uint32_t vm = vext_vm(desc);                           \
    uint32_t vl = env->vl;                                 \
    uint32_t esz = sizeof(TD);                             \
    uint32_t vlenb = simd_maxsz(desc);                     \
    uint32_t vta = vext_vta(desc);                         \
    uint32_t i;                                            \
    TD s1 =  *((TD *)vs1 + HD(0));                         \
                                                           \
    VSTART_CHECK_EARLY_EXIT(env, vl);                      \
                                                           \
    for (i = env->vstart; i < vl; i++) {                   \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                   \
        if (!vm && !vext_elem_mask(v0, i)) {               \
            continue;                                      \
        }                                                  \
        s1 = OP(s1, (TD)s2, &env->fp_status);              \
    }                                                      \
    if (vl > 0) {                                          \
        *((TD *)vd + HD(0)) = s1;                          \
    }                                                      \
    env->vstart = 0;                                       \
    /* set tail elements to 1s */                          \
    vext_set_elems_1s(vd, vta, esz, vlenb);                \
}
4747523547f1SLIU Zhiwei 
/* Unordered sum */
/* usum and osum share the same element-order implementation here. */
GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)

/* Ordered sum */
GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)

/* Maximum value */
GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2,
              float16_maximum_number)
GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4,
              float32_maximum_number)
GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8,
              float64_maximum_number)

/* Minimum value */
GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2,
              float16_minimum_number)
GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4,
              float32_minimum_number)
GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8,
              float64_minimum_number)
4773696b0c26SLIU Zhiwei 
47745bda21c0SYang Liu /* Vector Widening Floating-Point Add Instructions */
fwadd32(uint64_t a,uint32_t b,float_status * s)47755bda21c0SYang Liu static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s)
47765bda21c0SYang Liu {
47775bda21c0SYang Liu     return float32_add(a, float16_to_float32(b, true, s), s);
47785bda21c0SYang Liu }
47795bda21c0SYang Liu 
47805bda21c0SYang Liu static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s)
47815bda21c0SYang Liu {
GEN_VEXT_FRED(vfwredusum_vs_h,uint32_t,uint16_t,H4,H2,fwadd16)47825bda21c0SYang Liu     return float64_add(a, float32_to_float64(b, s), s);
47835bda21c0SYang Liu }
47845bda21c0SYang Liu 
/* Vector Widening Floating-Point Reduction Instructions */
/* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
GEN_VEXT_FRED(vfwredusum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
4791c21f34aeSLIU Zhiwei 
/*
 * Vector Mask Operations
 */
/* Vector Mask-Register Logical Instructions */
/*
 * Generate a mask-logical helper: for each element index i < vl,
 * vd.mask[i] = OP(vs2.mask[i], vs1.mask[i]).
 * total_elems is the number of mask bits (vlenb bytes * 8).
 */
#define GEN_VEXT_MASK_VV(NAME, OP)                        \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vl = env->vl;                                \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;\
    uint32_t vta_all_1s = vext_vta_all_1s(desc);          \
    uint32_t i;                                           \
    int a, b;                                             \
                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                     \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        a = vext_elem_mask(vs1, i);                       \
        b = vext_elem_mask(vs2, i);                       \
        vext_set_elem_mask(vd, i, OP(b, a));              \
    }                                                     \
    env->vstart = 0;                                      \
    /*
     * mask destination register are always tail-agnostic
     * set tail elements to 1s
     */                                                   \
    if (vta_all_1s) {                                     \
        for (; i < total_elems; i++) {                    \
            vext_set_elem_mask(vd, i, 1);                 \
        }                                                 \
    }                                                     \
}
4825c21f34aeSLIU Zhiwei 
4826c21f34aeSLIU Zhiwei #define DO_NAND(N, M)  (!(N & M))
4827c21f34aeSLIU Zhiwei #define DO_ANDNOT(N, M)  (N & !M)
4828c21f34aeSLIU Zhiwei #define DO_NOR(N, M)  (!(N | M))
4829c21f34aeSLIU Zhiwei #define DO_ORNOT(N, M)  (N | !M)
4830c21f34aeSLIU Zhiwei #define DO_XNOR(N, M)  (!(N ^ M))
4831c21f34aeSLIU Zhiwei 
4832c21f34aeSLIU Zhiwei GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
4833c21f34aeSLIU Zhiwei GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
48349c0d2559SFrank Chang GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
4835c21f34aeSLIU Zhiwei GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
4836c21f34aeSLIU Zhiwei GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
4837c21f34aeSLIU Zhiwei GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
48389c0d2559SFrank Chang GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
4839c21f34aeSLIU Zhiwei GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
48402e88f551SLIU Zhiwei 
48410014aa74SFrank Chang /* Vector count population in mask vcpop */
48420014aa74SFrank Chang target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
48432e88f551SLIU Zhiwei                              uint32_t desc)
48442e88f551SLIU Zhiwei {
48452e88f551SLIU Zhiwei     target_ulong cnt = 0;
48462e88f551SLIU Zhiwei     uint32_t vm = vext_vm(desc);
48472e88f551SLIU Zhiwei     uint32_t vl = env->vl;
48482e88f551SLIU Zhiwei     int i;
48492e88f551SLIU Zhiwei 
4850f714361eSFrank Chang     for (i = env->vstart; i < vl; i++) {
4851f9298de5SFrank Chang         if (vm || vext_elem_mask(v0, i)) {
4852f9298de5SFrank Chang             if (vext_elem_mask(vs2, i)) {
48532e88f551SLIU Zhiwei                 cnt++;
48542e88f551SLIU Zhiwei             }
48552e88f551SLIU Zhiwei         }
48562e88f551SLIU Zhiwei     }
HELPER(vfirst_m)4857f714361eSFrank Chang     env->vstart = 0;
48582e88f551SLIU Zhiwei     return cnt;
48592e88f551SLIU Zhiwei }
48600db67e1cSLIU Zhiwei 
4861d71a24fcSFrank Chang /* vfirst find-first-set mask bit */
4862d71a24fcSFrank Chang target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
48630db67e1cSLIU Zhiwei                               uint32_t desc)
48640db67e1cSLIU Zhiwei {
48650db67e1cSLIU Zhiwei     uint32_t vm = vext_vm(desc);
48660db67e1cSLIU Zhiwei     uint32_t vl = env->vl;
48670db67e1cSLIU Zhiwei     int i;
48680db67e1cSLIU Zhiwei 
4869f714361eSFrank Chang     for (i = env->vstart; i < vl; i++) {
4870f9298de5SFrank Chang         if (vm || vext_elem_mask(v0, i)) {
4871f9298de5SFrank Chang             if (vext_elem_mask(vs2, i)) {
48720db67e1cSLIU Zhiwei                 return i;
48730db67e1cSLIU Zhiwei             }
48740db67e1cSLIU Zhiwei         }
48750db67e1cSLIU Zhiwei     }
4876f714361eSFrank Chang     env->vstart = 0;
48770db67e1cSLIU Zhiwei     return -1LL;
48780db67e1cSLIU Zhiwei }
487981fbf7daSLIU Zhiwei 
enum set_mask_type {
    ONLY_FIRST = 1,  /* vmsof: set only the first found mask bit */
    INCLUDE_FIRST,   /* vmsif: set bits up to and including the first */
    BEFORE_FIRST,    /* vmsbf: set bits strictly before the first */
};

/*
 * Common backend for vmsbf.m / vmsif.m / vmsof.m.  Scans vs2 from vstart
 * for the first set mask bit among active elements and writes a prefix
 * pattern into vd chosen by 'type'.  Once the first set bit has been seen,
 * all following active elements are written as 0.  Masked-off elements are
 * written as 1 when the mask-agnostic (vma) policy is in effect, otherwise
 * left untouched.
 */
static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
                   uint32_t desc, enum set_mask_type type)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;
    uint32_t vta_all_1s = vext_vta_all_1s(desc);
    uint32_t vma = vext_vma(desc);
    int i;
    bool first_mask_bit = false;

    VSTART_CHECK_EARLY_EXIT(env, vl);

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            /* set masked-off elements to 1s */
            if (vma) {
                vext_set_elem_mask(vd, i, 1);
            }
            continue;
        }
        /* write a zero to all following active elements */
        if (first_mask_bit) {
            vext_set_elem_mask(vd, i, 0);
            continue;
        }
        if (vext_elem_mask(vs2, i)) {
            first_mask_bit = true;
            /* the element holding the first set bit itself: 0 for "before",
             * 1 for "only"/"include" */
            if (type == BEFORE_FIRST) {
                vext_set_elem_mask(vd, i, 0);
            } else {
                vext_set_elem_mask(vd, i, 1);
            }
        } else {
            /* before the first set bit: 0 for "only", 1 otherwise */
            if (type == ONLY_FIRST) {
                vext_set_elem_mask(vd, i, 0);
            } else {
                vext_set_elem_mask(vd, i, 1);
            }
        }
    }
    env->vstart = 0;
    /*
     * mask destination register are always tail-agnostic
     * set tail elements to 1s
     */
    if (vta_all_1s) {
        for (; i < total_elems; i++) {
            vext_set_elem_mask(vd, i, 1);
        }
    }
}
HELPER(vmsif_m)493881fbf7daSLIU Zhiwei 
/* vmsbf.m: set-before-first mask bit */
void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
}

/* vmsif.m: set-including-first mask bit */
void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
}

/* vmsof.m: set-only-first mask bit */
void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
}
495678d90cfeSLIU Zhiwei 
/* Vector Iota Instruction */
/*
 * viota.m: vd[i] = running count of set bits in vs2 among preceding
 * active elements (a prefix sum of the mask).  Masked-off destination
 * elements are filled with 1s when the mask-agnostic policy applies.
 */
#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H)                                  \
void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env,      \
                  uint32_t desc)                                          \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t sum = 0;                                                     \
    int i;                                                                \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = sum;                                      \
        if (vext_elem_mask(vs2, i)) {                                     \
            sum++;                                                        \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VIOTA_M(viota_m_b, uint8_t,  H1)
GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
4993126bec3fSLIU Zhiwei 
/* Vector Element Index Instruction */
/*
 * vid.v: vd[i] = i for each active element.  Masked-off destination
 * elements are filled with 1s when the mask-agnostic policy applies.
 */
#define GEN_VEXT_VID_V(NAME, ETYPE, H)                                    \
void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc)  \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    int i;                                                                \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = i;                                        \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VID_V(vid_v_b, uint8_t,  H1)
GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
5025ec17e036SLIU Zhiwei 
5026ec17e036SLIU Zhiwei /*
50273b57254dSWeiwei Li  * Vector Permutation Instructions
5028ec17e036SLIU Zhiwei  */
5029ec17e036SLIU Zhiwei 
5030ec17e036SLIU Zhiwei /* Vector Slide Instructions */
50313479a814SFrank Chang #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                              \
5032ec17e036SLIU Zhiwei void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
5033ec17e036SLIU Zhiwei                   CPURISCVState *env, uint32_t desc)                      \
5034ec17e036SLIU Zhiwei {                                                                         \
5035ec17e036SLIU Zhiwei     uint32_t vm = vext_vm(desc);                                          \
5036ec17e036SLIU Zhiwei     uint32_t vl = env->vl;                                                \
5037803963f7SeopXD     uint32_t esz = sizeof(ETYPE);                                         \
5038803963f7SeopXD     uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
5039803963f7SeopXD     uint32_t vta = vext_vta(desc);                                        \
5040edabcd0eSYueh-Ting (eop) Chen     uint32_t vma = vext_vma(desc);                                        \
5041f714361eSFrank Chang     target_ulong offset = s1, i_min, i;                                   \
5042ec17e036SLIU Zhiwei                                                                           \
50437f5f3e5aSChao Liu     VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
5044df4252b2SDaniel Henrique Barboza                                                                           \
5045f714361eSFrank Chang     i_min = MAX(env->vstart, offset);                                     \
5046f714361eSFrank Chang     for (i = i_min; i < vl; i++) {                                        \
5047f9298de5SFrank Chang         if (!vm && !vext_elem_mask(v0, i)) {                              \
5048edabcd0eSYueh-Ting (eop) Chen             /* set masked-off elements to 1s */                           \
5049edabcd0eSYueh-Ting (eop) Chen             vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
5050ec17e036SLIU Zhiwei             continue;                                                     \
5051ec17e036SLIU Zhiwei         }                                                                 \
5052ec17e036SLIU Zhiwei         *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
5053ec17e036SLIU Zhiwei     }                                                                     \
5054d3646e31SDaniel Henrique Barboza     env->vstart = 0;                                                      \
5055803963f7SeopXD     /* set tail elements to 1s */                                         \
5056803963f7SeopXD     vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
5057ec17e036SLIU Zhiwei }
5058ec17e036SLIU Zhiwei 
5059ec17e036SLIU Zhiwei /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
50603479a814SFrank Chang GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t,  H1)
50613479a814SFrank Chang GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
50623479a814SFrank Chang GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
50633479a814SFrank Chang GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
5064ec17e036SLIU Zhiwei 
50653479a814SFrank Chang #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                            \
5066ec17e036SLIU Zhiwei void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
5067ec17e036SLIU Zhiwei                   CPURISCVState *env, uint32_t desc)                      \
5068ec17e036SLIU Zhiwei {                                                                         \
50696438ed61SFrank Chang     uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
5070ec17e036SLIU Zhiwei     uint32_t vm = vext_vm(desc);                                          \
5071ec17e036SLIU Zhiwei     uint32_t vl = env->vl;                                                \
5072803963f7SeopXD     uint32_t esz = sizeof(ETYPE);                                         \
5073803963f7SeopXD     uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
5074803963f7SeopXD     uint32_t vta = vext_vta(desc);                                        \
5075edabcd0eSYueh-Ting (eop) Chen     uint32_t vma = vext_vma(desc);                                        \
5076f3f65c40SAlistair Francis     target_ulong i_max, i_min, i;                                         \
5077ec17e036SLIU Zhiwei                                                                           \
50787f5f3e5aSChao Liu     VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
5079df4252b2SDaniel Henrique Barboza                                                                           \
5080f3f65c40SAlistair Francis     i_min = MIN(s1 < vlmax ? vlmax - s1 : 0, vl);                         \
5081f3f65c40SAlistair Francis     i_max = MAX(i_min, env->vstart);                                      \
5082f714361eSFrank Chang     for (i = env->vstart; i < i_max; ++i) {                               \
5083edabcd0eSYueh-Ting (eop) Chen         if (!vm && !vext_elem_mask(v0, i)) {                              \
5084edabcd0eSYueh-Ting (eop) Chen             /* set masked-off elements to 1s */                           \
5085edabcd0eSYueh-Ting (eop) Chen             vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
5086edabcd0eSYueh-Ting (eop) Chen             continue;                                                     \
5087ec17e036SLIU Zhiwei         }                                                                 \
5088edabcd0eSYueh-Ting (eop) Chen         *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));              \
50896438ed61SFrank Chang     }                                                                     \
50906438ed61SFrank Chang                                                                           \
50916438ed61SFrank Chang     for (i = i_max; i < vl; ++i) {                                        \
50926438ed61SFrank Chang         if (vm || vext_elem_mask(v0, i)) {                                \
50936438ed61SFrank Chang             *((ETYPE *)vd + H(i)) = 0;                                    \
50946438ed61SFrank Chang         }                                                                 \
5095ec17e036SLIU Zhiwei     }                                                                     \
5096f714361eSFrank Chang                                                                           \
5097f714361eSFrank Chang     env->vstart = 0;                                                      \
5098803963f7SeopXD     /* set tail elements to 1s */                                         \
5099803963f7SeopXD     vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
5100ec17e036SLIU Zhiwei }
5101ec17e036SLIU Zhiwei 
5102ec17e036SLIU Zhiwei /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
51033479a814SFrank Chang GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t,  H1)
51043479a814SFrank Chang GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
51053479a814SFrank Chang GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
51063479a814SFrank Chang GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
5107ec17e036SLIU Zhiwei 
5108c7b8a421SeopXD #define GEN_VEXT_VSLIE1UP(BITWIDTH, H)                                      \
51098c89d50cSLIU Zhiwei static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1,           \
5110c45eff30SWeiwei Li                                  void *vs2, CPURISCVState *env,             \
5111c45eff30SWeiwei Li                                  uint32_t desc)                             \
5112ec17e036SLIU Zhiwei {                                                                           \
5113c7b8a421SeopXD     typedef uint##BITWIDTH##_t ETYPE;                                       \
5114ec17e036SLIU Zhiwei     uint32_t vm = vext_vm(desc);                                            \
5115ec17e036SLIU Zhiwei     uint32_t vl = env->vl;                                                  \
5116803963f7SeopXD     uint32_t esz = sizeof(ETYPE);                                           \
5117803963f7SeopXD     uint32_t total_elems = vext_get_total_elems(env, desc, esz);            \
5118803963f7SeopXD     uint32_t vta = vext_vta(desc);                                          \
5119edabcd0eSYueh-Ting (eop) Chen     uint32_t vma = vext_vma(desc);                                          \
5120ec17e036SLIU Zhiwei     uint32_t i;                                                             \
5121ec17e036SLIU Zhiwei                                                                             \
51227f5f3e5aSChao Liu     VSTART_CHECK_EARLY_EXIT(env, vl);                                       \
5123df4252b2SDaniel Henrique Barboza                                                                             \
5124f714361eSFrank Chang     for (i = env->vstart; i < vl; i++) {                                    \
5125f9298de5SFrank Chang         if (!vm && !vext_elem_mask(v0, i)) {                                \
5126edabcd0eSYueh-Ting (eop) Chen             /* set masked-off elements to 1s */                             \
5127edabcd0eSYueh-Ting (eop) Chen             vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);             \
5128ec17e036SLIU Zhiwei             continue;                                                       \
5129ec17e036SLIU Zhiwei         }                                                                   \
5130ec17e036SLIU Zhiwei         if (i == 0) {                                                       \
5131ec17e036SLIU Zhiwei             *((ETYPE *)vd + H(i)) = s1;                                     \
5132ec17e036SLIU Zhiwei         } else {                                                            \
5133ec17e036SLIU Zhiwei             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));             \
5134ec17e036SLIU Zhiwei         }                                                                   \
5135ec17e036SLIU Zhiwei     }                                                                       \
5136f714361eSFrank Chang     env->vstart = 0;                                                        \
5137803963f7SeopXD     /* set tail elements to 1s */                                           \
5138803963f7SeopXD     vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);                \
5139ec17e036SLIU Zhiwei }
5140ec17e036SLIU Zhiwei 
51418500d4abSFrank Chang GEN_VEXT_VSLIE1UP(8,  H1)
51428500d4abSFrank Chang GEN_VEXT_VSLIE1UP(16, H2)
51438500d4abSFrank Chang GEN_VEXT_VSLIE1UP(32, H4)
51448500d4abSFrank Chang GEN_VEXT_VSLIE1UP(64, H8)
5145ec17e036SLIU Zhiwei 
5146c7b8a421SeopXD #define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH)                     \
5147ec17e036SLIU Zhiwei void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5148ec17e036SLIU Zhiwei                   CPURISCVState *env, uint32_t desc)              \
5149ec17e036SLIU Zhiwei {                                                                 \
5150c7b8a421SeopXD     vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);             \
51518500d4abSFrank Chang }
51528500d4abSFrank Chang 
51538500d4abSFrank Chang /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
51548500d4abSFrank Chang GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
51558500d4abSFrank Chang GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
51568500d4abSFrank Chang GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
51578500d4abSFrank Chang GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
51588500d4abSFrank Chang 
5159c7b8a421SeopXD #define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H)                                     \
51608c89d50cSLIU Zhiwei static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1,           \
5161c45eff30SWeiwei Li                                    void *vs2, CPURISCVState *env,             \
5162c45eff30SWeiwei Li                                    uint32_t desc)                             \
51638500d4abSFrank Chang {                                                                             \
5164c7b8a421SeopXD     typedef uint##BITWIDTH##_t ETYPE;                                         \
5165ec17e036SLIU Zhiwei     uint32_t vm = vext_vm(desc);                                              \
5166ec17e036SLIU Zhiwei     uint32_t vl = env->vl;                                                    \
5167803963f7SeopXD     uint32_t esz = sizeof(ETYPE);                                             \
5168803963f7SeopXD     uint32_t total_elems = vext_get_total_elems(env, desc, esz);              \
5169803963f7SeopXD     uint32_t vta = vext_vta(desc);                                            \
5170edabcd0eSYueh-Ting (eop) Chen     uint32_t vma = vext_vma(desc);                                            \
5171ec17e036SLIU Zhiwei     uint32_t i;                                                               \
5172ec17e036SLIU Zhiwei                                                                               \
51737f5f3e5aSChao Liu     VSTART_CHECK_EARLY_EXIT(env, vl);                                         \
5174df4252b2SDaniel Henrique Barboza                                                                               \
5175f714361eSFrank Chang     for (i = env->vstart; i < vl; i++) {                                      \
5176f9298de5SFrank Chang         if (!vm && !vext_elem_mask(v0, i)) {                                  \
5177edabcd0eSYueh-Ting (eop) Chen             /* set masked-off elements to 1s */                               \
5178edabcd0eSYueh-Ting (eop) Chen             vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);               \
5179ec17e036SLIU Zhiwei             continue;                                                         \
5180ec17e036SLIU Zhiwei         }                                                                     \
5181ec17e036SLIU Zhiwei         if (i == vl - 1) {                                                    \
5182ec17e036SLIU Zhiwei             *((ETYPE *)vd + H(i)) = s1;                                       \
5183ec17e036SLIU Zhiwei         } else {                                                              \
5184ec17e036SLIU Zhiwei             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));               \
5185ec17e036SLIU Zhiwei         }                                                                     \
5186ec17e036SLIU Zhiwei     }                                                                         \
5187f714361eSFrank Chang     env->vstart = 0;                                                          \
5188803963f7SeopXD     /* set tail elements to 1s */                                             \
5189803963f7SeopXD     vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);                  \
5190ec17e036SLIU Zhiwei }
5191ec17e036SLIU Zhiwei 
51928500d4abSFrank Chang GEN_VEXT_VSLIDE1DOWN(8,  H1)
51938500d4abSFrank Chang GEN_VEXT_VSLIDE1DOWN(16, H2)
51948500d4abSFrank Chang GEN_VEXT_VSLIDE1DOWN(32, H4)
51958500d4abSFrank Chang GEN_VEXT_VSLIDE1DOWN(64, H8)
51968500d4abSFrank Chang 
5197c7b8a421SeopXD #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH)                   \
51988500d4abSFrank Chang void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
51998500d4abSFrank Chang                   CPURISCVState *env, uint32_t desc)              \
52008500d4abSFrank Chang {                                                                 \
5201c7b8a421SeopXD     vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);           \
52028500d4abSFrank Chang }
52038500d4abSFrank Chang 
5204ec17e036SLIU Zhiwei /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
52058500d4abSFrank Chang GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
52068500d4abSFrank Chang GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
52078500d4abSFrank Chang GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
52088500d4abSFrank Chang GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
52098500d4abSFrank Chang 
52108500d4abSFrank Chang /* Vector Floating-Point Slide Instructions */
5211c7b8a421SeopXD #define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH)                \
52128500d4abSFrank Chang void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
52138500d4abSFrank Chang                   CPURISCVState *env, uint32_t desc)          \
52148500d4abSFrank Chang {                                                             \
5215c7b8a421SeopXD     vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);         \
52168500d4abSFrank Chang }
52178500d4abSFrank Chang 
52188500d4abSFrank Chang /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
52198500d4abSFrank Chang GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
52208500d4abSFrank Chang GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
52218500d4abSFrank Chang GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)
52228500d4abSFrank Chang 
5223c7b8a421SeopXD #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH)              \
52248500d4abSFrank Chang void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
52258500d4abSFrank Chang                   CPURISCVState *env, uint32_t desc)          \
52268500d4abSFrank Chang {                                                             \
5227c7b8a421SeopXD     vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);       \
52288500d4abSFrank Chang }
52298500d4abSFrank Chang 
52308500d4abSFrank Chang /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
52318500d4abSFrank Chang GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
52328500d4abSFrank Chang GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
52338500d4abSFrank Chang GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
5234e4b83d5cSLIU Zhiwei 
5235e4b83d5cSLIU Zhiwei /* Vector Register Gather Instruction */
523650bfb45bSFrank Chang #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2)                    \
5237e4b83d5cSLIU Zhiwei void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
5238e4b83d5cSLIU Zhiwei                   CPURISCVState *env, uint32_t desc)                      \
5239e4b83d5cSLIU Zhiwei {                                                                         \
5240f714361eSFrank Chang     uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2)));             \
5241e4b83d5cSLIU Zhiwei     uint32_t vm = vext_vm(desc);                                          \
5242e4b83d5cSLIU Zhiwei     uint32_t vl = env->vl;                                                \
5243803963f7SeopXD     uint32_t esz = sizeof(TS2);                                           \
5244803963f7SeopXD     uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
5245803963f7SeopXD     uint32_t vta = vext_vta(desc);                                        \
5246edabcd0eSYueh-Ting (eop) Chen     uint32_t vma = vext_vma(desc);                                        \
5247b11e84b8SFrank Chang     uint64_t index;                                                       \
5248b11e84b8SFrank Chang     uint32_t i;                                                           \
5249e4b83d5cSLIU Zhiwei                                                                           \
52507f5f3e5aSChao Liu     VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
5251df4252b2SDaniel Henrique Barboza                                                                           \
5252f714361eSFrank Chang     for (i = env->vstart; i < vl; i++) {                                  \
5253f9298de5SFrank Chang         if (!vm && !vext_elem_mask(v0, i)) {                              \
5254edabcd0eSYueh-Ting (eop) Chen             /* set masked-off elements to 1s */                           \
5255edabcd0eSYueh-Ting (eop) Chen             vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
5256e4b83d5cSLIU Zhiwei             continue;                                                     \
5257e4b83d5cSLIU Zhiwei         }                                                                 \
525850bfb45bSFrank Chang         index = *((TS1 *)vs1 + HS1(i));                                   \
5259e4b83d5cSLIU Zhiwei         if (index >= vlmax) {                                             \
526050bfb45bSFrank Chang             *((TS2 *)vd + HS2(i)) = 0;                                    \
5261e4b83d5cSLIU Zhiwei         } else {                                                          \
526250bfb45bSFrank Chang             *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index));           \
5263e4b83d5cSLIU Zhiwei         }                                                                 \
5264e4b83d5cSLIU Zhiwei     }                                                                     \
5265f714361eSFrank Chang     env->vstart = 0;                                                      \
5266803963f7SeopXD     /* set tail elements to 1s */                                         \
5267803963f7SeopXD     vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
5268e4b83d5cSLIU Zhiwei }
5269e4b83d5cSLIU Zhiwei 
5270e4b83d5cSLIU Zhiwei /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
527150bfb45bSFrank Chang GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t,  uint8_t,  H1, H1)
527250bfb45bSFrank Chang GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
527350bfb45bSFrank Chang GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
527450bfb45bSFrank Chang GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)
527550bfb45bSFrank Chang 
527650bfb45bSFrank Chang GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t,  H2, H1)
527750bfb45bSFrank Chang GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
527850bfb45bSFrank Chang GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
527950bfb45bSFrank Chang GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
5280e4b83d5cSLIU Zhiwei 
52813479a814SFrank Chang #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                              \
5282e4b83d5cSLIU Zhiwei void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
5283e4b83d5cSLIU Zhiwei                   CPURISCVState *env, uint32_t desc)                      \
5284e4b83d5cSLIU Zhiwei {                                                                         \
52855a9f8e15SFrank Chang     uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
5286e4b83d5cSLIU Zhiwei     uint32_t vm = vext_vm(desc);                                          \
5287e4b83d5cSLIU Zhiwei     uint32_t vl = env->vl;                                                \
5288803963f7SeopXD     uint32_t esz = sizeof(ETYPE);                                         \
5289803963f7SeopXD     uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
5290803963f7SeopXD     uint32_t vta = vext_vta(desc);                                        \
5291edabcd0eSYueh-Ting (eop) Chen     uint32_t vma = vext_vma(desc);                                        \
5292b11e84b8SFrank Chang     uint64_t index = s1;                                                  \
5293b11e84b8SFrank Chang     uint32_t i;                                                           \
5294e4b83d5cSLIU Zhiwei                                                                           \
52957f5f3e5aSChao Liu     VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
5296df4252b2SDaniel Henrique Barboza                                                                           \
5297f714361eSFrank Chang     for (i = env->vstart; i < vl; i++) {                                  \
5298f9298de5SFrank Chang         if (!vm && !vext_elem_mask(v0, i)) {                              \
5299edabcd0eSYueh-Ting (eop) Chen             /* set masked-off elements to 1s */                           \
5300edabcd0eSYueh-Ting (eop) Chen             vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
5301e4b83d5cSLIU Zhiwei             continue;                                                     \
5302e4b83d5cSLIU Zhiwei         }                                                                 \
5303e4b83d5cSLIU Zhiwei         if (index >= vlmax) {                                             \
5304e4b83d5cSLIU Zhiwei             *((ETYPE *)vd + H(i)) = 0;                                    \
5305e4b83d5cSLIU Zhiwei         } else {                                                          \
5306e4b83d5cSLIU Zhiwei             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
5307e4b83d5cSLIU Zhiwei         }                                                                 \
5308e4b83d5cSLIU Zhiwei     }                                                                     \
5309f714361eSFrank Chang     env->vstart = 0;                                                      \
5310803963f7SeopXD     /* set tail elements to 1s */                                         \
5311803963f7SeopXD     vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
5312e4b83d5cSLIU Zhiwei }
5313e4b83d5cSLIU Zhiwei 
5314e4b83d5cSLIU Zhiwei /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */
53153479a814SFrank Chang GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t,  H1)
53163479a814SFrank Chang GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
53173479a814SFrank Chang GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
53183479a814SFrank Chang GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
531931bf42a2SLIU Zhiwei 
532031bf42a2SLIU Zhiwei /* Vector Compress Instruction */
53213479a814SFrank Chang #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
532231bf42a2SLIU Zhiwei void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
532331bf42a2SLIU Zhiwei                   CPURISCVState *env, uint32_t desc)                      \
532431bf42a2SLIU Zhiwei {                                                                         \
532531bf42a2SLIU Zhiwei     uint32_t vl = env->vl;                                                \
5326803963f7SeopXD     uint32_t esz = sizeof(ETYPE);                                         \
5327803963f7SeopXD     uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
5328803963f7SeopXD     uint32_t vta = vext_vta(desc);                                        \
532931bf42a2SLIU Zhiwei     uint32_t num = 0, i;                                                  \
533031bf42a2SLIU Zhiwei                                                                           \
5331*3cdd1f45SChao Liu     VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
5332*3cdd1f45SChao Liu                                                                           \
5333f714361eSFrank Chang     for (i = env->vstart; i < vl; i++) {                                  \
5334f9298de5SFrank Chang         if (!vext_elem_mask(vs1, i)) {                                    \
533531bf42a2SLIU Zhiwei             continue;                                                     \
533631bf42a2SLIU Zhiwei         }                                                                 \
533731bf42a2SLIU Zhiwei         *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
533831bf42a2SLIU Zhiwei         num++;                                                            \
533931bf42a2SLIU Zhiwei     }                                                                     \
5340f714361eSFrank Chang     env->vstart = 0;                                                      \
5341803963f7SeopXD     /* set tail elements to 1s */                                         \
5342c128d39eSAnton Blanchard     vext_set_elems_1s(vd, vta, num * esz, total_elems * esz);             \
534331bf42a2SLIU Zhiwei }
534431bf42a2SLIU Zhiwei 
534531bf42a2SLIU Zhiwei /* Compress into vd elements of vs2 where vs1 is enabled */
53463479a814SFrank Chang GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t,  H1)
53473479a814SFrank Chang GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
53483479a814SFrank Chang GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
53493479a814SFrank Chang GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
5350cd01340eSFrank Chang 
5351f714361eSFrank Chang /* Vector Whole Register Move */
5352f32d82f6SWeiwei Li void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
5353f32d82f6SWeiwei Li {
5354f06193c4SWeiwei Li     /* EEW = SEW */
5355f32d82f6SWeiwei Li     uint32_t maxsz = simd_maxsz(desc);
5356f06193c4SWeiwei Li     uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
5357f06193c4SWeiwei Li     uint32_t startb = env->vstart * sewb;
5358f06193c4SWeiwei Li     uint32_t i = startb;
5359f714361eSFrank Chang 
53607e53e3ddSDaniel Henrique Barboza     if (startb >= maxsz) {
53617e53e3ddSDaniel Henrique Barboza         env->vstart = 0;
53627e53e3ddSDaniel Henrique Barboza         return;
53637e53e3ddSDaniel Henrique Barboza     }
53647e53e3ddSDaniel Henrique Barboza 
5365768e7b32SDaniel Henrique Barboza     if (HOST_BIG_ENDIAN && i % 8 != 0) {
5366768e7b32SDaniel Henrique Barboza         uint32_t j = ROUND_UP(i, 8);
5367768e7b32SDaniel Henrique Barboza         memcpy((uint8_t *)vd + H1(j - 1),
5368768e7b32SDaniel Henrique Barboza                (uint8_t *)vs2 + H1(j - 1),
5369768e7b32SDaniel Henrique Barboza                j - i);
5370768e7b32SDaniel Henrique Barboza         i = j;
5371768e7b32SDaniel Henrique Barboza     }
5372768e7b32SDaniel Henrique Barboza 
5373f32d82f6SWeiwei Li     memcpy((uint8_t *)vd + H1(i),
5374f32d82f6SWeiwei Li            (uint8_t *)vs2 + H1(i),
5375768e7b32SDaniel Henrique Barboza            maxsz - i);
5376f32d82f6SWeiwei Li 
5377f32d82f6SWeiwei Li     env->vstart = 0;
5378f32d82f6SWeiwei Li }
5379f714361eSFrank Chang 
5380cd01340eSFrank Chang /* Vector Integer Extension */
5381cd01340eSFrank Chang #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)            \
5382cd01340eSFrank Chang void HELPER(NAME)(void *vd, void *v0, void *vs2,                 \
5383cd01340eSFrank Chang                   CPURISCVState *env, uint32_t desc)             \
5384cd01340eSFrank Chang {                                                                \
5385cd01340eSFrank Chang     uint32_t vl = env->vl;                                       \
5386cd01340eSFrank Chang     uint32_t vm = vext_vm(desc);                                 \
5387803963f7SeopXD     uint32_t esz = sizeof(ETYPE);                                \
5388803963f7SeopXD     uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5389803963f7SeopXD     uint32_t vta = vext_vta(desc);                               \
5390edabcd0eSYueh-Ting (eop) Chen     uint32_t vma = vext_vma(desc);                               \
5391cd01340eSFrank Chang     uint32_t i;                                                  \
5392cd01340eSFrank Chang                                                                  \
53937f5f3e5aSChao Liu     VSTART_CHECK_EARLY_EXIT(env, vl);                            \
5394df4252b2SDaniel Henrique Barboza                                                                  \
5395f714361eSFrank Chang     for (i = env->vstart; i < vl; i++) {                         \
5396cd01340eSFrank Chang         if (!vm && !vext_elem_mask(v0, i)) {                     \
5397edabcd0eSYueh-Ting (eop) Chen             /* set masked-off elements to 1s */                  \
5398edabcd0eSYueh-Ting (eop) Chen             vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);  \
5399cd01340eSFrank Chang             continue;                                            \
5400cd01340eSFrank Chang         }                                                        \
5401cd01340eSFrank Chang         *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));       \
5402cd01340eSFrank Chang     }                                                            \
5403f714361eSFrank Chang     env->vstart = 0;                                             \
5404803963f7SeopXD     /* set tail elements to 1s */                                \
5405803963f7SeopXD     vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);     \
5406cd01340eSFrank Chang }
5407cd01340eSFrank Chang 
5408cd01340eSFrank Chang GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t,  H2, H1)
5409cd01340eSFrank Chang GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
5410cd01340eSFrank Chang GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
5411cd01340eSFrank Chang GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t,  H4, H1)
5412cd01340eSFrank Chang GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
5413cd01340eSFrank Chang GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t,  H8, H1)
5414cd01340eSFrank Chang 
5415cd01340eSFrank Chang GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t,  H2, H1)
5416cd01340eSFrank Chang GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
5417cd01340eSFrank Chang GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
5418cd01340eSFrank Chang GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t,  H4, H1)
5419cd01340eSFrank Chang GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
5420cd01340eSFrank Chang GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t,  H8, H1)
5421