1 /*
2 * RISC-V Vector Extension Helpers for QEMU.
3 *
4 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2 or later, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19 #include "qemu/osdep.h"
20 #include "qemu/host-utils.h"
21 #include "qemu/bitops.h"
22 #include "cpu.h"
23 #include "exec/memop.h"
24 #include "exec/exec-all.h"
25 #include "exec/cpu_ldst.h"
26 #include "exec/page-protection.h"
27 #include "exec/helper-proto.h"
28 #include "fpu/softfloat.h"
29 #include "tcg/tcg-gvec-desc.h"
30 #include "internals.h"
31 #include "vector_internals.h"
32 #include <math.h>
33
34 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
35 target_ulong s2)
36 {
37 int vlmax, vl;
38 RISCVCPU *cpu = env_archcpu(env);
39 uint64_t vlmul = FIELD_EX64(s2, VTYPE, VLMUL);
40 uint8_t vsew = FIELD_EX64(s2, VTYPE, VSEW);
41 uint16_t sew = 8 << vsew;
42 uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
43 int xlen = riscv_cpu_xlen(env);
44 bool vill = (s2 >> (xlen - 1)) & 0x1;
45 target_ulong reserved = s2 &
46 MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
47 xlen - 1 - R_VTYPE_RESERVED_SHIFT);
48 uint16_t vlen = cpu->cfg.vlenb << 3;
49 int8_t lmul;
50
51 if (vlmul & 4) {
52 /*
53 * Fractional LMUL, check:
54 *
55 * VLEN * LMUL >= SEW
56 * VLEN >> (8 - lmul) >= sew
57 * (vlenb << 3) >> (8 - lmul) >= sew
58 */
59 if (vlmul == 4 || (vlen >> (8 - vlmul)) < sew) {
60 vill = true;
61 }
62 }
63
64 if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
65 /* only set vill bit. */
66 env->vill = 1;
67 env->vtype = 0;
68 env->vl = 0;
69 env->vstart = 0;
70 return 0;
71 }
72
73 /* lmul encoded as in DisasContext::lmul */
74 lmul = sextract32(FIELD_EX64(s2, VTYPE, VLMUL), 0, 3);
75 vlmax = vext_get_vlmax(cpu->cfg.vlenb, vsew, lmul);
76 if (s1 <= vlmax) {
77 vl = s1;
78 } else if (s1 < 2 * vlmax && cpu->cfg.rvv_vl_half_avl) {
79 vl = (s1 + 1) >> 1;
80 } else {
81 vl = vlmax;
82 }
83 env->vl = vl;
84 env->vtype = s2;
85 env->vstart = 0;
86 env->vill = 0;
87 return vl;
88 }
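/*
 * Worked example (illustrative numbers, not from a specific config):
 * VLEN = 128 (vlenb = 16), vtype requests SEW = 32 and LMUL = 1, so per the
 * RVV spec VLMAX = LMUL * VLEN / SEW = 4.  An AVL (s1) of 3 gives vl = 3;
 * with rvv_vl_half_avl enabled, an AVL of 5 (VLMAX < AVL < 2 * VLMAX) gives
 * vl = ceil(5 / 2) = 3; any larger AVL saturates at vl = VLMAX = 4.
 */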
89
90 /*
91 * Get the maximum number of elements that can be operated on.
92 *
93 * log2_esz: log2 of element size in bytes.
94 */
95 static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
96 {
97 /*
98 * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
99 * so vlen in bytes (vlenb) is encoded as maxsz.
100 */
101 uint32_t vlenb = simd_maxsz(desc);
102
103 /* Return VLMAX */
104 int scale = vext_lmul(desc) - log2_esz;
105 return scale < 0 ? vlenb >> -scale : vlenb << scale;
106 }
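/*
 * Worked example (illustrative): vlenb = 16 (VLEN = 128), LMUL = 1
 * (vext_lmul(desc) == 0) and 32-bit elements (log2_esz == 2) give
 * scale = -2, so VLMAX = 16 >> 2 = 4, matching LMUL * VLEN / SEW.
 */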
107
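/*
 * adjust_addr() applies the cached pointer-masking state (cur_pmmask /
 * cur_pmbase) to every guest virtual address before the vector helpers
 * use it for a memory access.
 */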
108 static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
109 {
110 return (addr & ~env->cur_pmmask) | env->cur_pmbase;
111 }
112
113 /*
114 * This function checks watchpoints before the real load operation.
115 *
116 * In system mode, the TLB API probe_access is enough for the watchpoint check.
117 * In user mode, there is no watchpoint support for now.
118 *
119 * It will trigger an exception if there is no mapping in the TLB
120 * and the page table walk can't fill the TLB entry. Then the guest
121 * software can return here after processing the exception, or never return.
122 */
123 static void probe_pages(CPURISCVState *env, target_ulong addr,
124 target_ulong len, uintptr_t ra,
125 MMUAccessType access_type)
126 {
127 target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
128 target_ulong curlen = MIN(pagelen, len);
129 int mmu_index = riscv_env_mmu_index(env, false);
130
131 probe_access(env, adjust_addr(env, addr), curlen, access_type,
132 mmu_index, ra);
133 if (len > curlen) {
134 addr += curlen;
135 curlen = len - curlen;
136 probe_access(env, adjust_addr(env, addr), curlen, access_type,
137 mmu_index, ra);
138 }
139 }
140
141 static inline void vext_set_elem_mask(void *v0, int index,
142 uint8_t value)
143 {
144 int idx = index / 64;
145 int pos = index % 64;
146 uint64_t old = ((uint64_t *)v0)[idx];
147 ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
148 }
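/*
 * Mask registers are viewed as an array of 64-bit words with one bit per
 * element: element 'index' lives at bit (index % 64) of word (index / 64).
 * Worked example: element 70 updates bit 6 of ((uint64_t *)v0)[1].
 */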
149
150 /* element operations for load and store */
151 typedef void vext_ldst_elem_fn_tlb(CPURISCVState *env, abi_ptr addr,
152 uint32_t idx, void *vd, uintptr_t retaddr);
153 typedef void vext_ldst_elem_fn_host(void *vd, uint32_t idx, void *host);
154
155 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \
156 static inline QEMU_ALWAYS_INLINE \
157 void NAME##_tlb(CPURISCVState *env, abi_ptr addr, \
158 uint32_t idx, void *vd, uintptr_t retaddr) \
159 { \
160 ETYPE *cur = ((ETYPE *)vd + H(idx)); \
161 *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \
162 } \
163 \
164 static inline QEMU_ALWAYS_INLINE \
165 void NAME##_host(void *vd, uint32_t idx, void *host) \
166 { \
167 ETYPE *cur = ((ETYPE *)vd + H(idx)); \
168 *cur = (ETYPE)LDSUF##_p(host); \
169 }
170
171 GEN_VEXT_LD_ELEM(lde_b, uint8_t, H1, ldub)
172 GEN_VEXT_LD_ELEM(lde_h, uint16_t, H2, lduw)
173 GEN_VEXT_LD_ELEM(lde_w, uint32_t, H4, ldl)
174 GEN_VEXT_LD_ELEM(lde_d, uint64_t, H8, ldq)
175
176 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \
177 static inline QEMU_ALWAYS_INLINE \
178 void NAME##_tlb(CPURISCVState *env, abi_ptr addr, \
179 uint32_t idx, void *vd, uintptr_t retaddr) \
180 { \
181 ETYPE data = *((ETYPE *)vd + H(idx)); \
182 cpu_##STSUF##_data_ra(env, addr, data, retaddr); \
183 } \
184 \
185 static inline QEMU_ALWAYS_INLINE \
186 void NAME##_host(void *vd, uint32_t idx, void *host) \
187 { \
188 ETYPE data = *((ETYPE *)vd + H(idx)); \
189 STSUF##_p(host, data); \
190 }
191
192 GEN_VEXT_ST_ELEM(ste_b, uint8_t, H1, stb)
193 GEN_VEXT_ST_ELEM(ste_h, uint16_t, H2, stw)
194 GEN_VEXT_ST_ELEM(ste_w, uint32_t, H4, stl)
195 GEN_VEXT_ST_ELEM(ste_d, uint64_t, H8, stq)
196
197 static inline QEMU_ALWAYS_INLINE void
198 vext_continus_ldst_tlb(CPURISCVState *env, vext_ldst_elem_fn_tlb *ldst_tlb,
199 void *vd, uint32_t evl, target_ulong addr,
200 uint32_t reg_start, uintptr_t ra, uint32_t esz,
201 bool is_load)
202 {
203 uint32_t i;
204 for (i = env->vstart; i < evl; env->vstart = ++i, addr += esz) {
205 ldst_tlb(env, adjust_addr(env, addr), i, vd, ra);
206 }
207 }
208
209 static inline QEMU_ALWAYS_INLINE void
210 vext_continus_ldst_host(CPURISCVState *env, vext_ldst_elem_fn_host *ldst_host,
211 void *vd, uint32_t evl, uint32_t reg_start, void *host,
212 uint32_t esz, bool is_load)
213 {
214 #if HOST_BIG_ENDIAN
215 for (; reg_start < evl; reg_start++, host += esz) {
216 ldst_host(vd, reg_start, host);
217 }
218 #else
219 if (esz == 1) {
220 uint32_t byte_offset = reg_start * esz;
221 uint32_t size = (evl - reg_start) * esz;
222
223 if (is_load) {
224 memcpy(vd + byte_offset, host, size);
225 } else {
226 memcpy(host, vd + byte_offset, size);
227 }
228 } else {
229 for (; reg_start < evl; reg_start++, host += esz) {
230 ldst_host(vd, reg_start, host);
231 }
232 }
233 #endif
234 }
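/*
 * Note on the host fast path above: on little-endian hosts, for byte
 * (esz == 1) elements the vector register layout trivially matches guest
 * memory order, so the copy collapses into a single memcpy(); other element
 * sizes and big-endian hosts go through the per-element ldst_host() callback.
 */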
235
236 static void vext_set_tail_elems_1s(target_ulong vl, void *vd,
237 uint32_t desc, uint32_t nf,
238 uint32_t esz, uint32_t max_elems)
239 {
240 uint32_t vta = vext_vta(desc);
241 int k;
242
243 if (vta == 0) {
244 return;
245 }
246
247 for (k = 0; k < nf; ++k) {
248 vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz,
249 (k * max_elems + max_elems) * esz);
250 }
251 }
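/*
 * Worked example (illustrative): nf = 2, max_elems = 4, vl = 3, esz = 4.
 * With vta set, bytes [12, 16) of the first field and [28, 32) of the
 * second field (the tail of each segment) are filled with all-1s.
 */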
252
253 /*
254 * stride: access vector elements from strided memory
255 */
256 static void
257 vext_ldst_stride(void *vd, void *v0, target_ulong base, target_ulong stride,
258 CPURISCVState *env, uint32_t desc, uint32_t vm,
259 vext_ldst_elem_fn_tlb *ldst_elem, uint32_t log2_esz,
260 uintptr_t ra)
261 {
262 uint32_t i, k;
263 uint32_t nf = vext_nf(desc);
264 uint32_t max_elems = vext_max_elems(desc, log2_esz);
265 uint32_t esz = 1 << log2_esz;
266 uint32_t vma = vext_vma(desc);
267
268 VSTART_CHECK_EARLY_EXIT(env, env->vl);
269
270 for (i = env->vstart; i < env->vl; env->vstart = ++i) {
271 k = 0;
272 while (k < nf) {
273 if (!vm && !vext_elem_mask(v0, i)) {
274 /* set masked-off elements to 1s */
275 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
276 (i + k * max_elems + 1) * esz);
277 k++;
278 continue;
279 }
280 target_ulong addr = base + stride * i + (k << log2_esz);
281 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
282 k++;
283 }
284 }
285 env->vstart = 0;
286
287 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
288 }
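/*
 * Worked example (illustrative): nf = 2, esz = 4, stride = 32.  Element
 * i = 3, field k = 1 is accessed at base + 3 * 32 + 4 and placed at element
 * index 3 + 1 * max_elems, i.e. in the second register group of the segment.
 */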
289
290 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \
291 void HELPER(NAME)(void *vd, void * v0, target_ulong base, \
292 target_ulong stride, CPURISCVState *env, \
293 uint32_t desc) \
294 { \
295 uint32_t vm = vext_vm(desc); \
296 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \
297 ctzl(sizeof(ETYPE)), GETPC()); \
298 }
299
300 GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b_tlb)
301 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h_tlb)
302 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w_tlb)
303 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d_tlb)
304
305 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \
306 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
307 target_ulong stride, CPURISCVState *env, \
308 uint32_t desc) \
309 { \
310 uint32_t vm = vext_vm(desc); \
311 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \
312 ctzl(sizeof(ETYPE)), GETPC()); \
313 }
314
315 GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b_tlb)
316 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h_tlb)
317 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w_tlb)
318 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d_tlb)
319
320 /*
321 * unit-stride: access elements stored contiguously in memory
322 */
323
324 /* unmasked unit-stride load and store operation */
325 static inline QEMU_ALWAYS_INLINE void
326 vext_page_ldst_us(CPURISCVState *env, void *vd, target_ulong addr,
327 uint32_t elems, uint32_t nf, uint32_t max_elems,
328 uint32_t log2_esz, bool is_load, int mmu_index,
329 vext_ldst_elem_fn_tlb *ldst_tlb,
330 vext_ldst_elem_fn_host *ldst_host, uintptr_t ra)
331 {
332 void *host;
333 int i, k, flags;
334 uint32_t esz = 1 << log2_esz;
335 uint32_t size = (elems * nf) << log2_esz;
336 uint32_t evl = env->vstart + elems;
337 MMUAccessType access_type = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE;
338
339 /* Check page permission/pmp/watchpoint/etc. */
340 flags = probe_access_flags(env, adjust_addr(env, addr), size, access_type,
341 mmu_index, true, &host, ra);
342
343 if (flags == 0) {
344 if (nf == 1) {
345 vext_continus_ldst_host(env, ldst_host, vd, evl, env->vstart, host,
346 esz, is_load);
347 } else {
348 for (i = env->vstart; i < evl; ++i) {
349 k = 0;
350 while (k < nf) {
351 ldst_host(vd, i + k * max_elems, host);
352 host += esz;
353 k++;
354 }
355 }
356 }
357 env->vstart += elems;
358 } else {
359 if (nf == 1) {
360 vext_continus_ldst_tlb(env, ldst_tlb, vd, evl, addr, env->vstart,
361 ra, esz, is_load);
362 } else {
363 /* load bytes from guest memory */
364 for (i = env->vstart; i < evl; env->vstart = ++i) {
365 k = 0;
366 while (k < nf) {
367 ldst_tlb(env, adjust_addr(env, addr), i + k * max_elems,
368 vd, ra);
369 addr += esz;
370 k++;
371 }
372 }
373 }
374 }
375 }
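/*
 * Note: flags == 0 from probe_access_flags() means the whole range is plain
 * RAM with no watchpoints or MMIO in the way, so the elements are copied
 * through the direct host pointer; any nonzero flag falls back to the
 * per-element TLB accessors.
 */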
376
377 static inline QEMU_ALWAYS_INLINE void
378 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
379 vext_ldst_elem_fn_tlb *ldst_tlb,
380 vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz,
381 uint32_t evl, uintptr_t ra, bool is_load)
382 {
383 uint32_t k;
384 target_ulong page_split, elems, addr;
385 uint32_t nf = vext_nf(desc);
386 uint32_t max_elems = vext_max_elems(desc, log2_esz);
387 uint32_t esz = 1 << log2_esz;
388 uint32_t msize = nf * esz;
389 int mmu_index = riscv_env_mmu_index(env, false);
390
391 VSTART_CHECK_EARLY_EXIT(env, evl);
392
393 /* Calculate the page range of first page */
394 addr = base + ((env->vstart * nf) << log2_esz);
395 page_split = -(addr | TARGET_PAGE_MASK);
396 /* Get number of elements */
397 elems = page_split / msize;
398 if (unlikely(env->vstart + elems >= evl)) {
399 elems = evl - env->vstart;
400 }
401
402 /* Load/store elements in the first page */
403 if (likely(elems)) {
404 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, log2_esz,
405 is_load, mmu_index, ldst_tlb, ldst_host, ra);
406 }
407
408 /* Load/store elements in the second page */
409 if (unlikely(env->vstart < evl)) {
410 /* Cross page element */
411 if (unlikely(page_split % msize)) {
412 for (k = 0; k < nf; k++) {
413 addr = base + ((env->vstart * nf + k) << log2_esz);
414 ldst_tlb(env, adjust_addr(env, addr),
415 env->vstart + k * max_elems, vd, ra);
416 }
417 env->vstart++;
418 }
419
420 addr = base + ((env->vstart * nf) << log2_esz);
421 /* Get number of elements of second page */
422 elems = evl - env->vstart;
423
424 /* Load/store elements in the second page */
425 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, log2_esz,
426 is_load, mmu_index, ldst_tlb, ldst_host, ra);
427 }
428
429 env->vstart = 0;
430 vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems);
431 }
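/*
 * Worked example (illustrative, 4 KiB pages): base = 0xffa, nf = 1, esz = 4,
 * evl = 8.  page_split = 6, so one whole element fits in the first page;
 * 6 % 4 != 0 marks element 1 as crossing the page boundary, so it is
 * accessed via the TLB path, and elements 2..7 are handled as a second
 * contiguous block starting at 0x1002.
 */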
432
433 /*
434 * masked unit-stride load and store operations are a special case of
435 * strided operations, with stride = NF * sizeof(ETYPE)
436 */
437
438 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST) \
439 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
440 CPURISCVState *env, uint32_t desc) \
441 { \
442 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
443 vext_ldst_stride(vd, v0, base, stride, env, desc, false, \
444 LOAD_FN_TLB, ctzl(sizeof(ETYPE)), GETPC()); \
445 } \
446 \
447 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
448 CPURISCVState *env, uint32_t desc) \
449 { \
450 vext_ldst_us(vd, base, env, desc, LOAD_FN_TLB, LOAD_FN_HOST, \
451 ctzl(sizeof(ETYPE)), env->vl, GETPC(), true); \
452 }
453
454 GEN_VEXT_LD_US(vle8_v, int8_t, lde_b_tlb, lde_b_host)
455 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h_tlb, lde_h_host)
456 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w_tlb, lde_w_host)
457 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d_tlb, lde_d_host)
458
459 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN_TLB, STORE_FN_HOST) \
460 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
461 CPURISCVState *env, uint32_t desc) \
462 { \
463 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
464 vext_ldst_stride(vd, v0, base, stride, env, desc, false, \
465 STORE_FN_TLB, ctzl(sizeof(ETYPE)), GETPC()); \
466 } \
467 \
468 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
469 CPURISCVState *env, uint32_t desc) \
470 { \
471 vext_ldst_us(vd, base, env, desc, STORE_FN_TLB, STORE_FN_HOST, \
472 ctzl(sizeof(ETYPE)), env->vl, GETPC(), false); \
473 }
474
475 GEN_VEXT_ST_US(vse8_v, int8_t, ste_b_tlb, ste_b_host)
476 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h_tlb, ste_h_host)
477 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w_tlb, ste_w_host)
478 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d_tlb, ste_d_host)
479
480 /*
481 * unit stride mask load and store, EEW = 1
482 */
483 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
484 CPURISCVState *env, uint32_t desc)
485 {
486 /* evl = ceil(vl/8) */
487 uint8_t evl = (env->vl + 7) >> 3;
488 vext_ldst_us(vd, base, env, desc, lde_b_tlb, lde_b_host,
489 0, evl, GETPC(), true);
490 }
491
492 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
493 CPURISCVState *env, uint32_t desc)
494 {
495 /* evl = ceil(vl/8) */
496 uint8_t evl = (env->vl + 7) >> 3;
497 vext_ldst_us(vd, base, env, desc, ste_b_tlb, ste_b_host,
498 0, evl, GETPC(), false);
499 }
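/*
 * vlm.v/vsm.v transfer the mask register as ceil(vl / 8) bytes with EEW = 8,
 * e.g. vl = 17 gives evl = 3 bytes, regardless of the current SEW/LMUL.
 */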
500
501 /*
502 * index: access vector elements from indexed memory
503 */
504 typedef target_ulong vext_get_index_addr(target_ulong base,
505 uint32_t idx, void *vs2);
506
507 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \
508 static target_ulong NAME(target_ulong base, \
509 uint32_t idx, void *vs2) \
510 { \
511 return (base + *((ETYPE *)vs2 + H(idx))); \
512 }
513
514 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1)
515 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
516 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
517 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
518
519 static inline void
520 vext_ldst_index(void *vd, void *v0, target_ulong base,
521 void *vs2, CPURISCVState *env, uint32_t desc,
522 vext_get_index_addr get_index_addr,
523 vext_ldst_elem_fn_tlb *ldst_elem,
524 uint32_t log2_esz, uintptr_t ra)
525 {
526 uint32_t i, k;
527 uint32_t nf = vext_nf(desc);
528 uint32_t vm = vext_vm(desc);
529 uint32_t max_elems = vext_max_elems(desc, log2_esz);
530 uint32_t esz = 1 << log2_esz;
531 uint32_t vma = vext_vma(desc);
532
533 VSTART_CHECK_EARLY_EXIT(env, env->vl);
534
535 /* load bytes from guest memory */
536 for (i = env->vstart; i < env->vl; env->vstart = ++i) {
537 k = 0;
538 while (k < nf) {
539 if (!vm && !vext_elem_mask(v0, i)) {
540 /* set masked-off elements to 1s */
541 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
542 (i + k * max_elems + 1) * esz);
543 k++;
544 continue;
545 }
546 abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
547 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
548 k++;
549 }
550 }
551 env->vstart = 0;
552
553 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
554 }
555
556 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \
557 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
558 void *vs2, CPURISCVState *env, uint32_t desc) \
559 { \
560 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
561 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \
562 }
563
564 GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b_tlb)
565 GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h_tlb)
566 GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w_tlb)
567 GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d_tlb)
568 GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b_tlb)
569 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h_tlb)
570 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w_tlb)
571 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d_tlb)
572 GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b_tlb)
573 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h_tlb)
574 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w_tlb)
575 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d_tlb)
576 GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b_tlb)
577 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h_tlb)
578 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w_tlb)
579 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d_tlb)
580
581 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \
582 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
583 void *vs2, CPURISCVState *env, uint32_t desc) \
584 { \
585 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
586 STORE_FN, ctzl(sizeof(ETYPE)), \
587 GETPC()); \
588 }
589
590 GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b_tlb)
591 GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h_tlb)
592 GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w_tlb)
593 GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d_tlb)
594 GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b_tlb)
595 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h_tlb)
596 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w_tlb)
597 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d_tlb)
598 GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b_tlb)
599 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h_tlb)
600 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w_tlb)
601 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d_tlb)
602 GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b_tlb)
603 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h_tlb)
604 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w_tlb)
605 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d_tlb)
606
607 /*
608 * unit-stride fault-only-first load instructions
609 */
610 static inline void
611 vext_ldff(void *vd, void *v0, target_ulong base, CPURISCVState *env,
612 uint32_t desc, vext_ldst_elem_fn_tlb *ldst_tlb,
613 vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz, uintptr_t ra)
614 {
615 uint32_t i, k, vl = 0;
616 uint32_t nf = vext_nf(desc);
617 uint32_t vm = vext_vm(desc);
618 uint32_t max_elems = vext_max_elems(desc, log2_esz);
619 uint32_t esz = 1 << log2_esz;
620 uint32_t msize = nf * esz;
621 uint32_t vma = vext_vma(desc);
622 target_ulong addr, offset, remain, page_split, elems;
623 int mmu_index = riscv_env_mmu_index(env, false);
624
625 VSTART_CHECK_EARLY_EXIT(env, env->vl);
626
627 /* probe every access */
628 for (i = env->vstart; i < env->vl; i++) {
629 if (!vm && !vext_elem_mask(v0, i)) {
630 continue;
631 }
632 addr = adjust_addr(env, base + i * (nf << log2_esz));
633 if (i == 0) {
634 /* Allow fault on first element. */
635 probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
636 } else {
637 remain = nf << log2_esz;
638 while (remain > 0) {
639 void *host;
640 int flags;
641
642 offset = -(addr | TARGET_PAGE_MASK);
643
644 /* Probe nonfault on subsequent elements. */
645 flags = probe_access_flags(env, addr, offset, MMU_DATA_LOAD,
646 mmu_index, true, &host, 0);
647
648 /*
649 * Stop if invalid (unmapped) or mmio (transaction may fail).
650 * Do not stop if watchpoint, as the spec says that
651 * first-fault should continue to access the same
652 * elements regardless of any watchpoint.
653 */
654 if (flags & ~TLB_WATCHPOINT) {
655 vl = i;
656 goto ProbeSuccess;
657 }
658 if (remain <= offset) {
659 break;
660 }
661 remain -= offset;
662 addr = adjust_addr(env, addr + offset);
663 }
664 }
665 }
666 ProbeSuccess:
667 /* load bytes from guest memory */
668 if (vl != 0) {
669 env->vl = vl;
670 }
671
672 if (env->vstart < env->vl) {
673 if (vm) {
674 /* Calculate the page range of first page */
675 addr = base + ((env->vstart * nf) << log2_esz);
676 page_split = -(addr | TARGET_PAGE_MASK);
677 /* Get number of elements */
678 elems = page_split / msize;
679 if (unlikely(env->vstart + elems >= env->vl)) {
680 elems = env->vl - env->vstart;
681 }
682
683 /* Load/store elements in the first page */
684 if (likely(elems)) {
685 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems,
686 log2_esz, true, mmu_index, ldst_tlb,
687 ldst_host, ra);
688 }
689
690 /* Load/store elements in the second page */
691 if (unlikely(env->vstart < env->vl)) {
692 /* Cross page element */
693 if (unlikely(page_split % msize)) {
694 for (k = 0; k < nf; k++) {
695 addr = base + ((env->vstart * nf + k) << log2_esz);
696 ldst_tlb(env, adjust_addr(env, addr),
697 env->vstart + k * max_elems, vd, ra);
698 }
699 env->vstart++;
700 }
701
702 addr = base + ((env->vstart * nf) << log2_esz);
703 /* Get number of elements of second page */
704 elems = env->vl - env->vstart;
705
706 /* Load/store elements in the second page */
707 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems,
708 log2_esz, true, mmu_index, ldst_tlb,
709 ldst_host, ra);
710 }
711 } else {
712 for (i = env->vstart; i < env->vl; i++) {
713 k = 0;
714 while (k < nf) {
715 if (!vext_elem_mask(v0, i)) {
716 /* set masked-off elements to 1s */
717 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
718 (i + k * max_elems + 1) * esz);
719 k++;
720 continue;
721 }
722 addr = base + ((i * nf + k) << log2_esz);
723 ldst_tlb(env, adjust_addr(env, addr), i + k * max_elems,
724 vd, ra);
725 k++;
726 }
727 }
728 }
729 }
730 env->vstart = 0;
731
732 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
733 }
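/*
 * Fault-only-first summary: element 0 is probed with faults enabled, so a
 * bad address traps as usual.  Later elements are probed nonfaulting; if one
 * of them would fault (or hits MMIO), vl is truncated to the number of
 * elements before it and no exception is raised.  Example: vl = 8 with the
 * page of element 3 unmapped -> elements 0..2 are loaded and vl becomes 3.
 */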
734
735 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST) \
736 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
737 CPURISCVState *env, uint32_t desc) \
738 { \
739 vext_ldff(vd, v0, base, env, desc, LOAD_FN_TLB, \
740 LOAD_FN_HOST, ctzl(sizeof(ETYPE)), GETPC()); \
741 }
742
743 GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b_tlb, lde_b_host)
744 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h_tlb, lde_h_host)
745 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w_tlb, lde_w_host)
746 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d_tlb, lde_d_host)
747
748 #define DO_SWAP(N, M) (M)
749 #define DO_AND(N, M) (N & M)
750 #define DO_XOR(N, M) (N ^ M)
751 #define DO_OR(N, M) (N | M)
752 #define DO_ADD(N, M) (N + M)
753
754 /* Signed min/max */
755 #define DO_MAX(N, M) ((N) >= (M) ? (N) : (M))
756 #define DO_MIN(N, M) ((N) >= (M) ? (M) : (N))
757
758 /*
759 * load and store whole register instructions
760 */
761 static inline QEMU_ALWAYS_INLINE void
762 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
763 vext_ldst_elem_fn_tlb *ldst_tlb,
764 vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz,
765 uintptr_t ra, bool is_load)
766 {
767 target_ulong page_split, elems, addr;
768 uint32_t nf = vext_nf(desc);
769 uint32_t vlenb = riscv_cpu_cfg(env)->vlenb;
770 uint32_t max_elems = vlenb >> log2_esz;
771 uint32_t evl = nf * max_elems;
772 uint32_t esz = 1 << log2_esz;
773 int mmu_index = riscv_env_mmu_index(env, false);
774
775 /* Calculate the page range of first page */
776 addr = base + (env->vstart << log2_esz);
777 page_split = -(addr | TARGET_PAGE_MASK);
778 /* Get number of elements */
779 elems = page_split / esz;
780 if (unlikely(env->vstart + elems >= evl)) {
781 elems = evl - env->vstart;
782 }
783
784 /* Load/store elements in the first page */
785 if (likely(elems)) {
786 vext_page_ldst_us(env, vd, addr, elems, 1, max_elems, log2_esz,
787 is_load, mmu_index, ldst_tlb, ldst_host, ra);
788 }
789
790 /* Load/store elements in the second page */
791 if (unlikely(env->vstart < evl)) {
792 /* Cross page element */
793 if (unlikely(page_split % esz)) {
794 addr = base + (env->vstart << log2_esz);
795 ldst_tlb(env, adjust_addr(env, addr), env->vstart, vd, ra);
796 env->vstart++;
797 }
798
799 addr = base + (env->vstart << log2_esz);
800 /* Get number of elements of second page */
801 elems = evl - env->vstart;
802
803 /* Load/store elements in the second page */
804 vext_page_ldst_us(env, vd, addr, elems, 1, max_elems, log2_esz,
805 is_load, mmu_index, ldst_tlb, ldst_host, ra);
806 }
807
808 env->vstart = 0;
809 }
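/*
 * Whole-register accesses ignore vl/vtype: evl is simply nf * (vlenb >>
 * log2_esz).  Worked example (illustrative): vl2re32.v with vlenb = 16
 * moves 2 * 4 = 8 32-bit elements no matter what vsetvl last configured.
 */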
810
811 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST) \
812 void HELPER(NAME)(void *vd, target_ulong base, CPURISCVState *env, \
813 uint32_t desc) \
814 { \
815 vext_ldst_whole(vd, base, env, desc, LOAD_FN_TLB, LOAD_FN_HOST, \
816 ctzl(sizeof(ETYPE)), GETPC(), true); \
817 }
818
819 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b_tlb, lde_b_host)
820 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h_tlb, lde_h_host)
821 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w_tlb, lde_w_host)
822 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d_tlb, lde_d_host)
823 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b_tlb, lde_b_host)
824 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h_tlb, lde_h_host)
825 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w_tlb, lde_w_host)
826 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d_tlb, lde_d_host)
827 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b_tlb, lde_b_host)
828 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h_tlb, lde_h_host)
829 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w_tlb, lde_w_host)
830 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d_tlb, lde_d_host)
831 GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b_tlb, lde_b_host)
832 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h_tlb, lde_h_host)
833 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w_tlb, lde_w_host)
834 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d_tlb, lde_d_host)
835
836 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN_TLB, STORE_FN_HOST) \
837 void HELPER(NAME)(void *vd, target_ulong base, CPURISCVState *env, \
838 uint32_t desc) \
839 { \
840 vext_ldst_whole(vd, base, env, desc, STORE_FN_TLB, STORE_FN_HOST, \
841 ctzl(sizeof(ETYPE)), GETPC(), false); \
842 }
843
844 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b_tlb, ste_b_host)
845 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b_tlb, ste_b_host)
846 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b_tlb, ste_b_host)
847 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b_tlb, ste_b_host)
848
849 /*
850 * Vector Integer Arithmetic Instructions
851 */
852
853 /* (TD, T1, T2, TX1, TX2) */
854 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
855 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
856 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
857 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
858 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
859 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
860 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
861 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
862 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
863 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
864 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
865 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
866 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
867 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
868 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
869 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
870 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
871 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
872 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
873 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
874 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
875 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
876 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
877
878 #define DO_SUB(N, M) (N - M)
879 #define DO_RSUB(N, M) (M - N)
880
881 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
882 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
883 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
884 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
885 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
886 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
887 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
888 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
889
890 GEN_VEXT_VV(vadd_vv_b, 1)
891 GEN_VEXT_VV(vadd_vv_h, 2)
892 GEN_VEXT_VV(vadd_vv_w, 4)
893 GEN_VEXT_VV(vadd_vv_d, 8)
894 GEN_VEXT_VV(vsub_vv_b, 1)
895 GEN_VEXT_VV(vsub_vv_h, 2)
896 GEN_VEXT_VV(vsub_vv_w, 4)
897 GEN_VEXT_VV(vsub_vv_d, 8)
898
899
900 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
901 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
902 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
903 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
904 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
905 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
906 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
907 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
908 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
909 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
910 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
911 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
912
913 GEN_VEXT_VX(vadd_vx_b, 1)
914 GEN_VEXT_VX(vadd_vx_h, 2)
915 GEN_VEXT_VX(vadd_vx_w, 4)
916 GEN_VEXT_VX(vadd_vx_d, 8)
917 GEN_VEXT_VX(vsub_vx_b, 1)
918 GEN_VEXT_VX(vsub_vx_h, 2)
919 GEN_VEXT_VX(vsub_vx_w, 4)
920 GEN_VEXT_VX(vsub_vx_d, 8)
921 GEN_VEXT_VX(vrsub_vx_b, 1)
922 GEN_VEXT_VX(vrsub_vx_h, 2)
923 GEN_VEXT_VX(vrsub_vx_w, 4)
924 GEN_VEXT_VX(vrsub_vx_d, 8)
925
926 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
927 {
928 intptr_t oprsz = simd_oprsz(desc);
929 intptr_t i;
930
931 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
932 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
933 }
934 }
935
936 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
937 {
938 intptr_t oprsz = simd_oprsz(desc);
939 intptr_t i;
940
941 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
942 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
943 }
944 }
945
946 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
947 {
948 intptr_t oprsz = simd_oprsz(desc);
949 intptr_t i;
950
951 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
952 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
953 }
954 }
955
956 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
957 {
958 intptr_t oprsz = simd_oprsz(desc);
959 intptr_t i;
960
961 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
962 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
963 }
964 }
965
966 /* Vector Widening Integer Add/Subtract */
967 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
968 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
969 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
970 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
971 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
972 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
973 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
974 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
975 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
976 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
977 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
978 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
979 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
980 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
981 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
982 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
983 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
984 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
985 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
986 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
987 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
988 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
989 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
990 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
991 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
992 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
993 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
994 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
995 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
996 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
997 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
998 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
999 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
1000 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
1001 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
1002 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
1003 GEN_VEXT_VV(vwaddu_vv_b, 2)
1004 GEN_VEXT_VV(vwaddu_vv_h, 4)
1005 GEN_VEXT_VV(vwaddu_vv_w, 8)
1006 GEN_VEXT_VV(vwsubu_vv_b, 2)
1007 GEN_VEXT_VV(vwsubu_vv_h, 4)
1008 GEN_VEXT_VV(vwsubu_vv_w, 8)
1009 GEN_VEXT_VV(vwadd_vv_b, 2)
1010 GEN_VEXT_VV(vwadd_vv_h, 4)
1011 GEN_VEXT_VV(vwadd_vv_w, 8)
1012 GEN_VEXT_VV(vwsub_vv_b, 2)
1013 GEN_VEXT_VV(vwsub_vv_h, 4)
1014 GEN_VEXT_VV(vwsub_vv_w, 8)
1015 GEN_VEXT_VV(vwaddu_wv_b, 2)
1016 GEN_VEXT_VV(vwaddu_wv_h, 4)
1017 GEN_VEXT_VV(vwaddu_wv_w, 8)
1018 GEN_VEXT_VV(vwsubu_wv_b, 2)
1019 GEN_VEXT_VV(vwsubu_wv_h, 4)
1020 GEN_VEXT_VV(vwsubu_wv_w, 8)
1021 GEN_VEXT_VV(vwadd_wv_b, 2)
1022 GEN_VEXT_VV(vwadd_wv_h, 4)
1023 GEN_VEXT_VV(vwadd_wv_w, 8)
1024 GEN_VEXT_VV(vwsub_wv_b, 2)
1025 GEN_VEXT_VV(vwsub_wv_h, 4)
1026 GEN_VEXT_VV(vwsub_wv_w, 8)
1027
1028 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
1029 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
1030 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
1031 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
1032 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
1033 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
1034 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
1035 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
1036 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
1037 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
1038 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
1039 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
1040 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
1041 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
1042 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
1043 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
1044 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
1045 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
1046 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
1047 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
1048 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
1049 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
1050 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
1051 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
1052 GEN_VEXT_VX(vwaddu_vx_b, 2)
1053 GEN_VEXT_VX(vwaddu_vx_h, 4)
1054 GEN_VEXT_VX(vwaddu_vx_w, 8)
1055 GEN_VEXT_VX(vwsubu_vx_b, 2)
1056 GEN_VEXT_VX(vwsubu_vx_h, 4)
1057 GEN_VEXT_VX(vwsubu_vx_w, 8)
1058 GEN_VEXT_VX(vwadd_vx_b, 2)
1059 GEN_VEXT_VX(vwadd_vx_h, 4)
1060 GEN_VEXT_VX(vwadd_vx_w, 8)
1061 GEN_VEXT_VX(vwsub_vx_b, 2)
1062 GEN_VEXT_VX(vwsub_vx_h, 4)
1063 GEN_VEXT_VX(vwsub_vx_w, 8)
1064 GEN_VEXT_VX(vwaddu_wx_b, 2)
1065 GEN_VEXT_VX(vwaddu_wx_h, 4)
1066 GEN_VEXT_VX(vwaddu_wx_w, 8)
1067 GEN_VEXT_VX(vwsubu_wx_b, 2)
1068 GEN_VEXT_VX(vwsubu_wx_h, 4)
1069 GEN_VEXT_VX(vwsubu_wx_w, 8)
1070 GEN_VEXT_VX(vwadd_wx_b, 2)
1071 GEN_VEXT_VX(vwadd_wx_h, 4)
1072 GEN_VEXT_VX(vwadd_wx_w, 8)
1073 GEN_VEXT_VX(vwsub_wx_b, 2)
1074 GEN_VEXT_VX(vwsub_wx_h, 4)
1075 GEN_VEXT_VX(vwsub_wx_w, 8)
1076
1077 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
1078 #define DO_VADC(N, M, C) (N + M + C)
1079 #define DO_VSBC(N, M, C) (N - M - C)
1080
1081 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \
1082 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1083 CPURISCVState *env, uint32_t desc) \
1084 { \
1085 uint32_t vl = env->vl; \
1086 uint32_t esz = sizeof(ETYPE); \
1087 uint32_t total_elems = \
1088 vext_get_total_elems(env, desc, esz); \
1089 uint32_t vta = vext_vta(desc); \
1090 uint32_t i; \
1091 \
1092 VSTART_CHECK_EARLY_EXIT(env, vl); \
1093 \
1094 for (i = env->vstart; i < vl; i++) { \
1095 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1096 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1097 ETYPE carry = vext_elem_mask(v0, i); \
1098 \
1099 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \
1100 } \
1101 env->vstart = 0; \
1102 /* set tail elements to 1s */ \
1103 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1104 }
1105
1106 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC)
1107 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
1108 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
1109 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)
1110
1111 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC)
1112 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
1113 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
1114 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
1115
1116 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \
1117 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
1118 CPURISCVState *env, uint32_t desc) \
1119 { \
1120 uint32_t vl = env->vl; \
1121 uint32_t esz = sizeof(ETYPE); \
1122 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1123 uint32_t vta = vext_vta(desc); \
1124 uint32_t i; \
1125 \
1126 VSTART_CHECK_EARLY_EXIT(env, vl); \
1127 \
1128 for (i = env->vstart; i < vl; i++) { \
1129 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1130 ETYPE carry = vext_elem_mask(v0, i); \
1131 \
1132 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
1133 } \
1134 env->vstart = 0; \
1135 /* set tail elements to 1s */ \
1136 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1137 }
1138
1139 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC)
1140 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
1141 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
1142 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)
1143
1144 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC)
1145 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
1146 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
1147 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
1148
1149 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \
1150 (__typeof(N))(N + M) < N)
1151 #define DO_MSBC(N, M, C) (C ? N <= M : N < M)
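/*
 * DO_MADC/DO_MSBC compute the carry/borrow *out* of an unsigned add/sub:
 * an unsigned N + M overflows iff the truncated result is below N (e.g. for
 * uint8_t, 200 + 100 wraps to 44 < 200, so carry = 1), and with a carry-in
 * the boundary case shifts to N + M + 1 <= N.  Likewise a borrow occurs
 * iff N < M (or N <= M with a borrow-in).
 */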
1152
1153 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \
1154 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1155 CPURISCVState *env, uint32_t desc) \
1156 { \
1157 uint32_t vl = env->vl; \
1158 uint32_t vm = vext_vm(desc); \
1159 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
1160 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1161 uint32_t i; \
1162 \
1163 VSTART_CHECK_EARLY_EXIT(env, vl); \
1164 \
1165 for (i = env->vstart; i < vl; i++) { \
1166 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1167 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1168 ETYPE carry = !vm && vext_elem_mask(v0, i); \
1169 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \
1170 } \
1171 env->vstart = 0; \
1172 /*
1173 * the mask destination register is always tail-agnostic
1174 * set tail elements to 1s
1175 */ \
1176 if (vta_all_1s) { \
1177 for (; i < total_elems; i++) { \
1178 vext_set_elem_mask(vd, i, 1); \
1179 } \
1180 } \
1181 }
1182
1183 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC)
1184 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
1185 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
1186 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)
1187
1188 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC)
1189 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
1190 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
1191 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
1192
1193 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \
1194 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1195 void *vs2, CPURISCVState *env, uint32_t desc) \
1196 { \
1197 uint32_t vl = env->vl; \
1198 uint32_t vm = vext_vm(desc); \
1199 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
1200 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1201 uint32_t i; \
1202 \
1203 VSTART_CHECK_EARLY_EXIT(env, vl); \
1204 \
1205 for (i = env->vstart; i < vl; i++) { \
1206 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1207 ETYPE carry = !vm && vext_elem_mask(v0, i); \
1208 vext_set_elem_mask(vd, i, \
1209 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \
1210 } \
1211 env->vstart = 0; \
1212 /*
1213 * the mask destination register is always tail-agnostic
1214 * set tail elements to 1s
1215 */ \
1216 if (vta_all_1s) { \
1217 for (; i < total_elems; i++) { \
1218 vext_set_elem_mask(vd, i, 1); \
1219 } \
1220 } \
1221 }
1222
1223 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC)
1224 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
1225 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
1226 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)
1227
1228 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC)
1229 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
1230 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
1231 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
1232
1233 /* Vector Bitwise Logical Instructions */
1234 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
1235 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
1236 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
1237 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
1238 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
1239 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
1240 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
1241 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
1242 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
1243 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
1244 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
1245 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
1246 GEN_VEXT_VV(vand_vv_b, 1)
1247 GEN_VEXT_VV(vand_vv_h, 2)
1248 GEN_VEXT_VV(vand_vv_w, 4)
1249 GEN_VEXT_VV(vand_vv_d, 8)
1250 GEN_VEXT_VV(vor_vv_b, 1)
1251 GEN_VEXT_VV(vor_vv_h, 2)
1252 GEN_VEXT_VV(vor_vv_w, 4)
1253 GEN_VEXT_VV(vor_vv_d, 8)
1254 GEN_VEXT_VV(vxor_vv_b, 1)
1255 GEN_VEXT_VV(vxor_vv_h, 2)
1256 GEN_VEXT_VV(vxor_vv_w, 4)
1257 GEN_VEXT_VV(vxor_vv_d, 8)
1258
1259 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
1260 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
1261 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
1262 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
1263 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
1264 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
1265 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
1266 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
1267 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
1268 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
1269 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
1270 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
1271 GEN_VEXT_VX(vand_vx_b, 1)
1272 GEN_VEXT_VX(vand_vx_h, 2)
1273 GEN_VEXT_VX(vand_vx_w, 4)
1274 GEN_VEXT_VX(vand_vx_d, 8)
1275 GEN_VEXT_VX(vor_vx_b, 1)
1276 GEN_VEXT_VX(vor_vx_h, 2)
1277 GEN_VEXT_VX(vor_vx_w, 4)
1278 GEN_VEXT_VX(vor_vx_d, 8)
1279 GEN_VEXT_VX(vxor_vx_b, 1)
1280 GEN_VEXT_VX(vxor_vx_h, 2)
1281 GEN_VEXT_VX(vxor_vx_w, 4)
1282 GEN_VEXT_VX(vxor_vx_d, 8)
1283
1284 /* Vector Single-Width Bit Shift Instructions */
1285 #define DO_SLL(N, M) (N << (M))
1286 #define DO_SRL(N, M) (N >> (M))
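/*
 * There is no separate DO_SRA: the vsra helpers below instantiate DO_SRL
 * with a signed source type, so the C right shift is an arithmetic shift
 * (signed >> behaves that way on the compilers QEMU supports).
 */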
1287
1288 /* generate the helpers for shift instructions with two vector operands */
1289 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \
1290 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
1291 void *vs2, CPURISCVState *env, uint32_t desc) \
1292 { \
1293 uint32_t vm = vext_vm(desc); \
1294 uint32_t vl = env->vl; \
1295 uint32_t esz = sizeof(TS1); \
1296 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1297 uint32_t vta = vext_vta(desc); \
1298 uint32_t vma = vext_vma(desc); \
1299 uint32_t i; \
1300 \
1301 VSTART_CHECK_EARLY_EXIT(env, vl); \
1302 \
1303 for (i = env->vstart; i < vl; i++) { \
1304 if (!vm && !vext_elem_mask(v0, i)) { \
1305 /* set masked-off elements to 1s */ \
1306 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
1307 continue; \
1308 } \
1309 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \
1310 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
1311 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \
1312 } \
1313 env->vstart = 0; \
1314 /* set tail elements to 1s */ \
1315 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1316 }
1317
1318 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7)
1319 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
1320 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
1321 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)
1322
1323 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1324 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1325 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1326 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1327
1328 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7)
1329 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
1330 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1331 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1332
1333 /*
1334 * generate the helpers for shift instructions with one vector and one scalar
1335 */
1336 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
1337 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1338 void *vs2, CPURISCVState *env, \
1339 uint32_t desc) \
1340 { \
1341 uint32_t vm = vext_vm(desc); \
1342 uint32_t vl = env->vl; \
1343 uint32_t esz = sizeof(TD); \
1344 uint32_t total_elems = \
1345 vext_get_total_elems(env, desc, esz); \
1346 uint32_t vta = vext_vta(desc); \
1347 uint32_t vma = vext_vma(desc); \
1348 uint32_t i; \
1349 \
1350 VSTART_CHECK_EARLY_EXIT(env, vl); \
1351 \
1352 for (i = env->vstart; i < vl; i++) { \
1353 if (!vm && !vext_elem_mask(v0, i)) { \
1354 /* set masked-off elements to 1s */ \
1355 vext_set_elems_1s(vd, vma, i * esz, \
1356 (i + 1) * esz); \
1357 continue; \
1358 } \
1359 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
1360 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \
1361 } \
1362 env->vstart = 0; \
1363 /* set tail elements to 1s */ \
1364 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\
1365 }
1366
1367 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
1368 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
1369 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
1370 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)
1371
1372 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1373 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1374 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1375 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1376
1377 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
1378 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
1379 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1380 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1381
1382 /* Vector Narrowing Integer Right Shift Instructions */
1383 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1384 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1385 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1386 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf)
1387 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1388 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1389 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1390 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1391 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1392 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
1393 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1394 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1395
1396 /* Vector Integer Comparison Instructions */
1397 #define DO_MSEQ(N, M) (N == M)
1398 #define DO_MSNE(N, M) (N != M)
1399 #define DO_MSLT(N, M) (N < M)
1400 #define DO_MSLE(N, M) (N <= M)
1401 #define DO_MSGT(N, M) (N > M)
1402
1403 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \
1404 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1405 CPURISCVState *env, uint32_t desc) \
1406 { \
1407 uint32_t vm = vext_vm(desc); \
1408 uint32_t vl = env->vl; \
1409 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
1410 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1411 uint32_t vma = vext_vma(desc); \
1412 uint32_t i; \
1413 \
1414 VSTART_CHECK_EARLY_EXIT(env, vl); \
1415 \
1416 for (i = env->vstart; i < vl; i++) { \
1417 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1418 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1419 if (!vm && !vext_elem_mask(v0, i)) { \
1420 /* set masked-off elements to 1s */ \
1421 if (vma) { \
1422 vext_set_elem_mask(vd, i, 1); \
1423 } \
1424 continue; \
1425 } \
1426 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \
1427 } \
1428 env->vstart = 0; \
1429 /*
1430 * the mask destination register is always tail-agnostic
1431 * set tail elements to 1s
1432 */ \
1433 if (vta_all_1s) { \
1434 for (; i < total_elems; i++) { \
1435 vext_set_elem_mask(vd, i, 1); \
1436 } \
1437 } \
1438 }
1439
1440 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ)
1441 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
1442 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
1443 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
1444
1445 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE)
1446 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
1447 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
1448 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
1449
1450 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT)
1451 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
1452 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
1453 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
1454
1455 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT)
1456 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
1457 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
1458 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
1459
1460 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE)
1461 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
1462 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
1463 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
1464
1465 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE)
1466 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
1467 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
1468 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1469
1470 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \
1471 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
1472 CPURISCVState *env, uint32_t desc) \
1473 { \
1474 uint32_t vm = vext_vm(desc); \
1475 uint32_t vl = env->vl; \
1476 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
1477 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1478 uint32_t vma = vext_vma(desc); \
1479 uint32_t i; \
1480 \
1481 VSTART_CHECK_EARLY_EXIT(env, vl); \
1482 \
1483 for (i = env->vstart; i < vl; i++) { \
1484 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1485 if (!vm && !vext_elem_mask(v0, i)) { \
1486 /* set masked-off elements to 1s */ \
1487 if (vma) { \
1488 vext_set_elem_mask(vd, i, 1); \
1489 } \
1490 continue; \
1491 } \
1492 vext_set_elem_mask(vd, i, \
1493 DO_OP(s2, (ETYPE)(target_long)s1)); \
1494 } \
1495 env->vstart = 0; \
1496 /*
1497 * the mask destination register is always tail-agnostic
1498 * set tail elements to 1s
1499 */ \
1500 if (vta_all_1s) { \
1501 for (; i < total_elems; i++) { \
1502 vext_set_elem_mask(vd, i, 1); \
1503 } \
1504 } \
1505 }
1506
1507 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ)
1508 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
1509 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
1510 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
1511
1512 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE)
1513 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
1514 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
1515 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
1516
1517 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT)
1518 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
1519 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
1520 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
1521
1522 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT)
1523 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
1524 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
1525 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
1526
1527 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE)
1528 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
1529 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
1530 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
1531
1532 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE)
1533 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
1534 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
1535 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
1536
1537 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT)
1538 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
1539 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
1540 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
1541
1542 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT)
1543 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
1544 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
1545 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
1546
1547 /* Vector Integer Min/Max Instructions */
1548 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1549 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1550 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1551 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1552 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1553 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1554 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1555 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1556 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1557 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1558 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1559 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1560 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1561 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1562 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1563 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
1564 GEN_VEXT_VV(vminu_vv_b, 1)
1565 GEN_VEXT_VV(vminu_vv_h, 2)
1566 GEN_VEXT_VV(vminu_vv_w, 4)
1567 GEN_VEXT_VV(vminu_vv_d, 8)
1568 GEN_VEXT_VV(vmin_vv_b, 1)
1569 GEN_VEXT_VV(vmin_vv_h, 2)
1570 GEN_VEXT_VV(vmin_vv_w, 4)
1571 GEN_VEXT_VV(vmin_vv_d, 8)
1572 GEN_VEXT_VV(vmaxu_vv_b, 1)
1573 GEN_VEXT_VV(vmaxu_vv_h, 2)
1574 GEN_VEXT_VV(vmaxu_vv_w, 4)
1575 GEN_VEXT_VV(vmaxu_vv_d, 8)
1576 GEN_VEXT_VV(vmax_vv_b, 1)
1577 GEN_VEXT_VV(vmax_vv_h, 2)
1578 GEN_VEXT_VV(vmax_vv_w, 4)
1579 GEN_VEXT_VV(vmax_vv_d, 8)
1580
1581 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1582 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1583 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1584 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1585 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1586 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1587 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1588 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1589 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1590 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1591 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1592 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1593 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1594 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1595 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1596 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
1597 GEN_VEXT_VX(vminu_vx_b, 1)
1598 GEN_VEXT_VX(vminu_vx_h, 2)
1599 GEN_VEXT_VX(vminu_vx_w, 4)
1600 GEN_VEXT_VX(vminu_vx_d, 8)
1601 GEN_VEXT_VX(vmin_vx_b, 1)
1602 GEN_VEXT_VX(vmin_vx_h, 2)
1603 GEN_VEXT_VX(vmin_vx_w, 4)
1604 GEN_VEXT_VX(vmin_vx_d, 8)
1605 GEN_VEXT_VX(vmaxu_vx_b, 1)
1606 GEN_VEXT_VX(vmaxu_vx_h, 2)
1607 GEN_VEXT_VX(vmaxu_vx_w, 4)
1608 GEN_VEXT_VX(vmaxu_vx_d, 8)
1609 GEN_VEXT_VX(vmax_vx_b, 1)
1610 GEN_VEXT_VX(vmax_vx_h, 2)
1611 GEN_VEXT_VX(vmax_vx_w, 4)
1612 GEN_VEXT_VX(vmax_vx_d, 8)
1613
1614 /* Vector Single-Width Integer Multiply Instructions */
1615 #define DO_MUL(N, M) (N * M)
1616 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1617 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1618 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1619 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
1620 GEN_VEXT_VV(vmul_vv_b, 1)
1621 GEN_VEXT_VV(vmul_vv_h, 2)
1622 GEN_VEXT_VV(vmul_vv_w, 4)
1623 GEN_VEXT_VV(vmul_vv_d, 8)
1624
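/*
 * vmulh / vmulhu / vmulhsu return the upper SEW bits of the 2*SEW-bit
 * product: the 8/16/32-bit variants simply widen in C and shift, while the
 * 64-bit variants use the muls64()/mulu64() 128-bit multiply helpers.
 */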
1625 static int8_t do_mulh_b(int8_t s2, int8_t s1)
1626 {
1627 return (int16_t)s2 * (int16_t)s1 >> 8;
1628 }
1629
1630 static int16_t do_mulh_h(int16_t s2, int16_t s1)
1631 {
1632 return (int32_t)s2 * (int32_t)s1 >> 16;
1633 }
1634
1635 static int32_t do_mulh_w(int32_t s2, int32_t s1)
1636 {
1637 return (int64_t)s2 * (int64_t)s1 >> 32;
1638 }
1639
1640 static int64_t do_mulh_d(int64_t s2, int64_t s1)
1641 {
1642 uint64_t hi_64, lo_64;
1643
1644 muls64(&lo_64, &hi_64, s1, s2);
1645 return hi_64;
1646 }
1647
1648 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1649 {
1650 return (uint16_t)s2 * (uint16_t)s1 >> 8;
1651 }
1652
1653 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1654 {
1655 return (uint32_t)s2 * (uint32_t)s1 >> 16;
1656 }
1657
1658 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1659 {
1660 return (uint64_t)s2 * (uint64_t)s1 >> 32;
1661 }
1662
1663 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1664 {
1665 uint64_t hi_64, lo_64;
1666
1667 mulu64(&lo_64, &hi_64, s2, s1);
1668 return hi_64;
1669 }
1670
1671 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1672 {
1673 return (int16_t)s2 * (uint16_t)s1 >> 8;
1674 }
1675
1676 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1677 {
1678 return (int32_t)s2 * (uint32_t)s1 >> 16;
1679 }
1680
1681 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1682 {
1683 return (int64_t)s2 * (uint64_t)s1 >> 32;
1684 }
1685
1686 /*
1687 * Signed-by-unsigned high-half multiply, computed via the unsigned
1688 * product:
1689 *
1690 * Let A = signed operand, B = unsigned operand, and
1691 * P = mulu64(A, B), i.e. B times the unsigned reinterpretation of A.
1692 *
1693 * If A < 0, its unsigned reinterpretation is A + 2 ** 64, so
1694 *
1695 *     P = (A + 2 ** 64) * B = A * B + 2 ** 64 * B
1696 *
1697 * hence the signed product is A * B = P - 2 ** 64 * B.
1698 *
1699 * Subtracting 2 ** 64 * B only affects the upper 64 bits of the
1700 * 128-bit product, therefore
1701 *
1702 *     HI_P -= (A < 0 ? B : 0)
1703 */
1704
1705 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1706 {
1707 uint64_t hi_64, lo_64;
1708
1709 mulu64(&lo_64, &hi_64, s2, s1);
1710
1711 hi_64 -= s2 < 0 ? s1 : 0;
1712 return hi_64;
1713 }
1714
1715 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1716 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1717 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1718 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1719 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1720 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1721 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1722 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1723 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1724 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1725 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1726 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
1727 GEN_VEXT_VV(vmulh_vv_b, 1)
1728 GEN_VEXT_VV(vmulh_vv_h, 2)
1729 GEN_VEXT_VV(vmulh_vv_w, 4)
1730 GEN_VEXT_VV(vmulh_vv_d, 8)
1731 GEN_VEXT_VV(vmulhu_vv_b, 1)
1732 GEN_VEXT_VV(vmulhu_vv_h, 2)
1733 GEN_VEXT_VV(vmulhu_vv_w, 4)
1734 GEN_VEXT_VV(vmulhu_vv_d, 8)
1735 GEN_VEXT_VV(vmulhsu_vv_b, 1)
1736 GEN_VEXT_VV(vmulhsu_vv_h, 2)
1737 GEN_VEXT_VV(vmulhsu_vv_w, 4)
1738 GEN_VEXT_VV(vmulhsu_vv_d, 8)
1739
1740 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1741 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1742 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1743 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1744 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1745 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1746 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1747 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1748 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1749 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1750 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1751 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1752 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1753 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1754 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1755 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
1756 GEN_VEXT_VX(vmul_vx_b, 1)
1757 GEN_VEXT_VX(vmul_vx_h, 2)
1758 GEN_VEXT_VX(vmul_vx_w, 4)
1759 GEN_VEXT_VX(vmul_vx_d, 8)
1760 GEN_VEXT_VX(vmulh_vx_b, 1)
1761 GEN_VEXT_VX(vmulh_vx_h, 2)
1762 GEN_VEXT_VX(vmulh_vx_w, 4)
1763 GEN_VEXT_VX(vmulh_vx_d, 8)
1764 GEN_VEXT_VX(vmulhu_vx_b, 1)
1765 GEN_VEXT_VX(vmulhu_vx_h, 2)
1766 GEN_VEXT_VX(vmulhu_vx_w, 4)
1767 GEN_VEXT_VX(vmulhu_vx_d, 8)
1768 GEN_VEXT_VX(vmulhsu_vx_b, 1)
1769 GEN_VEXT_VX(vmulhsu_vx_h, 2)
1770 GEN_VEXT_VX(vmulhsu_vx_w, 4)
1771 GEN_VEXT_VX(vmulhsu_vx_d, 8)
1772
1773 /* Vector Integer Divide Instructions */
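/*
 * Division special cases follow the RVV spec (the same rules as the scalar
 * M extension): dividing by zero yields all ones for the quotient and the
 * unchanged dividend for the remainder; signed overflow (most-negative
 * dividend divided by -1, detected via N == -N) yields the dividend for the
 * quotient and 0 for the remainder.
 */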
1774 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1775 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
1776 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : \
1777 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
1778 #define DO_REM(N, M) (unlikely(M == 0) ? N : \
1779 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
1780
1781 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1782 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1783 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1784 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1785 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1786 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1787 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1788 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1789 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1790 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1791 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1792 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1793 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1794 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1795 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1796 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
1797 GEN_VEXT_VV(vdivu_vv_b, 1)
1798 GEN_VEXT_VV(vdivu_vv_h, 2)
1799 GEN_VEXT_VV(vdivu_vv_w, 4)
1800 GEN_VEXT_VV(vdivu_vv_d, 8)
1801 GEN_VEXT_VV(vdiv_vv_b, 1)
1802 GEN_VEXT_VV(vdiv_vv_h, 2)
1803 GEN_VEXT_VV(vdiv_vv_w, 4)
1804 GEN_VEXT_VV(vdiv_vv_d, 8)
1805 GEN_VEXT_VV(vremu_vv_b, 1)
1806 GEN_VEXT_VV(vremu_vv_h, 2)
1807 GEN_VEXT_VV(vremu_vv_w, 4)
1808 GEN_VEXT_VV(vremu_vv_d, 8)
1809 GEN_VEXT_VV(vrem_vv_b, 1)
1810 GEN_VEXT_VV(vrem_vv_h, 2)
1811 GEN_VEXT_VV(vrem_vv_w, 4)
1812 GEN_VEXT_VV(vrem_vv_d, 8)
1813
1814 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1815 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1816 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1817 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1818 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1819 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1820 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1821 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1822 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1823 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1824 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1825 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1826 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1827 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1828 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1829 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
1830 GEN_VEXT_VX(vdivu_vx_b, 1)
1831 GEN_VEXT_VX(vdivu_vx_h, 2)
1832 GEN_VEXT_VX(vdivu_vx_w, 4)
1833 GEN_VEXT_VX(vdivu_vx_d, 8)
1834 GEN_VEXT_VX(vdiv_vx_b, 1)
1835 GEN_VEXT_VX(vdiv_vx_h, 2)
1836 GEN_VEXT_VX(vdiv_vx_w, 4)
1837 GEN_VEXT_VX(vdiv_vx_d, 8)
1838 GEN_VEXT_VX(vremu_vx_b, 1)
1839 GEN_VEXT_VX(vremu_vx_h, 2)
1840 GEN_VEXT_VX(vremu_vx_w, 4)
1841 GEN_VEXT_VX(vremu_vx_d, 8)
1842 GEN_VEXT_VX(vrem_vx_b, 1)
1843 GEN_VEXT_VX(vrem_vx_h, 2)
1844 GEN_VEXT_VX(vrem_vx_w, 4)
1845 GEN_VEXT_VX(vrem_vx_d, 8)
1846
1847 /* Vector Widening Integer Multiply Instructions */
1848 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1849 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1850 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1851 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1852 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1853 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1854 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1855 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1856 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
1857 GEN_VEXT_VV(vwmul_vv_b, 2)
1858 GEN_VEXT_VV(vwmul_vv_h, 4)
1859 GEN_VEXT_VV(vwmul_vv_w, 8)
1860 GEN_VEXT_VV(vwmulu_vv_b, 2)
1861 GEN_VEXT_VV(vwmulu_vv_h, 4)
1862 GEN_VEXT_VV(vwmulu_vv_w, 8)
1863 GEN_VEXT_VV(vwmulsu_vv_b, 2)
1864 GEN_VEXT_VV(vwmulsu_vv_h, 4)
1865 GEN_VEXT_VV(vwmulsu_vv_w, 8)
1866
1867 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1868 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1869 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1870 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1871 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1872 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1873 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1874 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1875 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
1876 GEN_VEXT_VX(vwmul_vx_b, 2)
1877 GEN_VEXT_VX(vwmul_vx_h, 4)
1878 GEN_VEXT_VX(vwmul_vx_w, 8)
1879 GEN_VEXT_VX(vwmulu_vx_b, 2)
1880 GEN_VEXT_VX(vwmulu_vx_h, 4)
1881 GEN_VEXT_VX(vwmulu_vx_w, 8)
1882 GEN_VEXT_VX(vwmulsu_vx_b, 2)
1883 GEN_VEXT_VX(vwmulsu_vx_h, 4)
1884 GEN_VEXT_VX(vwmulsu_vx_w, 8)
1885
1886 /* Vector Single-Width Integer Multiply-Add Instructions */
1887 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
1888 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
1889 { \
1890 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
1891 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1892 TD d = *((TD *)vd + HD(i)); \
1893 *((TD *)vd + HD(i)) = OP(s2, s1, d); \
1894 }
1895
1896 #define DO_MACC(N, M, D) (M * N + D)
1897 #define DO_NMSAC(N, M, D) (-(M * N) + D)
1898 #define DO_MADD(N, M, D) (M * D + N)
1899 #define DO_NMSUB(N, M, D) (-(M * D) + N)
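/*
 * With the OPIVV3/OPIVX3 operand order (N = vs2 element, M = vs1 element or
 * rs1 scalar, D = vd element), vmacc/vnmsac use vd as the accumulator
 * (vd +/-= vs1 * vs2) while vmadd/vnmsub use vd as a multiplicand
 * (vd = +/-(vs1 * vd) + vs2).
 */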
1900 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1901 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1902 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1903 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1904 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1905 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1906 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1907 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1908 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1909 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1910 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1911 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1912 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1913 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1914 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1915 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
1916 GEN_VEXT_VV(vmacc_vv_b, 1)
1917 GEN_VEXT_VV(vmacc_vv_h, 2)
1918 GEN_VEXT_VV(vmacc_vv_w, 4)
1919 GEN_VEXT_VV(vmacc_vv_d, 8)
1920 GEN_VEXT_VV(vnmsac_vv_b, 1)
1921 GEN_VEXT_VV(vnmsac_vv_h, 2)
1922 GEN_VEXT_VV(vnmsac_vv_w, 4)
1923 GEN_VEXT_VV(vnmsac_vv_d, 8)
1924 GEN_VEXT_VV(vmadd_vv_b, 1)
1925 GEN_VEXT_VV(vmadd_vv_h, 2)
1926 GEN_VEXT_VV(vmadd_vv_w, 4)
1927 GEN_VEXT_VV(vmadd_vv_d, 8)
1928 GEN_VEXT_VV(vnmsub_vv_b, 1)
1929 GEN_VEXT_VV(vnmsub_vv_h, 2)
1930 GEN_VEXT_VV(vnmsub_vv_w, 4)
1931 GEN_VEXT_VV(vnmsub_vv_d, 8)
1932
1933 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
1934 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
1935 { \
1936 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1937 TD d = *((TD *)vd + HD(i)); \
1938 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \
1939 }
1940
1941 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1942 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1943 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1944 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1945 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1946 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1947 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1948 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1949 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1950 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1951 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1952 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1953 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1954 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1955 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1956 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
1957 GEN_VEXT_VX(vmacc_vx_b, 1)
1958 GEN_VEXT_VX(vmacc_vx_h, 2)
1959 GEN_VEXT_VX(vmacc_vx_w, 4)
1960 GEN_VEXT_VX(vmacc_vx_d, 8)
1961 GEN_VEXT_VX(vnmsac_vx_b, 1)
1962 GEN_VEXT_VX(vnmsac_vx_h, 2)
1963 GEN_VEXT_VX(vnmsac_vx_w, 4)
1964 GEN_VEXT_VX(vnmsac_vx_d, 8)
1965 GEN_VEXT_VX(vmadd_vx_b, 1)
1966 GEN_VEXT_VX(vmadd_vx_h, 2)
1967 GEN_VEXT_VX(vmadd_vx_w, 4)
1968 GEN_VEXT_VX(vmadd_vx_d, 8)
1969 GEN_VEXT_VX(vnmsub_vx_b, 1)
1970 GEN_VEXT_VX(vnmsub_vx_h, 2)
1971 GEN_VEXT_VX(vnmsub_vx_w, 4)
1972 GEN_VEXT_VX(vnmsub_vx_d, 8)
1973
1974 /* Vector Widening Integer Multiply-Add Instructions */
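/*
 * Per the RVV spec, vwmaccsu multiplies the signed vs1/rs1 operand by the
 * unsigned vs2 element, while vwmaccus (vx form only) multiplies the
 * unsigned rs1 scalar by the signed vs2 element; the WOP_SSU_*/WOP_SUS_*
 * type macros encode that operand signedness.
 */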
1975 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1976 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1977 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1978 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1979 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1980 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1981 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1982 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1983 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
1984 GEN_VEXT_VV(vwmaccu_vv_b, 2)
1985 GEN_VEXT_VV(vwmaccu_vv_h, 4)
1986 GEN_VEXT_VV(vwmaccu_vv_w, 8)
1987 GEN_VEXT_VV(vwmacc_vv_b, 2)
1988 GEN_VEXT_VV(vwmacc_vv_h, 4)
1989 GEN_VEXT_VV(vwmacc_vv_w, 8)
1990 GEN_VEXT_VV(vwmaccsu_vv_b, 2)
1991 GEN_VEXT_VV(vwmaccsu_vv_h, 4)
1992 GEN_VEXT_VV(vwmaccsu_vv_w, 8)
1993
1994 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
1995 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
1996 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
1997 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
1998 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
1999 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
2000 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
2001 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
2002 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
2003 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
2004 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
2005 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
2006 GEN_VEXT_VX(vwmaccu_vx_b, 2)
2007 GEN_VEXT_VX(vwmaccu_vx_h, 4)
2008 GEN_VEXT_VX(vwmaccu_vx_w, 8)
2009 GEN_VEXT_VX(vwmacc_vx_b, 2)
2010 GEN_VEXT_VX(vwmacc_vx_h, 4)
2011 GEN_VEXT_VX(vwmacc_vx_w, 8)
2012 GEN_VEXT_VX(vwmaccsu_vx_b, 2)
2013 GEN_VEXT_VX(vwmaccsu_vx_h, 4)
2014 GEN_VEXT_VX(vwmaccsu_vx_w, 8)
2015 GEN_VEXT_VX(vwmaccus_vx_b, 2)
2016 GEN_VEXT_VX(vwmaccus_vx_h, 4)
2017 GEN_VEXT_VX(vwmaccus_vx_w, 8)
2018
2019 /* Vector Integer Merge and Move Instructions */
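/*
 * vmv.v.v and vmv.v.x are always unmasked, and vmerge uses v0 purely as a
 * data selector (vs1 or rs1 where the mask bit is set, vs2 otherwise), so
 * none of the helpers below have masked-off element handling.
 */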
2020 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \
2021 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \
2022 uint32_t desc) \
2023 { \
2024 uint32_t vl = env->vl; \
2025 uint32_t esz = sizeof(ETYPE); \
2026 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2027 uint32_t vta = vext_vta(desc); \
2028 uint32_t i; \
2029 \
2030 VSTART_CHECK_EARLY_EXIT(env, vl); \
2031 \
2032 for (i = env->vstart; i < vl; i++) { \
2033 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
2034 *((ETYPE *)vd + H(i)) = s1; \
2035 } \
2036 env->vstart = 0; \
2037 /* set tail elements to 1s */ \
2038 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
2039 }
2040
2041 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1)
2042 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
2043 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
2044 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
2045
2046 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \
2047 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \
2048 uint32_t desc) \
2049 { \
2050 uint32_t vl = env->vl; \
2051 uint32_t esz = sizeof(ETYPE); \
2052 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2053 uint32_t vta = vext_vta(desc); \
2054 uint32_t i; \
2055 \
2056 VSTART_CHECK_EARLY_EXIT(env, vl); \
2057 \
2058 for (i = env->vstart; i < vl; i++) { \
2059 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \
2060 } \
2061 env->vstart = 0; \
2062 /* set tail elements to 1s */ \
2063 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
2064 }
2065
2066 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1)
2067 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
2068 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
2069 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
2070
2071 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \
2072 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
2073 CPURISCVState *env, uint32_t desc) \
2074 { \
2075 uint32_t vl = env->vl; \
2076 uint32_t esz = sizeof(ETYPE); \
2077 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2078 uint32_t vta = vext_vta(desc); \
2079 uint32_t i; \
2080 \
2081 VSTART_CHECK_EARLY_EXIT(env, vl); \
2082 \
2083 for (i = env->vstart; i < vl; i++) { \
2084 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \
2085 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \
2086 } \
2087 env->vstart = 0; \
2088 /* set tail elements to 1s */ \
2089 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
2090 }
2091
2092 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1)
2093 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
2094 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
2095 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
2096
2097 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \
2098 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
2099 void *vs2, CPURISCVState *env, uint32_t desc) \
2100 { \
2101 uint32_t vl = env->vl; \
2102 uint32_t esz = sizeof(ETYPE); \
2103 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2104 uint32_t vta = vext_vta(desc); \
2105 uint32_t i; \
2106 \
2107 VSTART_CHECK_EARLY_EXIT(env, vl); \
2108 \
2109 for (i = env->vstart; i < vl; i++) { \
2110 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
2111 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \
2112 (ETYPE)(target_long)s1); \
2113 *((ETYPE *)vd + H(i)) = d; \
2114 } \
2115 env->vstart = 0; \
2116 /* set tail elements to 1s */ \
2117 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
2118 }
2119
2120 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1)
2121 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
2122 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
2123 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
2124
2125 /*
2126 * Vector Fixed-Point Arithmetic Instructions
2127 */
2128
2129 /* Vector Single-Width Saturating Add and Subtract */
2130
2131 /*
2132 * Fixed-point instructions generally have a rounding mode and saturation,
2133 * so define the common macros for fixed point here.
2134 */
2135 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
2136 CPURISCVState *env, int vxrm);
2137
2138 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
2139 static inline void \
2140 do_##NAME(void *vd, void *vs1, void *vs2, int i, \
2141 CPURISCVState *env, int vxrm) \
2142 { \
2143 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
2144 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2145 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \
2146 }
2147
2148 static inline void
2149 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
2150 CPURISCVState *env,
2151 uint32_t vl, uint32_t vm, int vxrm,
2152 opivv2_rm_fn *fn, uint32_t vma, uint32_t esz)
2153 {
2154 for (uint32_t i = env->vstart; i < vl; i++) {
2155 if (!vm && !vext_elem_mask(v0, i)) {
2156 /* set masked-off elements to 1s */
2157 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
2158 continue;
2159 }
2160 fn(vd, vs1, vs2, i, env, vxrm);
2161 }
2162 env->vstart = 0;
2163 }
2164
2165 static inline void
2166 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
2167 CPURISCVState *env,
2168 uint32_t desc,
2169 opivv2_rm_fn *fn, uint32_t esz)
2170 {
2171 uint32_t vm = vext_vm(desc);
2172 uint32_t vl = env->vl;
2173 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
2174 uint32_t vta = vext_vta(desc);
2175 uint32_t vma = vext_vma(desc);
2176
2177 VSTART_CHECK_EARLY_EXIT(env, vl);
2178
2179 switch (env->vxrm) {
2180 case 0: /* rnu */
2181 vext_vv_rm_1(vd, v0, vs1, vs2,
2182 env, vl, vm, 0, fn, vma, esz);
2183 break;
2184 case 1: /* rne */
2185 vext_vv_rm_1(vd, v0, vs1, vs2,
2186 env, vl, vm, 1, fn, vma, esz);
2187 break;
2188 case 2: /* rdn */
2189 vext_vv_rm_1(vd, v0, vs1, vs2,
2190 env, vl, vm, 2, fn, vma, esz);
2191 break;
2192 default: /* rod */
2193 vext_vv_rm_1(vd, v0, vs1, vs2,
2194 env, vl, vm, 3, fn, vma, esz);
2195 break;
2196 }
2197 /* set tail elements to 1s */
2198 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
2199 }
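/*
 * Note: vext_vv_rm_2() (and vext_vx_rm_2() below) dispatch on env->vxrm
 * with a constant rounding mode per case, presumably so the compiler can
 * specialise the per-element function for each mode rather than reloading
 * the rounding mode on every element.
 */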
2200
2201 /* generate helpers for fixed point instructions with OPIVV format */
2202 #define GEN_VEXT_VV_RM(NAME, ESZ) \
2203 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
2204 CPURISCVState *env, uint32_t desc) \
2205 { \
2206 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \
2207 do_##NAME, ESZ); \
2208 }
2209
2210 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a,
2211 uint8_t b)
2212 {
2213 uint8_t res = a + b;
2214 if (res < a) {
2215 res = UINT8_MAX;
2216 env->vxsat = 0x1;
2217 }
2218 return res;
2219 }
2220
2221 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
2222 uint16_t b)
2223 {
2224 uint16_t res = a + b;
2225 if (res < a) {
2226 res = UINT16_MAX;
2227 env->vxsat = 0x1;
2228 }
2229 return res;
2230 }
2231
2232 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
2233 uint32_t b)
2234 {
2235 uint32_t res = a + b;
2236 if (res < a) {
2237 res = UINT32_MAX;
2238 env->vxsat = 0x1;
2239 }
2240 return res;
2241 }
2242
2243 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
2244 uint64_t b)
2245 {
2246 uint64_t res = a + b;
2247 if (res < a) {
2248 res = UINT64_MAX;
2249 env->vxsat = 0x1;
2250 }
2251 return res;
2252 }
2253
2254 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
2255 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
2256 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
2257 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
2258 GEN_VEXT_VV_RM(vsaddu_vv_b, 1)
2259 GEN_VEXT_VV_RM(vsaddu_vv_h, 2)
2260 GEN_VEXT_VV_RM(vsaddu_vv_w, 4)
2261 GEN_VEXT_VV_RM(vsaddu_vv_d, 8)
2262
2263 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
2264 CPURISCVState *env, int vxrm);
2265
2266 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
2267 static inline void \
2268 do_##NAME(void *vd, target_long s1, void *vs2, int i, \
2269 CPURISCVState *env, int vxrm) \
2270 { \
2271 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2272 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \
2273 }
2274
2275 static inline void
2276 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
2277 CPURISCVState *env,
2278 uint32_t vl, uint32_t vm, int vxrm,
2279 opivx2_rm_fn *fn, uint32_t vma, uint32_t esz)
2280 {
2281 for (uint32_t i = env->vstart; i < vl; i++) {
2282 if (!vm && !vext_elem_mask(v0, i)) {
2283 /* set masked-off elements to 1s */
2284 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
2285 continue;
2286 }
2287 fn(vd, s1, vs2, i, env, vxrm);
2288 }
2289 env->vstart = 0;
2290 }
2291
2292 static inline void
2293 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2294 CPURISCVState *env,
2295 uint32_t desc,
2296 opivx2_rm_fn *fn, uint32_t esz)
2297 {
2298 uint32_t vm = vext_vm(desc);
2299 uint32_t vl = env->vl;
2300 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
2301 uint32_t vta = vext_vta(desc);
2302 uint32_t vma = vext_vma(desc);
2303
2304 VSTART_CHECK_EARLY_EXIT(env, vl);
2305
2306 switch (env->vxrm) {
2307 case 0: /* rnu */
2308 vext_vx_rm_1(vd, v0, s1, vs2,
2309 env, vl, vm, 0, fn, vma, esz);
2310 break;
2311 case 1: /* rne */
2312 vext_vx_rm_1(vd, v0, s1, vs2,
2313 env, vl, vm, 1, fn, vma, esz);
2314 break;
2315 case 2: /* rdn */
2316 vext_vx_rm_1(vd, v0, s1, vs2,
2317 env, vl, vm, 2, fn, vma, esz);
2318 break;
2319 default: /* rod */
2320 vext_vx_rm_1(vd, v0, s1, vs2,
2321 env, vl, vm, 3, fn, vma, esz);
2322 break;
2323 }
2324 /* set tail elements to 1s */
2325 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
2326 }
2327
2328 /* generate helpers for fixed point instructions with OPIVX format */
2329 #define GEN_VEXT_VX_RM(NAME, ESZ) \
2330 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
2331 void *vs2, CPURISCVState *env, \
2332 uint32_t desc) \
2333 { \
2334 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \
2335 do_##NAME, ESZ); \
2336 }
2337
2338 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2339 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2340 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2341 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
2342 GEN_VEXT_VX_RM(vsaddu_vx_b, 1)
2343 GEN_VEXT_VX_RM(vsaddu_vx_h, 2)
2344 GEN_VEXT_VX_RM(vsaddu_vx_w, 4)
2345 GEN_VEXT_VX_RM(vsaddu_vx_d, 8)
2346
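/*
 * Signed saturating add: overflow occurred iff the result's sign differs
 * from the sign of both operands, i.e. (res ^ a) & (res ^ b) has the sign
 * bit set.
 */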
2347 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2348 {
2349 int8_t res = a + b;
2350 if ((res ^ a) & (res ^ b) & INT8_MIN) {
2351 res = a > 0 ? INT8_MAX : INT8_MIN;
2352 env->vxsat = 0x1;
2353 }
2354 return res;
2355 }
2356
2357 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a,
2358 int16_t b)
2359 {
2360 int16_t res = a + b;
2361 if ((res ^ a) & (res ^ b) & INT16_MIN) {
2362 res = a > 0 ? INT16_MAX : INT16_MIN;
2363 env->vxsat = 0x1;
2364 }
2365 return res;
2366 }
2367
2368 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a,
2369 int32_t b)
2370 {
2371 int32_t res = a + b;
2372 if ((res ^ a) & (res ^ b) & INT32_MIN) {
2373 res = a > 0 ? INT32_MAX : INT32_MIN;
2374 env->vxsat = 0x1;
2375 }
2376 return res;
2377 }
2378
2379 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a,
2380 int64_t b)
2381 {
2382 int64_t res = a + b;
2383 if ((res ^ a) & (res ^ b) & INT64_MIN) {
2384 res = a > 0 ? INT64_MAX : INT64_MIN;
2385 env->vxsat = 0x1;
2386 }
2387 return res;
2388 }
2389
2390 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2391 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2392 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2393 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
2394 GEN_VEXT_VV_RM(vsadd_vv_b, 1)
2395 GEN_VEXT_VV_RM(vsadd_vv_h, 2)
2396 GEN_VEXT_VV_RM(vsadd_vv_w, 4)
2397 GEN_VEXT_VV_RM(vsadd_vv_d, 8)
2398
2399 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2400 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2401 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2402 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
2403 GEN_VEXT_VX_RM(vsadd_vx_b, 1)
2404 GEN_VEXT_VX_RM(vsadd_vx_h, 2)
2405 GEN_VEXT_VX_RM(vsadd_vx_w, 4)
2406 GEN_VEXT_VX_RM(vsadd_vx_d, 8)
2407
2408 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a,
2409 uint8_t b)
2410 {
2411 uint8_t res = a - b;
2412 if (res > a) {
2413 res = 0;
2414 env->vxsat = 0x1;
2415 }
2416 return res;
2417 }
2418
2419 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2420 uint16_t b)
2421 {
2422 uint16_t res = a - b;
2423 if (res > a) {
2424 res = 0;
2425 env->vxsat = 0x1;
2426 }
2427 return res;
2428 }
2429
2430 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2431 uint32_t b)
2432 {
2433 uint32_t res = a - b;
2434 if (res > a) {
2435 res = 0;
2436 env->vxsat = 0x1;
2437 }
2438 return res;
2439 }
2440
2441 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2442 uint64_t b)
2443 {
2444 uint64_t res = a - b;
2445 if (res > a) {
2446 res = 0;
2447 env->vxsat = 0x1;
2448 }
2449 return res;
2450 }
2451
2452 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2453 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2454 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2455 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
2456 GEN_VEXT_VV_RM(vssubu_vv_b, 1)
2457 GEN_VEXT_VV_RM(vssubu_vv_h, 2)
2458 GEN_VEXT_VV_RM(vssubu_vv_w, 4)
2459 GEN_VEXT_VV_RM(vssubu_vv_d, 8)
2460
2461 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2462 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2463 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2464 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
2465 GEN_VEXT_VX_RM(vssubu_vx_b, 1)
2466 GEN_VEXT_VX_RM(vssubu_vx_h, 2)
2467 GEN_VEXT_VX_RM(vssubu_vx_w, 4)
2468 GEN_VEXT_VX_RM(vssubu_vx_d, 8)
2469
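/*
 * Signed saturating subtract: overflow occurred iff the operands have
 * different signs and the result's sign differs from the minuend's,
 * i.e. (res ^ a) & (a ^ b) has the sign bit set.
 */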
2470 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2471 {
2472 int8_t res = a - b;
2473 if ((res ^ a) & (a ^ b) & INT8_MIN) {
2474 res = a >= 0 ? INT8_MAX : INT8_MIN;
2475 env->vxsat = 0x1;
2476 }
2477 return res;
2478 }
2479
2480 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a,
2481 int16_t b)
2482 {
2483 int16_t res = a - b;
2484 if ((res ^ a) & (a ^ b) & INT16_MIN) {
2485 res = a >= 0 ? INT16_MAX : INT16_MIN;
2486 env->vxsat = 0x1;
2487 }
2488 return res;
2489 }
2490
2491 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a,
2492 int32_t b)
2493 {
2494 int32_t res = a - b;
2495 if ((res ^ a) & (a ^ b) & INT32_MIN) {
2496 res = a >= 0 ? INT32_MAX : INT32_MIN;
2497 env->vxsat = 0x1;
2498 }
2499 return res;
2500 }
2501
2502 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a,
2503 int64_t b)
2504 {
2505 int64_t res = a - b;
2506 if ((res ^ a) & (a ^ b) & INT64_MIN) {
2507 res = a >= 0 ? INT64_MAX : INT64_MIN;
2508 env->vxsat = 0x1;
2509 }
2510 return res;
2511 }
2512
2513 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2514 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2515 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2516 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
2517 GEN_VEXT_VV_RM(vssub_vv_b, 1)
2518 GEN_VEXT_VV_RM(vssub_vv_h, 2)
2519 GEN_VEXT_VV_RM(vssub_vv_w, 4)
2520 GEN_VEXT_VV_RM(vssub_vv_d, 8)
2521
2522 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2523 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2524 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2525 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
2526 GEN_VEXT_VX_RM(vssub_vx_b, 1)
2527 GEN_VEXT_VX_RM(vssub_vx_h, 2)
2528 GEN_VEXT_VX_RM(vssub_vx_w, 4)
2529 GEN_VEXT_VX_RM(vssub_vx_d, 8)
2530
2531 /* Vector Single-Width Averaging Add and Subtract */
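/*
 * get_round() returns the increment (0 or 1) to add to v >> shift according
 * to the fixed-point rounding mode vxrm (0 = rnu, 1 = rne, 2 = rdn,
 * 3 = rod), i.e. the roundoff helper described in the RVV spec.
 */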
2532 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2533 {
2534 uint8_t d = extract64(v, shift, 1);
2535 uint8_t d1;
2536 uint64_t D1, D2;
2537
2538 if (shift == 0 || shift > 64) {
2539 return 0;
2540 }
2541
2542 d1 = extract64(v, shift - 1, 1);
2543 D1 = extract64(v, 0, shift);
2544 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2545 return d1;
2546 } else if (vxrm == 1) { /* round-to-nearest-even */
2547 if (shift > 1) {
2548 D2 = extract64(v, 0, shift - 1);
2549 return d1 & ((D2 != 0) | d);
2550 } else {
2551 return d1 & d;
2552 }
2553 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2554 return !d & (D1 != 0);
2555 }
2556 return 0; /* round-down (truncate) */
2557 }
2558
2559 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a,
2560 int32_t b)
2561 {
2562 int64_t res = (int64_t)a + b;
2563 uint8_t round = get_round(vxrm, res, 1);
2564
2565 return (res >> 1) + round;
2566 }
2567
2568 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a,
2569 int64_t b)
2570 {
2571 int64_t res = a + b;
2572 uint8_t round = get_round(vxrm, res, 1);
2573 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2574
2575 /* With signed overflow, bit 64 is inverse of bit 63. */
2576 return ((res >> 1) ^ over) + round;
2577 }
2578
2579 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2580 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2581 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2582 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
2583 GEN_VEXT_VV_RM(vaadd_vv_b, 1)
2584 GEN_VEXT_VV_RM(vaadd_vv_h, 2)
2585 GEN_VEXT_VV_RM(vaadd_vv_w, 4)
2586 GEN_VEXT_VV_RM(vaadd_vv_d, 8)
2587
2588 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2589 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2590 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2591 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
2592 GEN_VEXT_VX_RM(vaadd_vx_b, 1)
2593 GEN_VEXT_VX_RM(vaadd_vx_h, 2)
2594 GEN_VEXT_VX_RM(vaadd_vx_w, 4)
2595 GEN_VEXT_VX_RM(vaadd_vx_d, 8)
2596
2597 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2598 uint32_t a, uint32_t b)
2599 {
2600 uint64_t res = (uint64_t)a + b;
2601 uint8_t round = get_round(vxrm, res, 1);
2602
2603 return (res >> 1) + round;
2604 }
2605
2606 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
2607 uint64_t a, uint64_t b)
2608 {
2609 uint64_t res = a + b;
2610 uint8_t round = get_round(vxrm, res, 1);
2611 uint64_t over = (uint64_t)(res < a) << 63;
2612
2613 return ((res >> 1) | over) + round;
2614 }
2615
2616 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
2617 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
2618 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
2619 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
2620 GEN_VEXT_VV_RM(vaaddu_vv_b, 1)
2621 GEN_VEXT_VV_RM(vaaddu_vv_h, 2)
2622 GEN_VEXT_VV_RM(vaaddu_vv_w, 4)
2623 GEN_VEXT_VV_RM(vaaddu_vv_d, 8)
2624
2625 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
2626 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
2627 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
2628 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
2629 GEN_VEXT_VX_RM(vaaddu_vx_b, 1)
2630 GEN_VEXT_VX_RM(vaaddu_vx_h, 2)
2631 GEN_VEXT_VX_RM(vaaddu_vx_w, 4)
2632 GEN_VEXT_VX_RM(vaaddu_vx_d, 8)
2633
2634 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a,
2635 int32_t b)
2636 {
2637 int64_t res = (int64_t)a - b;
2638 uint8_t round = get_round(vxrm, res, 1);
2639
2640 return (res >> 1) + round;
2641 }
2642
2643 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a,
2644 int64_t b)
2645 {
2646 int64_t res = (int64_t)a - b;
2647 uint8_t round = get_round(vxrm, res, 1);
2648 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2649
2650 /* With signed overflow, bit 64 is inverse of bit 63. */
2651 return ((res >> 1) ^ over) + round;
2652 }
2653
2654 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2655 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2656 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2657 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
2658 GEN_VEXT_VV_RM(vasub_vv_b, 1)
2659 GEN_VEXT_VV_RM(vasub_vv_h, 2)
2660 GEN_VEXT_VV_RM(vasub_vv_w, 4)
2661 GEN_VEXT_VV_RM(vasub_vv_d, 8)
2662
2663 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2664 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2665 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2666 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
2667 GEN_VEXT_VX_RM(vasub_vx_b, 1)
2668 GEN_VEXT_VX_RM(vasub_vx_h, 2)
2669 GEN_VEXT_VX_RM(vasub_vx_w, 4)
2670 GEN_VEXT_VX_RM(vasub_vx_d, 8)
2671
2672 static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2673 uint32_t a, uint32_t b)
2674 {
2675 int64_t res = (int64_t)a - b;
2676 uint8_t round = get_round(vxrm, res, 1);
2677
2678 return (res >> 1) + round;
2679 }
2680
2681 static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
2682 uint64_t a, uint64_t b)
2683 {
2684 uint64_t res = (uint64_t)a - b;
2685 uint8_t round = get_round(vxrm, res, 1);
2686 uint64_t over = (uint64_t)(res > a) << 63;
2687
2688 return ((res >> 1) | over) + round;
2689 }
2690
2691 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
2692 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
2693 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
2694 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
2695 GEN_VEXT_VV_RM(vasubu_vv_b, 1)
2696 GEN_VEXT_VV_RM(vasubu_vv_h, 2)
2697 GEN_VEXT_VV_RM(vasubu_vv_w, 4)
2698 GEN_VEXT_VV_RM(vasubu_vv_d, 8)
2699
2700 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
2701 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
2702 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
2703 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
2704 GEN_VEXT_VX_RM(vasubu_vx_b, 1)
2705 GEN_VEXT_VX_RM(vasubu_vx_h, 2)
2706 GEN_VEXT_VX_RM(vasubu_vx_w, 4)
2707 GEN_VEXT_VX_RM(vasubu_vx_d, 8)
2708
2709 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2710 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2711 {
2712 uint8_t round;
2713 int16_t res;
2714
2715 res = (int16_t)a * (int16_t)b;
2716 round = get_round(vxrm, res, 7);
2717 res = (res >> 7) + round;
2718
2719 if (res > INT8_MAX) {
2720 env->vxsat = 0x1;
2721 return INT8_MAX;
2722 } else if (res < INT8_MIN) {
2723 env->vxsat = 0x1;
2724 return INT8_MIN;
2725 } else {
2726 return res;
2727 }
2728 }
2729
2730 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2731 {
2732 uint8_t round;
2733 int32_t res;
2734
2735 res = (int32_t)a * (int32_t)b;
2736 round = get_round(vxrm, res, 15);
2737 res = (res >> 15) + round;
2738
2739 if (res > INT16_MAX) {
2740 env->vxsat = 0x1;
2741 return INT16_MAX;
2742 } else if (res < INT16_MIN) {
2743 env->vxsat = 0x1;
2744 return INT16_MIN;
2745 } else {
2746 return res;
2747 }
2748 }
2749
2750 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2751 {
2752 uint8_t round;
2753 int64_t res;
2754
2755 res = (int64_t)a * (int64_t)b;
2756 round = get_round(vxrm, res, 31);
2757 res = (res >> 31) + round;
2758
2759 if (res > INT32_MAX) {
2760 env->vxsat = 0x1;
2761 return INT32_MAX;
2762 } else if (res < INT32_MIN) {
2763 env->vxsat = 0x1;
2764 return INT32_MIN;
2765 } else {
2766 return res;
2767 }
2768 }
2769
2770 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2771 {
2772 uint8_t round;
2773 uint64_t hi_64, lo_64;
2774 int64_t res;
2775
2776 if (a == INT64_MIN && b == INT64_MIN) {
2777 env->vxsat = 1;
2778 return INT64_MAX;
2779 }
2780
2781 muls64(&lo_64, &hi_64, a, b);
2782 round = get_round(vxrm, lo_64, 63);
2783 /*
2784 * Cannot overflow, as there are always
2785 * 2 sign bits after multiply.
2786 */
2787 res = (hi_64 << 1) | (lo_64 >> 63);
2788 if (round) {
2789 if (res == INT64_MAX) {
2790 env->vxsat = 1;
2791 } else {
2792 res += 1;
2793 }
2794 }
2795 return res;
2796 }
2797
2798 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2799 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2800 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2801 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
2802 GEN_VEXT_VV_RM(vsmul_vv_b, 1)
2803 GEN_VEXT_VV_RM(vsmul_vv_h, 2)
2804 GEN_VEXT_VV_RM(vsmul_vv_w, 4)
2805 GEN_VEXT_VV_RM(vsmul_vv_d, 8)
2806
2807 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2808 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2809 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2810 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
2811 GEN_VEXT_VX_RM(vsmul_vx_b, 1)
2812 GEN_VEXT_VX_RM(vsmul_vx_h, 2)
2813 GEN_VEXT_VX_RM(vsmul_vx_w, 4)
2814 GEN_VEXT_VX_RM(vsmul_vx_d, 8)
2815
2816 /* Vector Single-Width Scaling Shift Instructions */
2817 static inline uint8_t
2818 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2819 {
2820 uint8_t round, shift = b & 0x7;
2821 uint8_t res;
2822
2823 round = get_round(vxrm, a, shift);
2824 res = (a >> shift) + round;
2825 return res;
2826 }
2827 static inline uint16_t
2828 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2829 {
2830 uint8_t round, shift = b & 0xf;
2831
2832 round = get_round(vxrm, a, shift);
2833 return (a >> shift) + round;
2834 }
2835 static inline uint32_t
2836 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2837 {
2838 uint8_t round, shift = b & 0x1f;
2839
2840 round = get_round(vxrm, a, shift);
2841 return (a >> shift) + round;
2842 }
2843 static inline uint64_t
2844 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2845 {
2846 uint8_t round, shift = b & 0x3f;
2847
2848 round = get_round(vxrm, a, shift);
2849 return (a >> shift) + round;
2850 }
2851 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2852 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2853 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2854 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
2855 GEN_VEXT_VV_RM(vssrl_vv_b, 1)
2856 GEN_VEXT_VV_RM(vssrl_vv_h, 2)
2857 GEN_VEXT_VV_RM(vssrl_vv_w, 4)
2858 GEN_VEXT_VV_RM(vssrl_vv_d, 8)
2859
2860 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2861 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2862 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2863 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
2864 GEN_VEXT_VX_RM(vssrl_vx_b, 1)
2865 GEN_VEXT_VX_RM(vssrl_vx_h, 2)
2866 GEN_VEXT_VX_RM(vssrl_vx_w, 4)
2867 GEN_VEXT_VX_RM(vssrl_vx_d, 8)
2868
2869 static inline int8_t
2870 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2871 {
2872 uint8_t round, shift = b & 0x7;
2873
2874 round = get_round(vxrm, a, shift);
2875 return (a >> shift) + round;
2876 }
2877 static inline int16_t
2878 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2879 {
2880 uint8_t round, shift = b & 0xf;
2881
2882 round = get_round(vxrm, a, shift);
2883 return (a >> shift) + round;
2884 }
2885 static inline int32_t
2886 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2887 {
2888 uint8_t round, shift = b & 0x1f;
2889
2890 round = get_round(vxrm, a, shift);
2891 return (a >> shift) + round;
2892 }
2893 static inline int64_t
2894 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2895 {
2896 uint8_t round, shift = b & 0x3f;
2897
2898 round = get_round(vxrm, a, shift);
2899 return (a >> shift) + round;
2900 }
2901
2902 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2903 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2904 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2905 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
2906 GEN_VEXT_VV_RM(vssra_vv_b, 1)
2907 GEN_VEXT_VV_RM(vssra_vv_h, 2)
2908 GEN_VEXT_VV_RM(vssra_vv_w, 4)
2909 GEN_VEXT_VV_RM(vssra_vv_d, 8)
2910
2911 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2912 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2913 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2914 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
2915 GEN_VEXT_VX_RM(vssra_vx_b, 1)
2916 GEN_VEXT_VX_RM(vssra_vx_h, 2)
2917 GEN_VEXT_VX_RM(vssra_vx_w, 4)
2918 GEN_VEXT_VX_RM(vssra_vx_d, 8)
2919
2920 /* Vector Narrowing Fixed-Point Clip Instructions */
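/*
 * The wide source element is 2*SEW bits, so the shift amount taken from the
 * narrow operand is masked to log2(2*SEW) bits (0xf, 0x1f and 0x3f below)
 * before the shifted result is rounded and saturated into SEW bits.
 */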
2921 static inline int8_t
2922 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2923 {
2924 uint8_t round, shift = b & 0xf;
2925 int16_t res;
2926
2927 round = get_round(vxrm, a, shift);
2928 res = (a >> shift) + round;
2929 if (res > INT8_MAX) {
2930 env->vxsat = 0x1;
2931 return INT8_MAX;
2932 } else if (res < INT8_MIN) {
2933 env->vxsat = 0x1;
2934 return INT8_MIN;
2935 } else {
2936 return res;
2937 }
2938 }
2939
2940 static inline int16_t
2941 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2942 {
2943 uint8_t round, shift = b & 0x1f;
2944 int32_t res;
2945
2946 round = get_round(vxrm, a, shift);
2947 res = (a >> shift) + round;
2948 if (res > INT16_MAX) {
2949 env->vxsat = 0x1;
2950 return INT16_MAX;
2951 } else if (res < INT16_MIN) {
2952 env->vxsat = 0x1;
2953 return INT16_MIN;
2954 } else {
2955 return res;
2956 }
2957 }
2958
2959 static inline int32_t
2960 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2961 {
2962 uint8_t round, shift = b & 0x3f;
2963 int64_t res;
2964
2965 round = get_round(vxrm, a, shift);
2966 res = (a >> shift) + round;
2967 if (res > INT32_MAX) {
2968 env->vxsat = 0x1;
2969 return INT32_MAX;
2970 } else if (res < INT32_MIN) {
2971 env->vxsat = 0x1;
2972 return INT32_MIN;
2973 } else {
2974 return res;
2975 }
2976 }
2977
2978 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
2979 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
2980 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
2981 GEN_VEXT_VV_RM(vnclip_wv_b, 1)
2982 GEN_VEXT_VV_RM(vnclip_wv_h, 2)
2983 GEN_VEXT_VV_RM(vnclip_wv_w, 4)
2984
2985 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
2986 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
2987 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
2988 GEN_VEXT_VX_RM(vnclip_wx_b, 1)
2989 GEN_VEXT_VX_RM(vnclip_wx_h, 2)
2990 GEN_VEXT_VX_RM(vnclip_wx_w, 4)
2991
2992 static inline uint8_t
2993 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2994 {
2995 uint8_t round, shift = b & 0xf;
2996 uint16_t res;
2997
2998 round = get_round(vxrm, a, shift);
2999 res = (a >> shift) + round;
3000 if (res > UINT8_MAX) {
3001 env->vxsat = 0x1;
3002 return UINT8_MAX;
3003 } else {
3004 return res;
3005 }
3006 }
3007
3008 static inline uint16_t
3009 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
3010 {
3011 uint8_t round, shift = b & 0x1f;
3012 uint32_t res;
3013
3014 round = get_round(vxrm, a, shift);
3015 res = (a >> shift) + round;
3016 if (res > UINT16_MAX) {
3017 env->vxsat = 0x1;
3018 return UINT16_MAX;
3019 } else {
3020 return res;
3021 }
3022 }
3023
3024 static inline uint32_t
3025 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
3026 {
3027 uint8_t round, shift = b & 0x3f;
3028 uint64_t res;
3029
3030 round = get_round(vxrm, a, shift);
3031 res = (a >> shift) + round;
3032 if (res > UINT32_MAX) {
3033 env->vxsat = 0x1;
3034 return UINT32_MAX;
3035 } else {
3036 return res;
3037 }
3038 }
3039
3040 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
3041 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
3042 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
3043 GEN_VEXT_VV_RM(vnclipu_wv_b, 1)
3044 GEN_VEXT_VV_RM(vnclipu_wv_h, 2)
3045 GEN_VEXT_VV_RM(vnclipu_wv_w, 4)
3046
3047 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
3048 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
3049 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
3050 GEN_VEXT_VX_RM(vnclipu_wx_b, 1)
3051 GEN_VEXT_VX_RM(vnclipu_wx_h, 2)
3052 GEN_VEXT_VX_RM(vnclipu_wx_w, 4)
3053
3054 /*
3055 * Vector Floating-Point Arithmetic Instructions
3056 */
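/*
 * The floating-point helpers pass &env->fp_status to the softfloat
 * routines, so they use the currently selected rounding mode and accumulate
 * their exception flags there.
 */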
3057 /* Vector Single-Width Floating-Point Add/Subtract Instructions */
3058 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
3059 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
3060 CPURISCVState *env) \
3061 { \
3062 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
3063 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3064 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \
3065 }
3066
3067 #define GEN_VEXT_VV_ENV(NAME, ESZ) \
3068 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
3069 void *vs2, CPURISCVState *env, \
3070 uint32_t desc) \
3071 { \
3072 uint32_t vm = vext_vm(desc); \
3073 uint32_t vl = env->vl; \
3074 uint32_t total_elems = \
3075 vext_get_total_elems(env, desc, ESZ); \
3076 uint32_t vta = vext_vta(desc); \
3077 uint32_t vma = vext_vma(desc); \
3078 uint32_t i; \
3079 \
3080 VSTART_CHECK_EARLY_EXIT(env, vl); \
3081 \
3082 for (i = env->vstart; i < vl; i++) { \
3083 if (!vm && !vext_elem_mask(v0, i)) { \
3084 /* set masked-off elements to 1s */ \
3085 vext_set_elems_1s(vd, vma, i * ESZ, \
3086 (i + 1) * ESZ); \
3087 continue; \
3088 } \
3089 do_##NAME(vd, vs1, vs2, i, env); \
3090 } \
3091 env->vstart = 0; \
3092 /* set tail elements to 1s */ \
3093 vext_set_elems_1s(vd, vta, vl * ESZ, \
3094 total_elems * ESZ); \
3095 }
3096
3097 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
3098 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
3099 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
3100 GEN_VEXT_VV_ENV(vfadd_vv_h, 2)
3101 GEN_VEXT_VV_ENV(vfadd_vv_w, 4)
3102 GEN_VEXT_VV_ENV(vfadd_vv_d, 8)
3103
3104 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
3105 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
3106 CPURISCVState *env) \
3107 { \
3108 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3109 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
3110 }
3111
3112 #define GEN_VEXT_VF(NAME, ESZ) \
3113 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \
3114 void *vs2, CPURISCVState *env, \
3115 uint32_t desc) \
3116 { \
3117 uint32_t vm = vext_vm(desc); \
3118 uint32_t vl = env->vl; \
3119 uint32_t total_elems = \
3120 vext_get_total_elems(env, desc, ESZ); \
3121 uint32_t vta = vext_vta(desc); \
3122 uint32_t vma = vext_vma(desc); \
3123 uint32_t i; \
3124 \
3125 VSTART_CHECK_EARLY_EXIT(env, vl); \
3126 \
3127 for (i = env->vstart; i < vl; i++) { \
3128 if (!vm && !vext_elem_mask(v0, i)) { \
3129 /* set masked-off elements to 1s */ \
3130 vext_set_elems_1s(vd, vma, i * ESZ, \
3131 (i + 1) * ESZ); \
3132 continue; \
3133 } \
3134 do_##NAME(vd, s1, vs2, i, env); \
3135 } \
3136 env->vstart = 0; \
3137 /* set tail elements to 1s */ \
3138 vext_set_elems_1s(vd, vta, vl * ESZ, \
3139 total_elems * ESZ); \
3140 }
3141
3142 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
3143 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
3144 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
3145 GEN_VEXT_VF(vfadd_vf_h, 2)
3146 GEN_VEXT_VF(vfadd_vf_w, 4)
3147 GEN_VEXT_VF(vfadd_vf_d, 8)
3148
3149 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
3150 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
3151 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
3152 GEN_VEXT_VV_ENV(vfsub_vv_h, 2)
3153 GEN_VEXT_VV_ENV(vfsub_vv_w, 4)
3154 GEN_VEXT_VV_ENV(vfsub_vv_d, 8)
3155 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
3156 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
3157 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
3158 GEN_VEXT_VF(vfsub_vf_h, 2)
3159 GEN_VEXT_VF(vfsub_vf_w, 4)
3160 GEN_VEXT_VF(vfsub_vf_d, 8)
3161
3162 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
3163 {
3164 return float16_sub(b, a, s);
3165 }
3166
3167 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
3168 {
3169 return float32_sub(b, a, s);
3170 }
3171
3172 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
3173 {
3174 return float64_sub(b, a, s);
3175 }
3176
3177 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
3178 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
3179 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
3180 GEN_VEXT_VF(vfrsub_vf_h, 2)
3181 GEN_VEXT_VF(vfrsub_vf_w, 4)
3182 GEN_VEXT_VF(vfrsub_vf_d, 8)
3183
3184 /* Vector Widening Floating-Point Add/Subtract Instructions */
3185 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
3186 {
3187 return float32_add(float16_to_float32(a, true, s),
3188 float16_to_float32(b, true, s), s);
3189 }
3190
3191 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
3192 {
3193 return float64_add(float32_to_float64(a, s),
3194 float32_to_float64(b, s), s);
3195
3196 }
3197
3198 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
3199 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
3200 GEN_VEXT_VV_ENV(vfwadd_vv_h, 4)
3201 GEN_VEXT_VV_ENV(vfwadd_vv_w, 8)
3202 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
3203 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
3204 GEN_VEXT_VF(vfwadd_vf_h, 4)
3205 GEN_VEXT_VF(vfwadd_vf_w, 8)
3206
3207 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
3208 {
3209 return float32_sub(float16_to_float32(a, true, s),
3210 float16_to_float32(b, true, s), s);
3211 }
3212
3213 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
3214 {
3215 return float64_sub(float32_to_float64(a, s),
3216 float32_to_float64(b, s), s);
3217
3218 }
3219
3220 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
3221 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
3222 GEN_VEXT_VV_ENV(vfwsub_vv_h, 4)
3223 GEN_VEXT_VV_ENV(vfwsub_vv_w, 8)
3224 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
3225 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
3226 GEN_VEXT_VF(vfwsub_vf_h, 4)
3227 GEN_VEXT_VF(vfwsub_vf_w, 8)
3228
3229 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
3230 {
3231 return float32_add(a, float16_to_float32(b, true, s), s);
3232 }
3233
3234 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
3235 {
3236 return float64_add(a, float32_to_float64(b, s), s);
3237 }
3238
3239 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
3240 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
3241 GEN_VEXT_VV_ENV(vfwadd_wv_h, 4)
3242 GEN_VEXT_VV_ENV(vfwadd_wv_w, 8)
3243 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
3244 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
3245 GEN_VEXT_VF(vfwadd_wf_h, 4)
3246 GEN_VEXT_VF(vfwadd_wf_w, 8)
3247
3248 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
3249 {
3250 return float32_sub(a, float16_to_float32(b, true, s), s);
3251 }
3252
3253 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
3254 {
3255 return float64_sub(a, float32_to_float64(b, s), s);
3256 }
3257
3258 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
3259 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
3260 GEN_VEXT_VV_ENV(vfwsub_wv_h, 4)
3261 GEN_VEXT_VV_ENV(vfwsub_wv_w, 8)
3262 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
3263 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
3264 GEN_VEXT_VF(vfwsub_wf_h, 4)
3265 GEN_VEXT_VF(vfwsub_wf_w, 8)
3266
3267 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */
3268 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
3269 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
3270 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
3271 GEN_VEXT_VV_ENV(vfmul_vv_h, 2)
3272 GEN_VEXT_VV_ENV(vfmul_vv_w, 4)
3273 GEN_VEXT_VV_ENV(vfmul_vv_d, 8)
3274 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
3275 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
3276 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
3277 GEN_VEXT_VF(vfmul_vf_h, 2)
3278 GEN_VEXT_VF(vfmul_vf_w, 4)
3279 GEN_VEXT_VF(vfmul_vf_d, 8)
3280
3281 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
3282 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
3283 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
3284 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2)
3285 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4)
3286 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8)
3287 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
3288 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
3289 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
3290 GEN_VEXT_VF(vfdiv_vf_h, 2)
3291 GEN_VEXT_VF(vfdiv_vf_w, 4)
3292 GEN_VEXT_VF(vfdiv_vf_d, 8)
3293
3294 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3295 {
3296 return float16_div(b, a, s);
3297 }
3298
3299 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
3300 {
3301 return float32_div(b, a, s);
3302 }
3303
3304 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
3305 {
3306 return float64_div(b, a, s);
3307 }
3308
3309 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
3310 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
3311 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
3312 GEN_VEXT_VF(vfrdiv_vf_h, 2)
3313 GEN_VEXT_VF(vfrdiv_vf_w, 4)
3314 GEN_VEXT_VF(vfrdiv_vf_d, 8)
3315
3316 /* Vector Widening Floating-Point Multiply */
3317 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3318 {
3319 return float32_mul(float16_to_float32(a, true, s),
3320 float16_to_float32(b, true, s), s);
3321 }
3322
3323 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3324 {
3325 return float64_mul(float32_to_float64(a, s),
3326 float32_to_float64(b, s), s);
3327
3328 }
3329 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3330 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
3331 GEN_VEXT_VV_ENV(vfwmul_vv_h, 4)
3332 GEN_VEXT_VV_ENV(vfwmul_vv_w, 8)
3333 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3334 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
3335 GEN_VEXT_VF(vfwmul_vf_h, 4)
3336 GEN_VEXT_VF(vfwmul_vf_w, 8)
3337
3338 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
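/*
 * The fused multiply-add workers take (a, b, d) with d being the current
 * destination element: vfmacc computes vd = +(vs1 * vs2) + vd, and the
 * nmacc/msac/nmsac variants negate the product and/or the addend via the
 * float_muladd_negate_* flags.  The madd/msub family further below swaps
 * d and a so that vd is one of the multiplicands (vd = +/-(vs1 * vd) +/- vs2),
 * matching the RVV instruction definitions.
 */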
3339 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
3340 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
3341 CPURISCVState *env) \
3342 { \
3343 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
3344 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3345 TD d = *((TD *)vd + HD(i)); \
3346 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \
3347 }
3348
3349 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3350 {
3351 return float16_muladd(a, b, d, 0, s);
3352 }
3353
3354 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3355 {
3356 return float32_muladd(a, b, d, 0, s);
3357 }
3358
3359 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3360 {
3361 return float64_muladd(a, b, d, 0, s);
3362 }
3363
3364 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3365 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3366 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
3367 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2)
3368 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4)
3369 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8)
3370
3371 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
3372 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
3373 CPURISCVState *env) \
3374 { \
3375 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3376 TD d = *((TD *)vd + HD(i)); \
3377 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
3378 }
3379
3380 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3381 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3382 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
3383 GEN_VEXT_VF(vfmacc_vf_h, 2)
3384 GEN_VEXT_VF(vfmacc_vf_w, 4)
3385 GEN_VEXT_VF(vfmacc_vf_d, 8)
3386
3387 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3388 {
3389 return float16_muladd(a, b, d, float_muladd_negate_c |
3390 float_muladd_negate_product, s);
3391 }
3392
3393 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3394 {
3395 return float32_muladd(a, b, d, float_muladd_negate_c |
3396 float_muladd_negate_product, s);
3397 }
3398
3399 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3400 {
3401 return float64_muladd(a, b, d, float_muladd_negate_c |
3402 float_muladd_negate_product, s);
3403 }
3404
3405 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3406 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3407 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
3408 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2)
3409 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4)
3410 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8)
3411 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3412 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3413 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
3414 GEN_VEXT_VF(vfnmacc_vf_h, 2)
3415 GEN_VEXT_VF(vfnmacc_vf_w, 4)
3416 GEN_VEXT_VF(vfnmacc_vf_d, 8)
3417
3418 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3419 {
3420 return float16_muladd(a, b, d, float_muladd_negate_c, s);
3421 }
3422
3423 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3424 {
3425 return float32_muladd(a, b, d, float_muladd_negate_c, s);
3426 }
3427
3428 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3429 {
3430 return float64_muladd(a, b, d, float_muladd_negate_c, s);
3431 }
3432
3433 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3434 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3435 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
3436 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2)
3437 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4)
3438 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8)
3439 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3440 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3441 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
3442 GEN_VEXT_VF(vfmsac_vf_h, 2)
3443 GEN_VEXT_VF(vfmsac_vf_w, 4)
3444 GEN_VEXT_VF(vfmsac_vf_d, 8)
3445
3446 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3447 {
3448 return float16_muladd(a, b, d, float_muladd_negate_product, s);
3449 }
3450
3451 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3452 {
3453 return float32_muladd(a, b, d, float_muladd_negate_product, s);
3454 }
3455
3456 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3457 {
3458 return float64_muladd(a, b, d, float_muladd_negate_product, s);
3459 }
3460
3461 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3462 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3463 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
3464 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2)
3465 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4)
3466 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8)
3467 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3468 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3469 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
3470 GEN_VEXT_VF(vfnmsac_vf_h, 2)
3471 GEN_VEXT_VF(vfnmsac_vf_w, 4)
3472 GEN_VEXT_VF(vfnmsac_vf_d, 8)
3473
3474 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3475 {
3476 return float16_muladd(d, b, a, 0, s);
3477 }
3478
3479 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3480 {
3481 return float32_muladd(d, b, a, 0, s);
3482 }
3483
3484 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3485 {
3486 return float64_muladd(d, b, a, 0, s);
3487 }
3488
3489 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3490 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3491 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
3492 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2)
3493 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4)
3494 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8)
3495 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3496 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3497 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
3498 GEN_VEXT_VF(vfmadd_vf_h, 2)
3499 GEN_VEXT_VF(vfmadd_vf_w, 4)
3500 GEN_VEXT_VF(vfmadd_vf_d, 8)
3501
3502 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3503 {
3504 return float16_muladd(d, b, a, float_muladd_negate_c |
3505 float_muladd_negate_product, s);
3506 }
3507
3508 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3509 {
3510 return float32_muladd(d, b, a, float_muladd_negate_c |
3511 float_muladd_negate_product, s);
3512 }
3513
3514 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3515 {
3516 return float64_muladd(d, b, a, float_muladd_negate_c |
3517 float_muladd_negate_product, s);
3518 }
3519
3520 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3521 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3522 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
3523 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2)
3524 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4)
3525 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8)
3526 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3527 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3528 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
3529 GEN_VEXT_VF(vfnmadd_vf_h, 2)
3530 GEN_VEXT_VF(vfnmadd_vf_w, 4)
3531 GEN_VEXT_VF(vfnmadd_vf_d, 8)
3532
3533 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3534 {
3535 return float16_muladd(d, b, a, float_muladd_negate_c, s);
3536 }
3537
3538 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3539 {
3540 return float32_muladd(d, b, a, float_muladd_negate_c, s);
3541 }
3542
3543 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3544 {
3545 return float64_muladd(d, b, a, float_muladd_negate_c, s);
3546 }
3547
3548 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3549 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3550 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
3551 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2)
3552 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4)
3553 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8)
3554 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3555 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3556 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
3557 GEN_VEXT_VF(vfmsub_vf_h, 2)
3558 GEN_VEXT_VF(vfmsub_vf_w, 4)
3559 GEN_VEXT_VF(vfmsub_vf_d, 8)
3560
3561 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3562 {
3563 return float16_muladd(d, b, a, float_muladd_negate_product, s);
3564 }
3565
3566 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3567 {
3568 return float32_muladd(d, b, a, float_muladd_negate_product, s);
3569 }
3570
3571 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3572 {
3573 return float64_muladd(d, b, a, float_muladd_negate_product, s);
3574 }
3575
3576 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3577 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3578 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
3579 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2)
3580 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4)
3581 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8)
3582 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3583 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3584 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
3585 GEN_VEXT_VF(vfnmsub_vf_h, 2)
3586 GEN_VEXT_VF(vfnmsub_vf_w, 4)
3587 GEN_VEXT_VF(vfnmsub_vf_d, 8)
3588
3589 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3590 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3591 {
3592 return float32_muladd(float16_to_float32(a, true, s),
3593 float16_to_float32(b, true, s), d, 0, s);
3594 }
3595
3596 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3597 {
3598 return float64_muladd(float32_to_float64(a, s),
3599 float32_to_float64(b, s), d, 0, s);
3600 }
3601
3602 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3603 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
3604 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4)
3605 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8)
3606 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3607 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
3608 GEN_VEXT_VF(vfwmacc_vf_h, 4)
3609 GEN_VEXT_VF(vfwmacc_vf_w, 8)
3610
3611 static uint32_t fwmaccbf16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3612 {
3613 return float32_muladd(bfloat16_to_float32(a, s),
3614 bfloat16_to_float32(b, s), d, 0, s);
3615 }
3616
3617 RVVCALL(OPFVV3, vfwmaccbf16_vv, WOP_UUU_H, H4, H2, H2, fwmaccbf16)
3618 GEN_VEXT_VV_ENV(vfwmaccbf16_vv, 4)
3619 RVVCALL(OPFVF3, vfwmaccbf16_vf, WOP_UUU_H, H4, H2, fwmaccbf16)
3620 GEN_VEXT_VF(vfwmaccbf16_vf, 4)
3621
3622 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3623 {
3624 return float32_muladd(float16_to_float32(a, true, s),
3625 float16_to_float32(b, true, s), d,
3626 float_muladd_negate_c | float_muladd_negate_product,
3627 s);
3628 }
3629
3630 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3631 {
3632 return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s),
3633 d, float_muladd_negate_c |
3634 float_muladd_negate_product, s);
3635 }
3636
3637 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3638 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
3639 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4)
3640 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8)
3641 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3642 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
3643 GEN_VEXT_VF(vfwnmacc_vf_h, 4)
3644 GEN_VEXT_VF(vfwnmacc_vf_w, 8)
3645
3646 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3647 {
3648 return float32_muladd(float16_to_float32(a, true, s),
3649 float16_to_float32(b, true, s), d,
3650 float_muladd_negate_c, s);
3651 }
3652
3653 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3654 {
3655 return float64_muladd(float32_to_float64(a, s),
3656 float32_to_float64(b, s), d,
3657 float_muladd_negate_c, s);
3658 }
3659
3660 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3661 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
3662 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4)
3663 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8)
3664 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3665 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
3666 GEN_VEXT_VF(vfwmsac_vf_h, 4)
3667 GEN_VEXT_VF(vfwmsac_vf_w, 8)
3668
3669 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3670 {
3671 return float32_muladd(float16_to_float32(a, true, s),
3672 float16_to_float32(b, true, s), d,
3673 float_muladd_negate_product, s);
3674 }
3675
3676 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3677 {
3678 return float64_muladd(float32_to_float64(a, s),
3679 float32_to_float64(b, s), d,
3680 float_muladd_negate_product, s);
3681 }
3682
3683 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3684 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
3685 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4)
3686 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8)
3687 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3688 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
3689 GEN_VEXT_VF(vfwnmsac_vf_h, 4)
3690 GEN_VEXT_VF(vfwnmsac_vf_w, 8)
3691
3692 /* Vector Floating-Point Square-Root Instruction */
3693 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
3694 static void do_##NAME(void *vd, void *vs2, int i, \
3695 CPURISCVState *env) \
3696 { \
3697 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3698 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \
3699 }
3700
3701 #define GEN_VEXT_V_ENV(NAME, ESZ) \
3702 void HELPER(NAME)(void *vd, void *v0, void *vs2, \
3703 CPURISCVState *env, uint32_t desc) \
3704 { \
3705 uint32_t vm = vext_vm(desc); \
3706 uint32_t vl = env->vl; \
3707 uint32_t total_elems = \
3708 vext_get_total_elems(env, desc, ESZ); \
3709 uint32_t vta = vext_vta(desc); \
3710 uint32_t vma = vext_vma(desc); \
3711 uint32_t i; \
3712 \
3713 VSTART_CHECK_EARLY_EXIT(env, vl); \
3714 \
3715 if (vl == 0) { \
3716 return; \
3717 } \
3718 for (i = env->vstart; i < vl; i++) { \
3719 if (!vm && !vext_elem_mask(v0, i)) { \
3720 /* set masked-off elements to 1s */ \
3721 vext_set_elems_1s(vd, vma, i * ESZ, \
3722 (i + 1) * ESZ); \
3723 continue; \
3724 } \
3725 do_##NAME(vd, vs2, i, env); \
3726 } \
3727 env->vstart = 0; \
3728 vext_set_elems_1s(vd, vta, vl * ESZ, \
3729 total_elems * ESZ); \
3730 }
3731
3732 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3733 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3734 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
3735 GEN_VEXT_V_ENV(vfsqrt_v_h, 2)
3736 GEN_VEXT_V_ENV(vfsqrt_v_w, 4)
3737 GEN_VEXT_V_ENV(vfsqrt_v_d, 8)
3738
3739 /*
3740 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
3741 *
3742 * Adapted from riscv-v-spec recip.c:
3743 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3744 */
3745 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
3746 {
3747 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3748 uint64_t exp = extract64(f, frac_size, exp_size);
3749 uint64_t frac = extract64(f, 0, frac_size);
3750
3751 const uint8_t lookup_table[] = {
3752 52, 51, 50, 48, 47, 46, 44, 43,
3753 42, 41, 40, 39, 38, 36, 35, 34,
3754 33, 32, 31, 30, 30, 29, 28, 27,
3755 26, 25, 24, 23, 23, 22, 21, 20,
3756 19, 19, 18, 17, 16, 16, 15, 14,
3757 14, 13, 12, 12, 11, 10, 10, 9,
3758 9, 8, 7, 7, 6, 6, 5, 4,
3759 4, 3, 3, 2, 2, 1, 1, 0,
3760 127, 125, 123, 121, 119, 118, 116, 114,
3761 113, 111, 109, 108, 106, 105, 103, 102,
3762 100, 99, 97, 96, 95, 93, 92, 91,
3763 90, 88, 87, 86, 85, 84, 83, 82,
3764 80, 79, 78, 77, 76, 75, 74, 73,
3765 72, 71, 70, 70, 69, 68, 67, 66,
3766 65, 64, 63, 63, 62, 61, 60, 59,
3767 59, 58, 57, 56, 56, 55, 54, 53
3768 };
3769 const int precision = 7;
3770
3771 if (exp == 0 && frac != 0) { /* subnormal */
3772 /* Normalize the subnormal. */
3773 while (extract64(frac, frac_size - 1, 1) == 0) {
3774 exp--;
3775 frac <<= 1;
3776 }
3777
3778 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3779 }
3780
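    /*
     * The 7-bit table index is the exponent's low bit followed by the top
     * six fraction bits; the output exponent is (3 * bias - 1 - exp) / 2,
     * computed below using ~exp == -exp - 1 in two's complement.
     */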
3781 int idx = ((exp & 1) << (precision - 1)) |
3782 (frac >> (frac_size - precision + 1));
3783 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3784 (frac_size - precision);
3785 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;
3786
3787 uint64_t val = 0;
3788 val = deposit64(val, 0, frac_size, out_frac);
3789 val = deposit64(val, frac_size, exp_size, out_exp);
3790 val = deposit64(val, frac_size + exp_size, 1, sign);
3791 return val;
3792 }
3793
3794 static float16 frsqrt7_h(float16 f, float_status *s)
3795 {
3796 int exp_size = 5, frac_size = 10;
3797 bool sign = float16_is_neg(f);
3798
3799 /*
3800 * frsqrt7(sNaN) = canonical NaN
3801 * frsqrt7(-inf) = canonical NaN
3802 * frsqrt7(-normal) = canonical NaN
3803 * frsqrt7(-subnormal) = canonical NaN
3804 */
3805 if (float16_is_signaling_nan(f, s) ||
3806 (float16_is_infinity(f) && sign) ||
3807 (float16_is_normal(f) && sign) ||
3808 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
3809 s->float_exception_flags |= float_flag_invalid;
3810 return float16_default_nan(s);
3811 }
3812
3813 /* frsqrt7(qNaN) = canonical NaN */
3814 if (float16_is_quiet_nan(f, s)) {
3815 return float16_default_nan(s);
3816 }
3817
3818 /* frsqrt7(+-0) = +-inf */
3819 if (float16_is_zero(f)) {
3820 s->float_exception_flags |= float_flag_divbyzero;
3821 return float16_set_sign(float16_infinity, sign);
3822 }
3823
3824 /* frsqrt7(+inf) = +0 */
3825 if (float16_is_infinity(f) && !sign) {
3826 return float16_set_sign(float16_zero, sign);
3827 }
3828
3829 /* +normal, +subnormal */
3830 uint64_t val = frsqrt7(f, exp_size, frac_size);
3831 return make_float16(val);
3832 }
3833
3834 static float32 frsqrt7_s(float32 f, float_status *s)
3835 {
3836 int exp_size = 8, frac_size = 23;
3837 bool sign = float32_is_neg(f);
3838
3839 /*
3840 * frsqrt7(sNaN) = canonical NaN
3841 * frsqrt7(-inf) = canonical NaN
3842 * frsqrt7(-normal) = canonical NaN
3843 * frsqrt7(-subnormal) = canonical NaN
3844 */
3845 if (float32_is_signaling_nan(f, s) ||
3846 (float32_is_infinity(f) && sign) ||
3847 (float32_is_normal(f) && sign) ||
3848 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
3849 s->float_exception_flags |= float_flag_invalid;
3850 return float32_default_nan(s);
3851 }
3852
3853 /* frsqrt7(qNaN) = canonical NaN */
3854 if (float32_is_quiet_nan(f, s)) {
3855 return float32_default_nan(s);
3856 }
3857
3858 /* frsqrt7(+-0) = +-inf */
3859 if (float32_is_zero(f)) {
3860 s->float_exception_flags |= float_flag_divbyzero;
3861 return float32_set_sign(float32_infinity, sign);
3862 }
3863
3864 /* frsqrt7(+inf) = +0 */
3865 if (float32_is_infinity(f) && !sign) {
3866 return float32_set_sign(float32_zero, sign);
3867 }
3868
3869 /* +normal, +subnormal */
3870 uint64_t val = frsqrt7(f, exp_size, frac_size);
3871 return make_float32(val);
3872 }
3873
3874 static float64 frsqrt7_d(float64 f, float_status *s)
3875 {
3876 int exp_size = 11, frac_size = 52;
3877 bool sign = float64_is_neg(f);
3878
3879 /*
3880 * frsqrt7(sNaN) = canonical NaN
3881 * frsqrt7(-inf) = canonical NaN
3882 * frsqrt7(-normal) = canonical NaN
3883 * frsqrt7(-subnormal) = canonical NaN
3884 */
3885 if (float64_is_signaling_nan(f, s) ||
3886 (float64_is_infinity(f) && sign) ||
3887 (float64_is_normal(f) && sign) ||
3888 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
3889 s->float_exception_flags |= float_flag_invalid;
3890 return float64_default_nan(s);
3891 }
3892
3893 /* frsqrt7(qNaN) = canonical NaN */
3894 if (float64_is_quiet_nan(f, s)) {
3895 return float64_default_nan(s);
3896 }
3897
3898 /* frsqrt7(+-0) = +-inf */
3899 if (float64_is_zero(f)) {
3900 s->float_exception_flags |= float_flag_divbyzero;
3901 return float64_set_sign(float64_infinity, sign);
3902 }
3903
3904 /* frsqrt7(+inf) = +0 */
3905 if (float64_is_infinity(f) && !sign) {
3906 return float64_set_sign(float64_zero, sign);
3907 }
3908
3909 /* +normal, +subnormal */
3910 uint64_t val = frsqrt7(f, exp_size, frac_size);
3911 return make_float64(val);
3912 }
3913
3914 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
3915 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
3916 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
3917 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2)
3918 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4)
3919 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8)
3920
3921 /*
3922 * Vector Floating-Point Reciprocal Estimate Instruction
3923 *
3924 * Adapted from riscv-v-spec recip.c:
3925 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3926 */
3927 static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
3928 float_status *s)
3929 {
3930 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3931 uint64_t exp = extract64(f, frac_size, exp_size);
3932 uint64_t frac = extract64(f, 0, frac_size);
3933
3934 const uint8_t lookup_table[] = {
3935 127, 125, 123, 121, 119, 117, 116, 114,
3936 112, 110, 109, 107, 105, 104, 102, 100,
3937 99, 97, 96, 94, 93, 91, 90, 88,
3938 87, 85, 84, 83, 81, 80, 79, 77,
3939 76, 75, 74, 72, 71, 70, 69, 68,
3940 66, 65, 64, 63, 62, 61, 60, 59,
3941 58, 57, 56, 55, 54, 53, 52, 51,
3942 50, 49, 48, 47, 46, 45, 44, 43,
3943 42, 41, 40, 40, 39, 38, 37, 36,
3944 35, 35, 34, 33, 32, 31, 31, 30,
3945 29, 28, 28, 27, 26, 25, 25, 24,
3946 23, 23, 22, 21, 21, 20, 19, 19,
3947 18, 17, 17, 16, 15, 15, 14, 14,
3948 13, 12, 12, 11, 11, 10, 9, 9,
3949 8, 8, 7, 7, 6, 5, 5, 4,
3950 4, 3, 3, 2, 2, 1, 1, 0
3951 };
3952 const int precision = 7;
3953
3954 if (exp == 0 && frac != 0) { /* subnormal */
3955 /* Normalize the subnormal. */
3956 while (extract64(frac, frac_size - 1, 1) == 0) {
3957 exp--;
3958 frac <<= 1;
3959 }
3960
3961 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3962
3963 if (exp != 0 && exp != UINT64_MAX) {
3964 /*
3965 * Overflow to inf or max value of same sign,
3966 * depending on sign and rounding mode.
3967 */
3968 s->float_exception_flags |= (float_flag_inexact |
3969 float_flag_overflow);
3970
3971 if ((s->float_rounding_mode == float_round_to_zero) ||
3972 ((s->float_rounding_mode == float_round_down) && !sign) ||
3973 ((s->float_rounding_mode == float_round_up) && sign)) {
3974 /* Return greatest/negative finite value. */
3975 return (sign << (exp_size + frac_size)) |
3976 (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
3977 } else {
3978 /* Return +-inf. */
3979 return (sign << (exp_size + frac_size)) |
3980 MAKE_64BIT_MASK(frac_size, exp_size);
3981 }
3982 }
3983 }
3984
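    /*
     * Table lookup on the top seven fraction bits; the output exponent is
     * 2 * bias - 1 - exp (implemented with ~exp).  An out_exp of 0 or -1
     * means the true result is subnormal, so the fraction is shifted right
     * with the implicit leading one re-inserted below.
     */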
3985 int idx = frac >> (frac_size - precision);
3986 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3987 (frac_size - precision);
3988 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;
3989
3990 if (out_exp == 0 || out_exp == UINT64_MAX) {
3991 /*
3992 * The result is subnormal, but don't raise the underflow exception,
3993 * because there's no additional loss of precision.
3994 */
3995 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
3996 if (out_exp == UINT64_MAX) {
3997 out_frac >>= 1;
3998 out_exp = 0;
3999 }
4000 }
4001
4002 uint64_t val = 0;
4003 val = deposit64(val, 0, frac_size, out_frac);
4004 val = deposit64(val, frac_size, exp_size, out_exp);
4005 val = deposit64(val, frac_size + exp_size, 1, sign);
4006 return val;
4007 }
4008
4009 static float16 frec7_h(float16 f, float_status *s)
4010 {
4011 int exp_size = 5, frac_size = 10;
4012 bool sign = float16_is_neg(f);
4013
4014 /* frec7(+-inf) = +-0 */
4015 if (float16_is_infinity(f)) {
4016 return float16_set_sign(float16_zero, sign);
4017 }
4018
4019 /* frec7(+-0) = +-inf */
4020 if (float16_is_zero(f)) {
4021 s->float_exception_flags |= float_flag_divbyzero;
4022 return float16_set_sign(float16_infinity, sign);
4023 }
4024
4025 /* frec7(sNaN) = canonical NaN */
4026 if (float16_is_signaling_nan(f, s)) {
4027 s->float_exception_flags |= float_flag_invalid;
4028 return float16_default_nan(s);
4029 }
4030
4031 /* frec7(qNaN) = canonical NaN */
4032 if (float16_is_quiet_nan(f, s)) {
4033 return float16_default_nan(s);
4034 }
4035
4036 /* +-normal, +-subnormal */
4037 uint64_t val = frec7(f, exp_size, frac_size, s);
4038 return make_float16(val);
4039 }
4040
4041 static float32 frec7_s(float32 f, float_status *s)
4042 {
4043 int exp_size = 8, frac_size = 23;
4044 bool sign = float32_is_neg(f);
4045
4046 /* frec7(+-inf) = +-0 */
4047 if (float32_is_infinity(f)) {
4048 return float32_set_sign(float32_zero, sign);
4049 }
4050
4051 /* frec7(+-0) = +-inf */
4052 if (float32_is_zero(f)) {
4053 s->float_exception_flags |= float_flag_divbyzero;
4054 return float32_set_sign(float32_infinity, sign);
4055 }
4056
4057 /* frec7(sNaN) = canonical NaN */
4058 if (float32_is_signaling_nan(f, s)) {
4059 s->float_exception_flags |= float_flag_invalid;
4060 return float32_default_nan(s);
4061 }
4062
4063 /* frec7(qNaN) = canonical NaN */
4064 if (float32_is_quiet_nan(f, s)) {
4065 return float32_default_nan(s);
4066 }
4067
4068 /* +-normal, +-subnormal */
4069 uint64_t val = frec7(f, exp_size, frac_size, s);
4070 return make_float32(val);
4071 }
4072
4073 static float64 frec7_d(float64 f, float_status *s)
4074 {
4075 int exp_size = 11, frac_size = 52;
4076 bool sign = float64_is_neg(f);
4077
4078 /* frec7(+-inf) = +-0 */
4079 if (float64_is_infinity(f)) {
4080 return float64_set_sign(float64_zero, sign);
4081 }
4082
4083 /* frec7(+-0) = +-inf */
4084 if (float64_is_zero(f)) {
4085 s->float_exception_flags |= float_flag_divbyzero;
4086 return float64_set_sign(float64_infinity, sign);
4087 }
4088
4089 /* frec7(sNaN) = canonical NaN */
4090 if (float64_is_signaling_nan(f, s)) {
4091 s->float_exception_flags |= float_flag_invalid;
4092 return float64_default_nan(s);
4093 }
4094
4095 /* frec7(qNaN) = canonical NaN */
4096 if (float64_is_quiet_nan(f, s)) {
4097 return float64_default_nan(s);
4098 }
4099
4100 /* +-normal, +-subnormal */
4101 uint64_t val = frec7(f, exp_size, frac_size, s);
4102 return make_float64(val);
4103 }
4104
4105 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
4106 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
4107 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
4108 GEN_VEXT_V_ENV(vfrec7_v_h, 2)
4109 GEN_VEXT_V_ENV(vfrec7_v_w, 4)
4110 GEN_VEXT_V_ENV(vfrec7_v_d, 8)
4111
4112 /* Vector Floating-Point MIN/MAX Instructions */
4113 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
4114 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
4115 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
4116 GEN_VEXT_VV_ENV(vfmin_vv_h, 2)
4117 GEN_VEXT_VV_ENV(vfmin_vv_w, 4)
4118 GEN_VEXT_VV_ENV(vfmin_vv_d, 8)
4119 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
4120 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
4121 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
4122 GEN_VEXT_VF(vfmin_vf_h, 2)
4123 GEN_VEXT_VF(vfmin_vf_w, 4)
4124 GEN_VEXT_VF(vfmin_vf_d, 8)
4125
4126 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
4127 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
4128 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
4129 GEN_VEXT_VV_ENV(vfmax_vv_h, 2)
4130 GEN_VEXT_VV_ENV(vfmax_vv_w, 4)
4131 GEN_VEXT_VV_ENV(vfmax_vv_d, 8)
4132 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
4133 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
4134 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
4135 GEN_VEXT_VF(vfmax_vf_h, 2)
4136 GEN_VEXT_VF(vfmax_vf_w, 4)
4137 GEN_VEXT_VF(vfmax_vf_d, 8)
4138
4139 /* Vector Floating-Point Sign-Injection Instructions */
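/*
 * deposit64(b, 0, N, a) keeps the sign bit of b and the low N bits
 * (exponent and fraction) of a, so the result takes its magnitude from
 * a (vs2) and its sign from b (vs1).  The jn/jx variants derive the sign
 * from ~b and from b ^ a respectively, per the sign-injection definitions.
 */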
4140 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
4141 {
4142 return deposit64(b, 0, 15, a);
4143 }
4144
4145 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
4146 {
4147 return deposit64(b, 0, 31, a);
4148 }
4149
4150 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
4151 {
4152 return deposit64(b, 0, 63, a);
4153 }
4154
4155 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
4156 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
4157 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
4158 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2)
4159 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4)
4160 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8)
4161 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
4162 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
4163 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
4164 GEN_VEXT_VF(vfsgnj_vf_h, 2)
4165 GEN_VEXT_VF(vfsgnj_vf_w, 4)
4166 GEN_VEXT_VF(vfsgnj_vf_d, 8)
4167
4168 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
4169 {
4170 return deposit64(~b, 0, 15, a);
4171 }
4172
4173 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
4174 {
4175 return deposit64(~b, 0, 31, a);
4176 }
4177
4178 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
4179 {
4180 return deposit64(~b, 0, 63, a);
4181 }
4182
4183 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
4184 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
4185 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
4186 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2)
4187 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4)
4188 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8)
4189 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
4190 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
4191 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
4192 GEN_VEXT_VF(vfsgnjn_vf_h, 2)
4193 GEN_VEXT_VF(vfsgnjn_vf_w, 4)
4194 GEN_VEXT_VF(vfsgnjn_vf_d, 8)
4195
4196 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
4197 {
4198 return deposit64(b ^ a, 0, 15, a);
4199 }
4200
4201 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
4202 {
4203 return deposit64(b ^ a, 0, 31, a);
4204 }
4205
4206 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
4207 {
4208 return deposit64(b ^ a, 0, 63, a);
4209 }
4210
4211 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
4212 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
4213 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
4214 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2)
4215 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4)
4216 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8)
4217 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
4218 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
4219 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
4220 GEN_VEXT_VF(vfsgnjx_vf_h, 2)
4221 GEN_VEXT_VF(vfsgnjx_vf_w, 4)
4222 GEN_VEXT_VF(vfsgnjx_vf_d, 8)
4223
4224 /* Vector Floating-Point Compare Instructions */
4225 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \
4226 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
4227 CPURISCVState *env, uint32_t desc) \
4228 { \
4229 uint32_t vm = vext_vm(desc); \
4230 uint32_t vl = env->vl; \
4231 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
4232 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
4233 uint32_t vma = vext_vma(desc); \
4234 uint32_t i; \
4235 \
4236 VSTART_CHECK_EARLY_EXIT(env, vl); \
4237 \
4238 for (i = env->vstart; i < vl; i++) { \
4239 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
4240 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
4241 if (!vm && !vext_elem_mask(v0, i)) { \
4242 /* set masked-off elements to 1s */ \
4243 if (vma) { \
4244 vext_set_elem_mask(vd, i, 1); \
4245 } \
4246 continue; \
4247 } \
4248 vext_set_elem_mask(vd, i, \
4249 DO_OP(s2, s1, &env->fp_status)); \
4250 } \
4251 env->vstart = 0; \
4252         /*                                                    \
4253          * mask destination register is always tail-agnostic; \
4254          * set tail elements to 1s                             \
4255 */ \
4256 if (vta_all_1s) { \
4257 for (; i < total_elems; i++) { \
4258 vext_set_elem_mask(vd, i, 1); \
4259 } \
4260 } \
4261 }
4262
4263 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
4264 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
4265 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
4266
4267 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \
4268 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4269 CPURISCVState *env, uint32_t desc) \
4270 { \
4271 uint32_t vm = vext_vm(desc); \
4272 uint32_t vl = env->vl; \
4273 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
4274 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
4275 uint32_t vma = vext_vma(desc); \
4276 uint32_t i; \
4277 \
4278 VSTART_CHECK_EARLY_EXIT(env, vl); \
4279 \
4280 for (i = env->vstart; i < vl; i++) { \
4281 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
4282 if (!vm && !vext_elem_mask(v0, i)) { \
4283 /* set masked-off elements to 1s */ \
4284 if (vma) { \
4285 vext_set_elem_mask(vd, i, 1); \
4286 } \
4287 continue; \
4288 } \
4289 vext_set_elem_mask(vd, i, \
4290 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \
4291 } \
4292 env->vstart = 0; \
4293         /*                                                    \
4294          * mask destination register is always tail-agnostic; \
4295          * set tail elements to 1s                             \
4296 */ \
4297 if (vta_all_1s) { \
4298 for (; i < total_elems; i++) { \
4299 vext_set_elem_mask(vd, i, 1); \
4300 } \
4301 } \
4302 }
4303
4304 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
4305 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
4306 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
4307
4308 static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
4309 {
4310 FloatRelation compare = float16_compare_quiet(a, b, s);
4311 return compare != float_relation_equal;
4312 }
4313
4314 static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
4315 {
4316 FloatRelation compare = float32_compare_quiet(a, b, s);
4317 return compare != float_relation_equal;
4318 }
4319
4320 static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
4321 {
4322 FloatRelation compare = float64_compare_quiet(a, b, s);
4323 return compare != float_relation_equal;
4324 }
4325
4326 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
4327 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
4328 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
4329 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
4330 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
4331 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
4332
4333 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
4334 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
4335 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
4336 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
4337 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
4338 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
4339
4340 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
4341 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
4342 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
4343 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
4344 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
4345 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
4346
4347 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
4348 {
4349 FloatRelation compare = float16_compare(a, b, s);
4350 return compare == float_relation_greater;
4351 }
4352
4353 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
4354 {
4355 FloatRelation compare = float32_compare(a, b, s);
4356 return compare == float_relation_greater;
4357 }
4358
4359 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
4360 {
4361 FloatRelation compare = float64_compare(a, b, s);
4362 return compare == float_relation_greater;
4363 }
4364
4365 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
4366 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
4367 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4368
4369 static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4370 {
4371 FloatRelation compare = float16_compare(a, b, s);
4372 return compare == float_relation_greater ||
4373 compare == float_relation_equal;
4374 }
4375
4376 static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
4377 {
4378 FloatRelation compare = float32_compare(a, b, s);
4379 return compare == float_relation_greater ||
4380 compare == float_relation_equal;
4381 }
4382
4383 static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
4384 {
4385 FloatRelation compare = float64_compare(a, b, s);
4386 return compare == float_relation_greater ||
4387 compare == float_relation_equal;
4388 }
4389
4390 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
4391 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
4392 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
4393
4394 /* Vector Floating-Point Classify Instruction */
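/*
 * The classify result uses the standard RISC-V fclass encoding:
 * bit 0 = -inf, 1 = -normal, 2 = -subnormal, 3 = -0, 4 = +0,
 * bit 5 = +subnormal, 6 = +normal, 7 = +inf, 8 = sNaN, 9 = qNaN.
 */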
4395 target_ulong fclass_h(uint64_t frs1)
4396 {
4397 float16 f = frs1;
4398 bool sign = float16_is_neg(f);
4399
4400 if (float16_is_infinity(f)) {
4401 return sign ? 1 << 0 : 1 << 7;
4402 } else if (float16_is_zero(f)) {
4403 return sign ? 1 << 3 : 1 << 4;
4404 } else if (float16_is_zero_or_denormal(f)) {
4405 return sign ? 1 << 2 : 1 << 5;
4406 } else if (float16_is_any_nan(f)) {
4407 float_status s = { }; /* for snan_bit_is_one */
4408 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4409 } else {
4410 return sign ? 1 << 1 : 1 << 6;
4411 }
4412 }
4413
4414 target_ulong fclass_s(uint64_t frs1)
4415 {
4416 float32 f = frs1;
4417 bool sign = float32_is_neg(f);
4418
4419 if (float32_is_infinity(f)) {
4420 return sign ? 1 << 0 : 1 << 7;
4421 } else if (float32_is_zero(f)) {
4422 return sign ? 1 << 3 : 1 << 4;
4423 } else if (float32_is_zero_or_denormal(f)) {
4424 return sign ? 1 << 2 : 1 << 5;
4425 } else if (float32_is_any_nan(f)) {
4426 float_status s = { }; /* for snan_bit_is_one */
4427 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4428 } else {
4429 return sign ? 1 << 1 : 1 << 6;
4430 }
4431 }
4432
4433 target_ulong fclass_d(uint64_t frs1)
4434 {
4435 float64 f = frs1;
4436 bool sign = float64_is_neg(f);
4437
4438 if (float64_is_infinity(f)) {
4439 return sign ? 1 << 0 : 1 << 7;
4440 } else if (float64_is_zero(f)) {
4441 return sign ? 1 << 3 : 1 << 4;
4442 } else if (float64_is_zero_or_denormal(f)) {
4443 return sign ? 1 << 2 : 1 << 5;
4444 } else if (float64_is_any_nan(f)) {
4445 float_status s = { }; /* for snan_bit_is_one */
4446 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4447 } else {
4448 return sign ? 1 << 1 : 1 << 6;
4449 }
4450 }
4451
4452 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
4453 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
4454 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
4455 GEN_VEXT_V(vfclass_v_h, 2)
4456 GEN_VEXT_V(vfclass_v_w, 4)
4457 GEN_VEXT_V(vfclass_v_d, 8)
4458
4459 /* Vector Floating-Point Merge Instruction */
4460
4461 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \
4462 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4463 CPURISCVState *env, uint32_t desc) \
4464 { \
4465 uint32_t vm = vext_vm(desc); \
4466 uint32_t vl = env->vl; \
4467 uint32_t esz = sizeof(ETYPE); \
4468 uint32_t total_elems = \
4469 vext_get_total_elems(env, desc, esz); \
4470 uint32_t vta = vext_vta(desc); \
4471 uint32_t i; \
4472 \
4473 VSTART_CHECK_EARLY_EXIT(env, vl); \
4474 \
4475 for (i = env->vstart; i < vl; i++) { \
4476 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
4477 *((ETYPE *)vd + H(i)) = \
4478 (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \
4479 } \
4480 env->vstart = 0; \
4481 /* set tail elements to 1s */ \
4482 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
4483 }
4484
4485 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
4486 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
4487 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
4488
4489 /* Single-Width Floating-Point/Integer Type-Convert Instructions */
4490 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
4491 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
4492 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
4493 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
4494 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2)
4495 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4)
4496 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8)
4497
4498 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
4499 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
4500 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
4501 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
4502 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2)
4503 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4)
4504 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8)
4505
4506 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
4507 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
4508 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
4509 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
4510 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2)
4511 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4)
4512 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8)
4513
4514 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
4515 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
4516 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
4517 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
4518 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2)
4519 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4)
4520 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8)
4521
4522 /* Widening Floating-Point/Integer Type-Convert Instructions */
4523 /* (TD, T2, TX2) */
4524 #define WOP_UU_B uint16_t, uint8_t, uint8_t
4525 #define WOP_UU_H uint32_t, uint16_t, uint16_t
4526 #define WOP_UU_W uint64_t, uint32_t, uint32_t
4527 /*
4528 * vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.
4529 */
4530 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
4531 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
4532 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4)
4533 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8)
4534
4535 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
4536 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
4537 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
4538 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4)
4539 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8)
4540
4541 /*
4542 * vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float.
4543 */
4544 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
4545 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
4546 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
4547 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2)
4548 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4)
4549 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8)
4550
4551 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
4552 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
4553 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
4554 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
4555 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2)
4556 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4)
4557 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8)
4558
4559 /*
4560 * vfwcvt.f.f.v vd, vs2, vm # Convert single-width float to double-width float.
4561 */
4562 static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4563 {
4564 return float16_to_float32(a, true, s);
4565 }
4566
4567 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
4568 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
4569 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4)
4570 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8)
4571
4572 RVVCALL(OPFVV1, vfwcvtbf16_f_f_v, WOP_UU_H, H4, H2, bfloat16_to_float32)
4573 GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v, 4)
4574
4575 /* Narrowing Floating-Point/Integer Type-Convert Instructions */
4576 /* (TD, T2, TX2) */
4577 #define NOP_UU_B uint8_t, uint16_t, uint32_t
4578 #define NOP_UU_H uint16_t, uint32_t, uint32_t
4579 #define NOP_UU_W uint32_t, uint64_t, uint64_t
4580 /* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
4581 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
4582 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
4583 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
4584 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1)
4585 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2)
4586 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4)
4587
4588 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
4589 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
4590 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
4591 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
4592 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1)
4593 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2)
4594 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4)
4595
4596 /*
4597 * vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float.
4598 */
4599 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
4600 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
4601 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2)
4602 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4)
4603
4604 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
4605 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
4606 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
4607 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2)
4608 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4)
4609
4610 /* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float. */
4611 static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4612 {
4613 return float32_to_float16(a, true, s);
4614 }
4615
4616 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
4617 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
4618 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2)
4619 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4)
4620
4621 RVVCALL(OPFVV1, vfncvtbf16_f_f_w, NOP_UU_H, H2, H4, float32_to_bfloat16)
4622 GEN_VEXT_V_ENV(vfncvtbf16_f_f_w, 2)
4623
4624 /*
4625 * Vector Reduction Operations
4626 */
4627 /* Vector Single-Width Integer Reduction Instructions */
4628 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \
4629 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4630 void *vs2, CPURISCVState *env, \
4631 uint32_t desc) \
4632 { \
4633 uint32_t vm = vext_vm(desc); \
4634 uint32_t vl = env->vl; \
4635 uint32_t esz = sizeof(TD); \
4636 uint32_t vlenb = simd_maxsz(desc); \
4637 uint32_t vta = vext_vta(desc); \
4638 uint32_t i; \
4639 TD s1 = *((TD *)vs1 + HD(0)); \
4640 \
4641 VSTART_CHECK_EARLY_EXIT(env, vl); \
4642 \
4643 for (i = env->vstart; i < vl; i++) { \
4644 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
4645 if (!vm && !vext_elem_mask(v0, i)) { \
4646 continue; \
4647 } \
4648 s1 = OP(s1, (TD)s2); \
4649 } \
4650 if (vl > 0) { \
4651 *((TD *)vd + HD(0)) = s1; \
4652 } \
4653 env->vstart = 0; \
4654 /* set tail elements to 1s */ \
4655 vext_set_elems_1s(vd, vta, esz, vlenb); \
4656 }
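/*
 * Worked example (values chosen for exposition): for vredsum.vs with
 * SEW=8, vl=4, vs1[0]=10 and vs2[0..3]={1,2,3,4}, the loop above folds
 * the active elements into the scalar accumulator:
 *
 *   s1 = 10 + 1 + 2 + 3 + 4 = 20
 *
 * Only vd[0] is written; masked-off elements are skipped and do not
 * contribute to the result.
 */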
4657
4658 /* vd[0] = sum(vs1[0], vs2[*]) */
4659 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD)
4660 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
4661 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
4662 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)
4663
4664 /* vd[0] = maxu(vs1[0], vs2[*]) */
4665 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX)
4666 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
4667 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
4668 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)
4669
4670 /* vd[0] = max(vs1[0], vs2[*]) */
4671 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX)
4672 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
4673 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
4674 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)
4675
4676 /* vd[0] = minu(vs1[0], vs2[*]) */
4677 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN)
4678 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
4679 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
4680 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)
4681
4682 /* vd[0] = min(vs1[0], vs2[*]) */
4683 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN)
4684 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
4685 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
4686 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)
4687
4688 /* vd[0] = and(vs1[0], vs2[*]) */
4689 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND)
4690 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
4691 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
4692 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)
4693
4694 /* vd[0] = or(vs1[0], vs2[*]) */
4695 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR)
4696 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
4697 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
4698 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)
4699
4700 /* vd[0] = xor(vs1[0], vs2[*]) */
4701 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR)
4702 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
4703 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
4704 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)
4705
4706 /* Vector Widening Integer Reduction Instructions */
4707 /* signed sum reduction into double-width accumulator */
4708 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD)
4709 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
4710 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)
4711
4712 /* Unsigned sum reduction into double-width accumulator */
4713 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD)
4714 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
4715 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
4716
4717 /* Vector Single-Width Floating-Point Reduction Instructions */
4718 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \
4719 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4720 void *vs2, CPURISCVState *env, \
4721 uint32_t desc) \
4722 { \
4723 uint32_t vm = vext_vm(desc); \
4724 uint32_t vl = env->vl; \
4725 uint32_t esz = sizeof(TD); \
4726 uint32_t vlenb = simd_maxsz(desc); \
4727 uint32_t vta = vext_vta(desc); \
4728 uint32_t i; \
4729 TD s1 = *((TD *)vs1 + HD(0)); \
4730 \
4731 VSTART_CHECK_EARLY_EXIT(env, vl); \
4732 \
4733 for (i = env->vstart; i < vl; i++) { \
4734 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
4735 if (!vm && !vext_elem_mask(v0, i)) { \
4736 continue; \
4737 } \
4738 s1 = OP(s1, (TD)s2, &env->fp_status); \
4739 } \
4740 if (vl > 0) { \
4741 *((TD *)vd + HD(0)) = s1; \
4742 } \
4743 env->vstart = 0; \
4744 /* set tail elements to 1s */ \
4745 vext_set_elems_1s(vd, vta, esz, vlenb); \
4746 }
4747
4748 /* Unordered sum */
4749 GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4750 GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4751 GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
4752
4753 /* Ordered sum */
4754 GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4755 GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4756 GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
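/*
 * Note: both the unordered (vfredusum) and ordered (vfredosum) variants
 * are generated from the same GEN_VEXT_FRED loop, so this emulation
 * always accumulates strictly in element order; the specification merely
 * permits an unordered implementation to reassociate, it does not
 * require it.
 */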
4757
4758 /* Maximum value */
4759 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2,
4760 float16_maximum_number)
4761 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4,
4762 float32_maximum_number)
4763 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8,
4764 float64_maximum_number)
4765
4766 /* Minimum value */
4767 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2,
4768 float16_minimum_number)
4769 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4,
4770 float32_minimum_number)
4771 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8,
4772 float64_minimum_number)
4773
4774 /* Vector Widening Floating-Point Add Instructions */
4775 static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s)
4776 {
4777 return float32_add(a, float16_to_float32(b, true, s), s);
4778 }
4779
4780 static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s)
4781 {
4782 return float64_add(a, float32_to_float64(b, s), s);
4783 }
4784
4785 /* Vector Widening Floating-Point Reduction Instructions */
4786 /* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
4787 GEN_VEXT_FRED(vfwredusum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
4788 GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
4789 GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
4790 GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
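/*
 * Example of the promote-then-accumulate step: for vfwredusum.vs with
 * SEW=16, fwadd16() converts each binary16 element of vs2 to binary32
 * before adding it to the single-precision accumulator, so rounding is
 * only performed at the wider precision of the running sum.
 */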
4791
4792 /*
4793 * Vector Mask Operations
4794 */
4795 /* Vector Mask-Register Logical Instructions */
4796 #define GEN_VEXT_MASK_VV(NAME, OP) \
4797 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4798 void *vs2, CPURISCVState *env, \
4799 uint32_t desc) \
4800 { \
4801 uint32_t vl = env->vl; \
4802 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;\
4803 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
4804 uint32_t i; \
4805 int a, b; \
4806 \
4807 VSTART_CHECK_EARLY_EXIT(env, vl); \
4808 \
4809 for (i = env->vstart; i < vl; i++) { \
4810 a = vext_elem_mask(vs1, i); \
4811 b = vext_elem_mask(vs2, i); \
4812 vext_set_elem_mask(vd, i, OP(b, a)); \
4813 } \
4814 env->vstart = 0; \
4815 /*
4816 * mask destination register is always tail-agnostic
4817 * set tail elements to 1s
4818 */ \
4819 if (vta_all_1s) { \
4820 for (; i < total_elems; i++) { \
4821 vext_set_elem_mask(vd, i, 1); \
4822 } \
4823 } \
4824 }
4825
4826 #define DO_NAND(N, M) (!(N & M))
4827 #define DO_ANDNOT(N, M) (N & !M)
4828 #define DO_NOR(N, M) (!(N | M))
4829 #define DO_ORNOT(N, M) (N | !M)
4830 #define DO_XNOR(N, M) (!(N ^ M))
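/*
 * The operands of these macros are single mask bits (0 or 1) as returned
 * by vext_elem_mask(), so the logical '!' is sufficient for the negated
 * forms, e.g. DO_ANDNOT(1, 1) == (1 & !1) == 0 and
 * DO_ANDNOT(1, 0) == (1 & !0) == 1.
 */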
4831
4832 GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
4833 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
4834 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
4835 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
4836 GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
4837 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
4838 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
4839 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
4840
4841 /* Vector count population in mask vcpop */
4842 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
4843 uint32_t desc)
4844 {
4845 target_ulong cnt = 0;
4846 uint32_t vm = vext_vm(desc);
4847 uint32_t vl = env->vl;
4848 int i;
4849
4850 for (i = env->vstart; i < vl; i++) {
4851 if (vm || vext_elem_mask(v0, i)) {
4852 if (vext_elem_mask(vs2, i)) {
4853 cnt++;
4854 }
4855 }
4856 }
4857 env->vstart = 0;
4858 return cnt;
4859 }
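/*
 * Illustrative example: with vl=8, vm=0, v0 = {1,1,0,0,1,1,1,1} and
 * vs2 = {1,0,1,1,0,1,1,0}, only elements that are both active in v0 and
 * set in vs2 are counted, so vcpop.m returns 3 (elements 0, 5 and 6).
 */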
4860
4861 /* vfirst find-first-set mask bit */
4862 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4863 uint32_t desc)
4864 {
4865 uint32_t vm = vext_vm(desc);
4866 uint32_t vl = env->vl;
4867 int i;
4868
4869 for (i = env->vstart; i < vl; i++) {
4870 if (vm || vext_elem_mask(v0, i)) {
4871 if (vext_elem_mask(vs2, i)) {
4872 return i;
4873 }
4874 }
4875 }
4876 env->vstart = 0;
4877 return -1LL;
4878 }
4879
4880 enum set_mask_type {
4881 ONLY_FIRST = 1,
4882 INCLUDE_FIRST,
4883 BEFORE_FIRST,
4884 };
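/*
 * Example (all elements active): for a source mask vs2 whose first set
 * bit is at element 2, e.g. {0,0,1,0,1,...}, the three variants yield:
 *
 *   BEFORE_FIRST  (vmsbf.m): {1,1,0,0,0,...}  set before the first set bit
 *   INCLUDE_FIRST (vmsif.m): {1,1,1,0,0,...}  set up to and including it
 *   ONLY_FIRST    (vmsof.m): {0,0,1,0,0,...}  set only at the first set bit
 */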
4885
4886 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
4887 uint32_t desc, enum set_mask_type type)
4888 {
4889 uint32_t vm = vext_vm(desc);
4890 uint32_t vl = env->vl;
4891 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;
4892 uint32_t vta_all_1s = vext_vta_all_1s(desc);
4893 uint32_t vma = vext_vma(desc);
4894 int i;
4895 bool first_mask_bit = false;
4896
4897 VSTART_CHECK_EARLY_EXIT(env, vl);
4898
4899 for (i = env->vstart; i < vl; i++) {
4900 if (!vm && !vext_elem_mask(v0, i)) {
4901 /* set masked-off elements to 1s */
4902 if (vma) {
4903 vext_set_elem_mask(vd, i, 1);
4904 }
4905 continue;
4906 }
4907 /* write a zero to all following active elements */
4908 if (first_mask_bit) {
4909 vext_set_elem_mask(vd, i, 0);
4910 continue;
4911 }
4912 if (vext_elem_mask(vs2, i)) {
4913 first_mask_bit = true;
4914 if (type == BEFORE_FIRST) {
4915 vext_set_elem_mask(vd, i, 0);
4916 } else {
4917 vext_set_elem_mask(vd, i, 1);
4918 }
4919 } else {
4920 if (type == ONLY_FIRST) {
4921 vext_set_elem_mask(vd, i, 0);
4922 } else {
4923 vext_set_elem_mask(vd, i, 1);
4924 }
4925 }
4926 }
4927 env->vstart = 0;
4928 /*
4929 * mask destination register is always tail-agnostic
4930 * set tail elements to 1s
4931 */
4932 if (vta_all_1s) {
4933 for (; i < total_elems; i++) {
4934 vext_set_elem_mask(vd, i, 1);
4935 }
4936 }
4937 }
4938
4939 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4940 uint32_t desc)
4941 {
4942 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
4943 }
4944
4945 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4946 uint32_t desc)
4947 {
4948 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
4949 }
4950
4951 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4952 uint32_t desc)
4953 {
4954 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
4955 }
4956
4957 /* Vector Iota Instruction */
4958 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \
4959 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \
4960 uint32_t desc) \
4961 { \
4962 uint32_t vm = vext_vm(desc); \
4963 uint32_t vl = env->vl; \
4964 uint32_t esz = sizeof(ETYPE); \
4965 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4966 uint32_t vta = vext_vta(desc); \
4967 uint32_t vma = vext_vma(desc); \
4968 uint32_t sum = 0; \
4969 int i; \
4970 \
4971 VSTART_CHECK_EARLY_EXIT(env, vl); \
4972 \
4973 for (i = env->vstart; i < vl; i++) { \
4974 if (!vm && !vext_elem_mask(v0, i)) { \
4975 /* set masked-off elements to 1s */ \
4976 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
4977 continue; \
4978 } \
4979 *((ETYPE *)vd + H(i)) = sum; \
4980 if (vext_elem_mask(vs2, i)) { \
4981 sum++; \
4982 } \
4983 } \
4984 env->vstart = 0; \
4985 /* set tail elements to 1s */ \
4986 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
4987 }
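/*
 * Worked example (all elements active): with vl=5 and source mask
 * vs2 = {1,0,0,1,1}, viota.m writes the running count of set mask bits
 * preceding each element, giving vd = {0,1,1,1,2}.
 */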
4988
4989 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
4990 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
4991 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
4992 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
4993
4994 /* Vector Element Index Instruction */
4995 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \
4996 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \
4997 { \
4998 uint32_t vm = vext_vm(desc); \
4999 uint32_t vl = env->vl; \
5000 uint32_t esz = sizeof(ETYPE); \
5001 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5002 uint32_t vta = vext_vta(desc); \
5003 uint32_t vma = vext_vma(desc); \
5004 int i; \
5005 \
5006 VSTART_CHECK_EARLY_EXIT(env, vl); \
5007 \
5008 for (i = env->vstart; i < vl; i++) { \
5009 if (!vm && !vext_elem_mask(v0, i)) { \
5010 /* set masked-off elements to 1s */ \
5011 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5012 continue; \
5013 } \
5014 *((ETYPE *)vd + H(i)) = i; \
5015 } \
5016 env->vstart = 0; \
5017 /* set tail elements to 1s */ \
5018 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5019 }
5020
5021 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
5022 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
5023 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
5024 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
5025
5026 /*
5027 * Vector Permutation Instructions
5028 */
5029
5030 /* Vector Slide Instructions */
5031 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \
5032 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5033 CPURISCVState *env, uint32_t desc) \
5034 { \
5035 uint32_t vm = vext_vm(desc); \
5036 uint32_t vl = env->vl; \
5037 uint32_t esz = sizeof(ETYPE); \
5038 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5039 uint32_t vta = vext_vta(desc); \
5040 uint32_t vma = vext_vma(desc); \
5041 target_ulong offset = s1, i_min, i; \
5042 \
5043 VSTART_CHECK_EARLY_EXIT(env, vl); \
5044 \
5045 i_min = MAX(env->vstart, offset); \
5046 for (i = i_min; i < vl; i++) { \
5047 if (!vm && !vext_elem_mask(v0, i)) { \
5048 /* set masked-off elements to 1s */ \
5049 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5050 continue; \
5051 } \
5052 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \
5053 } \
5054 env->vstart = 0; \
5055 /* set tail elements to 1s */ \
5056 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5057 }
5058
5059 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
5060 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
5061 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
5062 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
5063 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
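/*
 * Illustrative example: with OFFSET = x[rs1] = 2 and vl=6, the loop
 * above starts at i_min=2 and writes vd[i] = vs2[i-2] for i=2..5;
 * vd[0] and vd[1] are left untouched, as vslideup requires.
 */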
5064
5065 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \
5066 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5067 CPURISCVState *env, uint32_t desc) \
5068 { \
5069 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
5070 uint32_t vm = vext_vm(desc); \
5071 uint32_t vl = env->vl; \
5072 uint32_t esz = sizeof(ETYPE); \
5073 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5074 uint32_t vta = vext_vta(desc); \
5075 uint32_t vma = vext_vma(desc); \
5076 target_ulong i_max, i_min, i; \
5077 \
5078 VSTART_CHECK_EARLY_EXIT(env, vl); \
5079 \
5080 i_min = MIN(s1 < vlmax ? vlmax - s1 : 0, vl); \
5081 i_max = MAX(i_min, env->vstart); \
5082 for (i = env->vstart; i < i_max; ++i) { \
5083 if (!vm && !vext_elem_mask(v0, i)) { \
5084 /* set masked-off elements to 1s */ \
5085 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5086 continue; \
5087 } \
5088 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \
5089 } \
5090 \
5091 for (i = i_max; i < vl; ++i) { \
5092 if (vm || vext_elem_mask(v0, i)) { \
5093 *((ETYPE *)vd + H(i)) = 0; \
5094 } \
5095 } \
5096 \
5097 env->vstart = 0; \
5098 /* set tail elements to 1s */ \
5099 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5100 }
5101
5102 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
5103 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
5104 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
5105 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
5106 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
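/*
 * Illustrative example: with OFFSET = x[rs1] = 2, vl=6, VLMAX=8 and
 * vstart=0, i_min = MIN(8 - 2, 6) = 6, so the first loop writes
 * vd[i] = vs2[i+2] for i=0..5.  With OFFSET=5 instead, i_max would be 3
 * and the second loop would zero the active elements vd[3..5], whose
 * source indices lie at or beyond VLMAX.
 */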
5107
5108 #define GEN_VEXT_VSLIE1UP(BITWIDTH, H) \
5109 static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1, \
5110 void *vs2, CPURISCVState *env, \
5111 uint32_t desc) \
5112 { \
5113 typedef uint##BITWIDTH##_t ETYPE; \
5114 uint32_t vm = vext_vm(desc); \
5115 uint32_t vl = env->vl; \
5116 uint32_t esz = sizeof(ETYPE); \
5117 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5118 uint32_t vta = vext_vta(desc); \
5119 uint32_t vma = vext_vma(desc); \
5120 uint32_t i; \
5121 \
5122 VSTART_CHECK_EARLY_EXIT(env, vl); \
5123 \
5124 for (i = env->vstart; i < vl; i++) { \
5125 if (!vm && !vext_elem_mask(v0, i)) { \
5126 /* set masked-off elements to 1s */ \
5127 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5128 continue; \
5129 } \
5130 if (i == 0) { \
5131 *((ETYPE *)vd + H(i)) = s1; \
5132 } else { \
5133 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \
5134 } \
5135 } \
5136 env->vstart = 0; \
5137 /* set tail elements to 1s */ \
5138 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5139 }
5140
5141 GEN_VEXT_VSLIE1UP(8, H1)
5142 GEN_VEXT_VSLIE1UP(16, H2)
5143 GEN_VEXT_VSLIE1UP(32, H4)
5144 GEN_VEXT_VSLIE1UP(64, H8)
5145
5146 #define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \
5147 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5148 CPURISCVState *env, uint32_t desc) \
5149 { \
5150 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
5151 }
5152
5153 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
5154 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
5155 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
5156 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
5157 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
5158
5159 #define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \
5160 static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1, \
5161 void *vs2, CPURISCVState *env, \
5162 uint32_t desc) \
5163 { \
5164 typedef uint##BITWIDTH##_t ETYPE; \
5165 uint32_t vm = vext_vm(desc); \
5166 uint32_t vl = env->vl; \
5167 uint32_t esz = sizeof(ETYPE); \
5168 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5169 uint32_t vta = vext_vta(desc); \
5170 uint32_t vma = vext_vma(desc); \
5171 uint32_t i; \
5172 \
5173 VSTART_CHECK_EARLY_EXIT(env, vl); \
5174 \
5175 for (i = env->vstart; i < vl; i++) { \
5176 if (!vm && !vext_elem_mask(v0, i)) { \
5177 /* set masked-off elements to 1s */ \
5178 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5179 continue; \
5180 } \
5181 if (i == vl - 1) { \
5182 *((ETYPE *)vd + H(i)) = s1; \
5183 } else { \
5184 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \
5185 } \
5186 } \
5187 env->vstart = 0; \
5188 /* set tail elements to 1s */ \
5189 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5190 }
5191
5192 GEN_VEXT_VSLIDE1DOWN(8, H1)
5193 GEN_VEXT_VSLIDE1DOWN(16, H2)
5194 GEN_VEXT_VSLIDE1DOWN(32, H4)
5195 GEN_VEXT_VSLIDE1DOWN(64, H8)
5196
5197 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \
5198 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5199 CPURISCVState *env, uint32_t desc) \
5200 { \
5201 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
5202 }
5203
5204 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
5205 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
5206 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
5207 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
5208 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
5209
5210 /* Vector Floating-Point Slide Instructions */
5211 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \
5212 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
5213 CPURISCVState *env, uint32_t desc) \
5214 { \
5215 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
5216 }
5217
5218 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
5219 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
5220 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
5221 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)
5222
5223 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \
5224 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
5225 CPURISCVState *env, uint32_t desc) \
5226 { \
5227 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
5228 }
5229
5230 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
5231 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
5232 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
5233 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
5234
5235 /* Vector Register Gather Instruction */
5236 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \
5237 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
5238 CPURISCVState *env, uint32_t desc) \
5239 { \
5240 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \
5241 uint32_t vm = vext_vm(desc); \
5242 uint32_t vl = env->vl; \
5243 uint32_t esz = sizeof(TS2); \
5244 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5245 uint32_t vta = vext_vta(desc); \
5246 uint32_t vma = vext_vma(desc); \
5247 uint64_t index; \
5248 uint32_t i; \
5249 \
5250 VSTART_CHECK_EARLY_EXIT(env, vl); \
5251 \
5252 for (i = env->vstart; i < vl; i++) { \
5253 if (!vm && !vext_elem_mask(v0, i)) { \
5254 /* set masked-off elements to 1s */ \
5255 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5256 continue; \
5257 } \
5258 index = *((TS1 *)vs1 + HS1(i)); \
5259 if (index >= vlmax) { \
5260 *((TS2 *)vd + HS2(i)) = 0; \
5261 } else { \
5262 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \
5263 } \
5264 } \
5265 env->vstart = 0; \
5266 /* set tail elements to 1s */ \
5267 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5268 }
5269
5270 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
5271 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
5272 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
5273 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
5274 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)
5275
5276 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
5277 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
5278 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
5279 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
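/*
 * Illustrative example: with vl=4, VLMAX=8 and index vector
 * vs1 = {3, 0, 9, 1}, vrgather.vv gives vd = {vs2[3], vs2[0], 0, vs2[1]};
 * index 9 is >= VLMAX, so that element reads as zero.  The vrgatherei16
 * forms behave the same way but always take their indices from 16-bit
 * elements of vs1, independent of SEW.
 */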
5280
5281 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \
5282 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5283 CPURISCVState *env, uint32_t desc) \
5284 { \
5285 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
5286 uint32_t vm = vext_vm(desc); \
5287 uint32_t vl = env->vl; \
5288 uint32_t esz = sizeof(ETYPE); \
5289 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5290 uint32_t vta = vext_vta(desc); \
5291 uint32_t vma = vext_vma(desc); \
5292 uint64_t index = s1; \
5293 uint32_t i; \
5294 \
5295 VSTART_CHECK_EARLY_EXIT(env, vl); \
5296 \
5297 for (i = env->vstart; i < vl; i++) { \
5298 if (!vm && !vext_elem_mask(v0, i)) { \
5299 /* set masked-off elements to 1s */ \
5300 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5301 continue; \
5302 } \
5303 if (index >= vlmax) { \
5304 *((ETYPE *)vd + H(i)) = 0; \
5305 } else { \
5306 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \
5307 } \
5308 } \
5309 env->vstart = 0; \
5310 /* set tail elements to 1s */ \
5311 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5312 }
5313
5314 /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
5315 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
5316 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
5317 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
5318 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
5319
5320 /* Vector Compress Instruction */
5321 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \
5322 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
5323 CPURISCVState *env, uint32_t desc) \
5324 { \
5325 uint32_t vl = env->vl; \
5326 uint32_t esz = sizeof(ETYPE); \
5327 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5328 uint32_t vta = vext_vta(desc); \
5329 uint32_t num = 0, i; \
5330 \
5331 VSTART_CHECK_EARLY_EXIT(env, vl); \
5332 \
5333 for (i = env->vstart; i < vl; i++) { \
5334 if (!vext_elem_mask(vs1, i)) { \
5335 continue; \
5336 } \
5337 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \
5338 num++; \
5339 } \
5340 env->vstart = 0; \
5341 /* set tail elements to 1s */ \
5342 vext_set_elems_1s(vd, vta, num * esz, total_elems * esz); \
5343 }
5344
5345 /* Compress into vd elements of vs2 where vs1 is enabled */
5346 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
5347 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
5348 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
5349 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
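/*
 * Illustrative example: with vl=6, mask vs1 = {1,0,1,0,0,1} and
 * vs2 = {a,b,c,d,e,f}, the loop above packs the selected elements into
 * the lowest-numbered destination slots, giving vd[0..2] = {a,c,f};
 * destination elements from index num=3 upwards are then handled as
 * tail elements by vext_set_elems_1s().
 */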
5350
5351 /* Vector Whole Register Move */
5352 void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
5353 {
5354 /* EEW = SEW */
5355 uint32_t maxsz = simd_maxsz(desc);
5356 uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
5357 uint32_t startb = env->vstart * sewb;
5358 uint32_t i = startb;
5359
5360 if (startb >= maxsz) {
5361 env->vstart = 0;
5362 return;
5363 }
5364
5365 if (HOST_BIG_ENDIAN && i % 8 != 0) {
5366 uint32_t j = ROUND_UP(i, 8);
5367 memcpy((uint8_t *)vd + H1(j - 1),
5368 (uint8_t *)vs2 + H1(j - 1),
5369 j - i);
5370 i = j;
5371 }
5372
5373 memcpy((uint8_t *)vd + H1(i),
5374 (uint8_t *)vs2 + H1(i),
5375 maxsz - i);
5376
5377 env->vstart = 0;
5378 }
5379
5380 /* Vector Integer Extension */
5381 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \
5382 void HELPER(NAME)(void *vd, void *v0, void *vs2, \
5383 CPURISCVState *env, uint32_t desc) \
5384 { \
5385 uint32_t vl = env->vl; \
5386 uint32_t vm = vext_vm(desc); \
5387 uint32_t esz = sizeof(ETYPE); \
5388 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5389 uint32_t vta = vext_vta(desc); \
5390 uint32_t vma = vext_vma(desc); \
5391 uint32_t i; \
5392 \
5393 VSTART_CHECK_EARLY_EXIT(env, vl); \
5394 \
5395 for (i = env->vstart; i < vl; i++) { \
5396 if (!vm && !vext_elem_mask(v0, i)) { \
5397 /* set masked-off elements to 1s */ \
5398 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5399 continue; \
5400 } \
5401 *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \
5402 } \
5403 env->vstart = 0; \
5404 /* set tail elements to 1s */ \
5405 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5406 }
5407
5408 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
5409 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
5410 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
5411 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
5412 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
5413 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)
5414
5415 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
5416 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
5417 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
5418 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
5419 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
5420 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
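/*
 * Illustrative example: vzext.vf2 with SEW=16 reads each source element
 * as uint8_t and zero-extends it, so a source byte of 0xFF becomes
 * 0x00FF, whereas vsext.vf2 reads it as int8_t and sign-extends it to
 * 0xFFFF.
 */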
5421