1 /*
2 * RISC-V Vector Extension Helpers for QEMU.
3 *
4 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2 or later, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19 #include "qemu/osdep.h"
20 #include "qemu/host-utils.h"
21 #include "qemu/bitops.h"
22 #include "cpu.h"
23 #include "exec/memop.h"
24 #include "exec/exec-all.h"
25 #include "exec/cpu_ldst.h"
26 #include "exec/page-protection.h"
27 #include "exec/helper-proto.h"
28 #include "fpu/softfloat.h"
29 #include "tcg/tcg-gvec-desc.h"
30 #include "internals.h"
31 #include "vector_internals.h"
32 #include <math.h>
33
34 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
35 target_ulong s2)
36 {
37 int vlmax, vl;
38 RISCVCPU *cpu = env_archcpu(env);
39 uint64_t vlmul = FIELD_EX64(s2, VTYPE, VLMUL);
40 uint8_t vsew = FIELD_EX64(s2, VTYPE, VSEW);
41 uint16_t sew = 8 << vsew;
42 uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
43 int xlen = riscv_cpu_xlen(env);
44 bool vill = (s2 >> (xlen - 1)) & 0x1;
45 target_ulong reserved = s2 &
46 MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
47 xlen - 1 - R_VTYPE_RESERVED_SHIFT);
48 uint16_t vlen = cpu->cfg.vlenb << 3;
49 int8_t lmul;
50
51 if (vlmul & 4) {
52 /*
53 * Fractional LMUL, check:
54 *
55 * VLEN * LMUL >= SEW
56 * VLEN >> (8 - lmul) >= sew
57 * (vlenb << 3) >> (8 - lmul) >= sew
58 */
59 if (vlmul == 4 || (vlen >> (8 - vlmul)) < sew) {
60 vill = true;
61 }
62 }
63
64 if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
65 /* only set vill bit. */
66 env->vill = 1;
67 env->vtype = 0;
68 env->vl = 0;
69 env->vstart = 0;
70 return 0;
71 }
72
73 /* lmul encoded as in DisasContext::lmul */
74 lmul = sextract32(FIELD_EX64(s2, VTYPE, VLMUL), 0, 3);
75 vlmax = vext_get_vlmax(cpu->cfg.vlenb, vsew, lmul);
76 if (s1 <= vlmax) {
77 vl = s1;
78 } else if (s1 < 2 * vlmax && cpu->cfg.rvv_vl_half_avl) {
79 vl = (s1 + 1) >> 1;
80 } else {
81 vl = vlmax;
82 }
83 env->vl = vl;
84 env->vtype = s2;
85 env->vstart = 0;
86 env->vill = 0;
87 return vl;
88 }
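
/*
 * Worked example of the vsetvl computation above (the numbers are
 * illustrative assumptions, not requirements of this code):
 *   VLEN = 128 (vlenb = 16); vtype requests SEW = 32 (vsew = 2) and
 *   LMUL = 1/2 (vlmul = 7, i.e. lmul = -1 after sign extension).
 *   Fractional-LMUL check: vlen >> (8 - vlmul) = 128 >> 1 = 64 >= 32.
 *   VLMAX = (VLEN / SEW) * LMUL = 4 * (1/2) = 2, so an AVL of s1 = 5
 *   yields vl = VLMAX = 2 (5 is not below 2 * VLMAX = 4, so the
 *   optional rvv_vl_half_avl halving does not apply).
 */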
89
90 /*
91 * Get the maximum number of elements that can be operated on.
92 *
93 * log2_esz: log2 of element size in bytes.
94 */
95 static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
96 {
97 /*
98 * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
99 * so vlen in bytes (vlenb) is encoded as maxsz.
100 */
101 uint32_t vlenb = simd_maxsz(desc);
102
103 /* Return VLMAX */
104 int scale = vext_lmul(desc) - log2_esz;
105 return scale < 0 ? vlenb >> -scale : vlenb << scale;
106 }
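
/*
 * Illustrative example (assumed values): vlenb = simd_maxsz(desc) = 16
 * (VLEN = 128), log2_esz = 2 (SEW = 32) and vext_lmul(desc) = 1
 * (LMUL = 2).  Then scale = 1 - 2 = -1 and VLMAX = 16 >> 1 = 8,
 * which matches VLEN / SEW * LMUL = 128 / 32 * 2.
 */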
107
108 /*
109 * This function checks watchpoints before the real load operation.
110 *
111 * In system mode, the TLB API probe_access is enough for the watchpoint check.
112 * In user mode, there is no watchpoint support now.
113 *
114 * It will trigger an exception if there is no mapping in the TLB
115 * and the page table walk can't fill the TLB entry. Then the guest
116 * software can return here after processing the exception, or never return.
117 */
118 static void probe_pages(CPURISCVState *env, target_ulong addr,
119 target_ulong len, uintptr_t ra,
120 MMUAccessType access_type)
121 {
122 target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
123 target_ulong curlen = MIN(pagelen, len);
124 int mmu_index = riscv_env_mmu_index(env, false);
125
126 probe_access(env, adjust_addr(env, addr), curlen, access_type,
127 mmu_index, ra);
128 if (len > curlen) {
129 addr += curlen;
130 curlen = len - curlen;
131 probe_access(env, adjust_addr(env, addr), curlen, access_type,
132 mmu_index, ra);
133 }
134 }
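
/*
 * Example of the split above (illustrative, assuming 4 KiB pages):
 * addr ends in 0xffc and len = 8, so pagelen = 4; the first probe
 * covers the last 4 bytes of the current page and the second probe
 * covers the remaining 4 bytes at the start of the next page.
 */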
135
136 static inline void vext_set_elem_mask(void *v0, int index,
137 uint8_t value)
138 {
139 int idx = index / 64;
140 int pos = index % 64;
141 uint64_t old = ((uint64_t *)v0)[idx];
142 ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
143 }
144
145 /* elements operations for load and store */
146 typedef void vext_ldst_elem_fn_tlb(CPURISCVState *env, abi_ptr addr,
147 uint32_t idx, void *vd, uintptr_t retaddr);
148 typedef void vext_ldst_elem_fn_host(void *vd, uint32_t idx, void *host);
149
150 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \
151 static inline QEMU_ALWAYS_INLINE \
152 void NAME##_tlb(CPURISCVState *env, abi_ptr addr, \
153 uint32_t idx, void *vd, uintptr_t retaddr) \
154 { \
155 ETYPE *cur = ((ETYPE *)vd + H(idx)); \
156 *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \
157 } \
158 \
159 static inline QEMU_ALWAYS_INLINE \
160 void NAME##_host(void *vd, uint32_t idx, void *host) \
161 { \
162 ETYPE *cur = ((ETYPE *)vd + H(idx)); \
163 *cur = (ETYPE)LDSUF##_p(host); \
164 }
165
166 GEN_VEXT_LD_ELEM(lde_b, uint8_t, H1, ldub)
167 GEN_VEXT_LD_ELEM(lde_h, uint16_t, H2, lduw)
168 GEN_VEXT_LD_ELEM(lde_w, uint32_t, H4, ldl)
169 GEN_VEXT_LD_ELEM(lde_d, uint64_t, H8, ldq)
170
171 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \
172 static inline QEMU_ALWAYS_INLINE \
173 void NAME##_tlb(CPURISCVState *env, abi_ptr addr, \
174 uint32_t idx, void *vd, uintptr_t retaddr) \
175 { \
176 ETYPE data = *((ETYPE *)vd + H(idx)); \
177 cpu_##STSUF##_data_ra(env, addr, data, retaddr); \
178 } \
179 \
180 static inline QEMU_ALWAYS_INLINE \
181 void NAME##_host(void *vd, uint32_t idx, void *host) \
182 { \
183 ETYPE data = *((ETYPE *)vd + H(idx)); \
184 STSUF##_p(host, data); \
185 }
186
187 GEN_VEXT_ST_ELEM(ste_b, uint8_t, H1, stb)
188 GEN_VEXT_ST_ELEM(ste_h, uint16_t, H2, stw)
189 GEN_VEXT_ST_ELEM(ste_w, uint32_t, H4, stl)
190 GEN_VEXT_ST_ELEM(ste_d, uint64_t, H8, stq)
191
192 static inline QEMU_ALWAYS_INLINE void
193 vext_continuous_ldst_tlb(CPURISCVState *env, vext_ldst_elem_fn_tlb *ldst_tlb,
194 void *vd, uint32_t evl, target_ulong addr,
195 uint32_t reg_start, uintptr_t ra, uint32_t esz,
196 bool is_load)
197 {
198 uint32_t i;
199 for (i = env->vstart; i < evl; env->vstart = ++i, addr += esz) {
200 ldst_tlb(env, adjust_addr(env, addr), i, vd, ra);
201 }
202 }
203
204 static inline QEMU_ALWAYS_INLINE void
205 vext_continuous_ldst_host(CPURISCVState *env, vext_ldst_elem_fn_host *ldst_host,
206 void *vd, uint32_t evl, uint32_t reg_start, void *host,
207 uint32_t esz, bool is_load)
208 {
209 #if HOST_BIG_ENDIAN
210 for (; reg_start < evl; reg_start++, host += esz) {
211 ldst_host(vd, reg_start, host);
212 }
213 #else
214 if (esz == 1) {
215 uint32_t byte_offset = reg_start * esz;
216 uint32_t size = (evl - reg_start) * esz;
217
218 if (is_load) {
219 memcpy(vd + byte_offset, host, size);
220 } else {
221 memcpy(host, vd + byte_offset, size);
222 }
223 } else {
224 for (; reg_start < evl; reg_start++, host += esz) {
225 ldst_host(vd, reg_start, host);
226 }
227 }
228 #endif
229 }
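
/*
 * Note on the little-endian fast path above: when esz == 1 the H1()
 * index mapping is the identity and byte order does not matter, so the
 * per-element loop degenerates to a straight memcpy between the vector
 * register state and host memory.
 */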
230
231 static void vext_set_tail_elems_1s(target_ulong vl, void *vd,
232 uint32_t desc, uint32_t nf,
233 uint32_t esz, uint32_t max_elems)
234 {
235 uint32_t vta = vext_vta(desc);
236 int k;
237
238 if (vta == 0) {
239 return;
240 }
241
242 for (k = 0; k < nf; ++k) {
243 vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz,
244 (k * max_elems + max_elems) * esz);
245 }
246 }
247
248 /*
249 * stride: access vector elements from strided memory
250 */
251 static void
252 vext_ldst_stride(void *vd, void *v0, target_ulong base, target_ulong stride,
253 CPURISCVState *env, uint32_t desc, uint32_t vm,
254 vext_ldst_elem_fn_tlb *ldst_elem, uint32_t log2_esz,
255 uintptr_t ra)
256 {
257 uint32_t i, k;
258 uint32_t nf = vext_nf(desc);
259 uint32_t max_elems = vext_max_elems(desc, log2_esz);
260 uint32_t esz = 1 << log2_esz;
261 uint32_t vma = vext_vma(desc);
262
263 VSTART_CHECK_EARLY_EXIT(env, env->vl);
264
265 for (i = env->vstart; i < env->vl; env->vstart = ++i) {
266 k = 0;
267 while (k < nf) {
268 if (!vm && !vext_elem_mask(v0, i)) {
269 /* set masked-off elements to 1s */
270 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
271 (i + k * max_elems + 1) * esz);
272 k++;
273 continue;
274 }
275 target_ulong addr = base + stride * i + (k << log2_esz);
276 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
277 k++;
278 }
279 }
280 env->vstart = 0;
281
282 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
283 }
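
/*
 * Illustrative example (assumed values): nf = 2, SEW = 32 (esz = 4)
 * and stride = 64.  Field k of element i is accessed at
 * base + 64 * i + 4 * k and lives at index i + k * max_elems in vd,
 * i.e. each field occupies its own destination register group.
 */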
284
285 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \
286 void HELPER(NAME)(void *vd, void * v0, target_ulong base, \
287 target_ulong stride, CPURISCVState *env, \
288 uint32_t desc) \
289 { \
290 uint32_t vm = vext_vm(desc); \
291 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \
292 ctzl(sizeof(ETYPE)), GETPC()); \
293 }
294
295 GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b_tlb)
296 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h_tlb)
297 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w_tlb)
298 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d_tlb)
299
300 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \
301 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
302 target_ulong stride, CPURISCVState *env, \
303 uint32_t desc) \
304 { \
305 uint32_t vm = vext_vm(desc); \
306 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \
307 ctzl(sizeof(ETYPE)), GETPC()); \
308 }
309
310 GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b_tlb)
311 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h_tlb)
312 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w_tlb)
313 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d_tlb)
314
315 /*
316 * unit-stride: access elements stored contiguously in memory
317 */
318
319 /* unmasked unit-stride load and store operation */
320 static inline QEMU_ALWAYS_INLINE void
321 vext_page_ldst_us(CPURISCVState *env, void *vd, target_ulong addr,
322 uint32_t elems, uint32_t nf, uint32_t max_elems,
323 uint32_t log2_esz, bool is_load, int mmu_index,
324 vext_ldst_elem_fn_tlb *ldst_tlb,
325 vext_ldst_elem_fn_host *ldst_host, uintptr_t ra)
326 {
327 void *host;
328 int i, k, flags;
329 uint32_t esz = 1 << log2_esz;
330 uint32_t size = (elems * nf) << log2_esz;
331 uint32_t evl = env->vstart + elems;
332 MMUAccessType access_type = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE;
333
334 /* Check page permission/pmp/watchpoint/etc. */
335 flags = probe_access_flags(env, adjust_addr(env, addr), size, access_type,
336 mmu_index, true, &host, ra);
337
338 if (flags == 0) {
339 if (nf == 1) {
340 vext_continuous_ldst_host(env, ldst_host, vd, evl, env->vstart,
341 host, esz, is_load);
342 } else {
343 for (i = env->vstart; i < evl; ++i) {
344 k = 0;
345 while (k < nf) {
346 ldst_host(vd, i + k * max_elems, host);
347 host += esz;
348 k++;
349 }
350 }
351 }
352 env->vstart += elems;
353 } else {
354 if (nf == 1) {
355 vext_continuous_ldst_tlb(env, ldst_tlb, vd, evl, addr, env->vstart,
356 ra, esz, is_load);
357 } else {
358 /* load/store elements through the TLB slow path */
359 for (i = env->vstart; i < evl; env->vstart = ++i) {
360 k = 0;
361 while (k < nf) {
362 ldst_tlb(env, adjust_addr(env, addr), i + k * max_elems,
363 vd, ra);
364 addr += esz;
365 k++;
366 }
367 }
368 }
369 }
370 }
371
372 static inline QEMU_ALWAYS_INLINE void
373 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
374 vext_ldst_elem_fn_tlb *ldst_tlb,
375 vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz,
376 uint32_t evl, uintptr_t ra, bool is_load)
377 {
378 uint32_t k;
379 target_ulong page_split, elems, addr;
380 uint32_t nf = vext_nf(desc);
381 uint32_t max_elems = vext_max_elems(desc, log2_esz);
382 uint32_t esz = 1 << log2_esz;
383 uint32_t msize = nf * esz;
384 int mmu_index = riscv_env_mmu_index(env, false);
385
386 VSTART_CHECK_EARLY_EXIT(env, evl);
387
388 #if defined(CONFIG_USER_ONLY)
389 /*
390 * For data sizes <= 6 bytes we get better performance by simply calling
391 * vext_continuous_ldst_tlb
392 */
393 if (nf == 1 && (evl << log2_esz) <= 6) {
394 addr = base + (env->vstart << log2_esz);
395 vext_continuous_ldst_tlb(env, ldst_tlb, vd, evl, addr, env->vstart, ra,
396 esz, is_load);
397
398 env->vstart = 0;
399 vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems);
400 return;
401 }
402 #endif
403
404 /* Calculate the page range of first page */
405 addr = base + ((env->vstart * nf) << log2_esz);
406 page_split = -(addr | TARGET_PAGE_MASK);
407 /* Get number of elements */
408 elems = page_split / msize;
409 if (unlikely(env->vstart + elems >= evl)) {
410 elems = evl - env->vstart;
411 }
412
413 /* Load/store elements in the first page */
414 if (likely(elems)) {
415 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, log2_esz,
416 is_load, mmu_index, ldst_tlb, ldst_host, ra);
417 }
418
419 /* Load/store elements in the second page */
420 if (unlikely(env->vstart < evl)) {
421 /* Cross page element */
422 if (unlikely(page_split % msize)) {
423 for (k = 0; k < nf; k++) {
424 addr = base + ((env->vstart * nf + k) << log2_esz);
425 ldst_tlb(env, adjust_addr(env, addr),
426 env->vstart + k * max_elems, vd, ra);
427 }
428 env->vstart++;
429 }
430
431 addr = base + ((env->vstart * nf) << log2_esz);
432 /* Get number of elements of second page */
433 elems = evl - env->vstart;
434
435 /* Load/store elements in the second page */
436 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, log2_esz,
437 is_load, mmu_index, ldst_tlb, ldst_host, ra);
438 }
439
440 env->vstart = 0;
441 vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems);
442 }
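
/*
 * Page-split example for the code above (illustrative, assuming 4 KiB
 * pages): nf = 1, esz = 4, vstart = 0 and base 8 bytes before a page
 * boundary.  page_split = 8, so elems = 2 elements are transferred on
 * the first page; page_split % msize == 0 means no element straddles
 * the boundary, and the remaining elements continue on the second page.
 */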
443
444 /*
445 * A masked unit-stride load or store operation is handled as a special
446 * case of the strided form, with stride = NF * sizeof(ETYPE)
447 */
448
449 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST) \
450 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
451 CPURISCVState *env, uint32_t desc) \
452 { \
453 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
454 vext_ldst_stride(vd, v0, base, stride, env, desc, false, \
455 LOAD_FN_TLB, ctzl(sizeof(ETYPE)), GETPC()); \
456 } \
457 \
458 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
459 CPURISCVState *env, uint32_t desc) \
460 { \
461 vext_ldst_us(vd, base, env, desc, LOAD_FN_TLB, LOAD_FN_HOST, \
462 ctzl(sizeof(ETYPE)), env->vl, GETPC(), true); \
463 }
464
465 GEN_VEXT_LD_US(vle8_v, int8_t, lde_b_tlb, lde_b_host)
466 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h_tlb, lde_h_host)
467 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w_tlb, lde_w_host)
468 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d_tlb, lde_d_host)
469
470 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN_TLB, STORE_FN_HOST) \
471 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
472 CPURISCVState *env, uint32_t desc) \
473 { \
474 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
475 vext_ldst_stride(vd, v0, base, stride, env, desc, false, \
476 STORE_FN_TLB, ctzl(sizeof(ETYPE)), GETPC()); \
477 } \
478 \
479 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
480 CPURISCVState *env, uint32_t desc) \
481 { \
482 vext_ldst_us(vd, base, env, desc, STORE_FN_TLB, STORE_FN_HOST, \
483 ctzl(sizeof(ETYPE)), env->vl, GETPC(), false); \
484 }
485
486 GEN_VEXT_ST_US(vse8_v, int8_t, ste_b_tlb, ste_b_host)
487 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h_tlb, ste_h_host)
488 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w_tlb, ste_w_host)
489 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d_tlb, ste_d_host)
490
491 /*
492 * unit stride mask load and store, EEW = 1
493 */
494 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
495 CPURISCVState *env, uint32_t desc)
496 {
497 /* evl = ceil(vl/8) */
498 uint8_t evl = (env->vl + 7) >> 3;
499 vext_ldst_us(vd, base, env, desc, lde_b_tlb, lde_b_host,
500 0, evl, GETPC(), true);
501 }
502
503 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
504 CPURISCVState *env, uint32_t desc)
505 {
506 /* evl = ceil(vl/8) */
507 uint8_t evl = (env->vl + 7) >> 3;
508 vext_ldst_us(vd, base, env, desc, ste_b_tlb, ste_b_host,
509 0, evl, GETPC(), false);
510 }
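
/*
 * Example: vl = 17 active mask bits gives evl = (17 + 7) >> 3 = 3, so
 * three bytes of the mask register are transferred.
 */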
511
512 /*
513 * index: access vector elements from indexed memory
514 */
515 typedef target_ulong vext_get_index_addr(target_ulong base,
516 uint32_t idx, void *vs2);
517
518 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \
519 static target_ulong NAME(target_ulong base, \
520 uint32_t idx, void *vs2) \
521 { \
522 return (base + *((ETYPE *)vs2 + H(idx))); \
523 }
524
525 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1)
526 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
527 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
528 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
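
/*
 * Example (illustrative): for a vlxei16 form, idx_h reads a 16-bit
 * offset from vs2, so vs2[i] = 0x100 gives the address base + 0x100;
 * the index EEW is fixed by the instruction and is independent of the
 * data EEW selected by the helper's ETYPE below.
 */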
529
530 static inline void
531 vext_ldst_index(void *vd, void *v0, target_ulong base,
532 void *vs2, CPURISCVState *env, uint32_t desc,
533 vext_get_index_addr get_index_addr,
534 vext_ldst_elem_fn_tlb *ldst_elem,
535 uint32_t log2_esz, uintptr_t ra)
536 {
537 uint32_t i, k;
538 uint32_t nf = vext_nf(desc);
539 uint32_t vm = vext_vm(desc);
540 uint32_t max_elems = vext_max_elems(desc, log2_esz);
541 uint32_t esz = 1 << log2_esz;
542 uint32_t vma = vext_vma(desc);
543
544 VSTART_CHECK_EARLY_EXIT(env, env->vl);
545
546 /* load/store elements from/to guest memory */
547 for (i = env->vstart; i < env->vl; env->vstart = ++i) {
548 k = 0;
549 while (k < nf) {
550 if (!vm && !vext_elem_mask(v0, i)) {
551 /* set masked-off elements to 1s */
552 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
553 (i + k * max_elems + 1) * esz);
554 k++;
555 continue;
556 }
557 abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
558 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
559 k++;
560 }
561 }
562 env->vstart = 0;
563
564 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
565 }
566
567 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \
568 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
569 void *vs2, CPURISCVState *env, uint32_t desc) \
570 { \
571 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
572 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \
573 }
574
575 GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b_tlb)
576 GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h_tlb)
577 GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w_tlb)
578 GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d_tlb)
579 GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b_tlb)
580 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h_tlb)
581 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w_tlb)
582 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d_tlb)
583 GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b_tlb)
584 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h_tlb)
585 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w_tlb)
586 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d_tlb)
587 GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b_tlb)
588 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h_tlb)
589 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w_tlb)
590 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d_tlb)
591
592 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \
593 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
594 void *vs2, CPURISCVState *env, uint32_t desc) \
595 { \
596 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
597 STORE_FN, ctzl(sizeof(ETYPE)), \
598 GETPC()); \
599 }
600
601 GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b_tlb)
602 GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h_tlb)
603 GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w_tlb)
604 GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d_tlb)
605 GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b_tlb)
606 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h_tlb)
607 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w_tlb)
608 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d_tlb)
609 GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b_tlb)
610 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h_tlb)
611 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w_tlb)
612 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d_tlb)
613 GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b_tlb)
614 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h_tlb)
615 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w_tlb)
616 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d_tlb)
617
618 /*
619 * unit-stride fault-only-first load instructions
620 */
621 static inline void
622 vext_ldff(void *vd, void *v0, target_ulong base, CPURISCVState *env,
623 uint32_t desc, vext_ldst_elem_fn_tlb *ldst_tlb,
624 vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz, uintptr_t ra)
625 {
626 uint32_t i, k, vl = 0;
627 uint32_t nf = vext_nf(desc);
628 uint32_t vm = vext_vm(desc);
629 uint32_t max_elems = vext_max_elems(desc, log2_esz);
630 uint32_t esz = 1 << log2_esz;
631 uint32_t msize = nf * esz;
632 uint32_t vma = vext_vma(desc);
633 target_ulong addr, addr_probe, addr_i, offset, remain, page_split, elems;
634 int mmu_index = riscv_env_mmu_index(env, false);
635 int flags;
636 void *host;
637
638 VSTART_CHECK_EARLY_EXIT(env, env->vl);
639
640 addr = base + ((env->vstart * nf) << log2_esz);
641 page_split = -(addr | TARGET_PAGE_MASK);
642 /* Get number of elements */
643 elems = page_split / msize;
644 if (unlikely(env->vstart + elems >= env->vl)) {
645 elems = env->vl - env->vstart;
646 }
647
648 /* Check page permission/pmp/watchpoint/etc. */
649 flags = probe_access_flags(env, adjust_addr(env, addr), elems * msize,
650 MMU_DATA_LOAD, mmu_index, true, &host, ra);
651
652 /* If we are crossing a page check also the second page. */
653 if (env->vl > elems) {
654 addr_probe = addr + (elems << log2_esz);
655 flags |= probe_access_flags(env, adjust_addr(env, addr_probe),
656 elems * msize, MMU_DATA_LOAD, mmu_index,
657 true, &host, ra);
658 }
659
660 if (flags & ~TLB_WATCHPOINT) {
661 /* probe every access */
662 for (i = env->vstart; i < env->vl; i++) {
663 if (!vm && !vext_elem_mask(v0, i)) {
664 continue;
665 }
666 addr_i = adjust_addr(env, base + i * (nf << log2_esz));
667 if (i == 0) {
668 /* Allow fault on first element. */
669 probe_pages(env, addr_i, nf << log2_esz, ra, MMU_DATA_LOAD);
670 } else {
671 remain = nf << log2_esz;
672 while (remain > 0) {
673 offset = -(addr_i | TARGET_PAGE_MASK);
674
675 /* Probe nonfault on subsequent elements. */
676 flags = probe_access_flags(env, addr_i, offset,
677 MMU_DATA_LOAD, mmu_index, true,
678 &host, 0);
679
680 /*
681 * Stop if invalid (unmapped) or mmio (transaction may
682 * fail). Do not stop if watchpoint, as the spec says that
683 * first-fault should continue to access the same
684 * elements regardless of any watchpoint.
685 */
686 if (flags & ~TLB_WATCHPOINT) {
687 vl = i;
688 goto ProbeSuccess;
689 }
690 if (remain <= offset) {
691 break;
692 }
693 remain -= offset;
694 addr_i = adjust_addr(env, addr_i + offset);
695 }
696 }
697 }
698 }
699 ProbeSuccess:
700 /* load bytes from guest memory */
701 if (vl != 0) {
702 env->vl = vl;
703 }
704
705 if (env->vstart < env->vl) {
706 if (vm) {
707 /* Load/store elements in the first page */
708 if (likely(elems)) {
709 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems,
710 log2_esz, true, mmu_index, ldst_tlb,
711 ldst_host, ra);
712 }
713
714 /* Load/store elements in the second page */
715 if (unlikely(env->vstart < env->vl)) {
716 /* Cross page element */
717 if (unlikely(page_split % msize)) {
718 for (k = 0; k < nf; k++) {
719 addr = base + ((env->vstart * nf + k) << log2_esz);
720 ldst_tlb(env, adjust_addr(env, addr),
721 env->vstart + k * max_elems, vd, ra);
722 }
723 env->vstart++;
724 }
725
726 addr = base + ((env->vstart * nf) << log2_esz);
727 /* Get number of elements of second page */
728 elems = env->vl - env->vstart;
729
730 /* Load/store elements in the second page */
731 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems,
732 log2_esz, true, mmu_index, ldst_tlb,
733 ldst_host, ra);
734 }
735 } else {
736 for (i = env->vstart; i < env->vl; i++) {
737 k = 0;
738 while (k < nf) {
739 if (!vext_elem_mask(v0, i)) {
740 /* set masked-off elements to 1s */
741 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
742 (i + k * max_elems + 1) * esz);
743 k++;
744 continue;
745 }
746 addr = base + ((i * nf + k) << log2_esz);
747 ldst_tlb(env, adjust_addr(env, addr), i + k * max_elems,
748 vd, ra);
749 k++;
750 }
751 }
752 }
753 }
754 env->vstart = 0;
755
756 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
757 }
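
/*
 * Fault-only-first example: if element 0 is accessible but the probe
 * for element 3 reports a fault, the loop above shrinks vl to 3,
 * elements 0..2 are loaded normally and no exception is raised; a
 * fault on element 0 itself still traps via probe_pages().
 */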
758
759 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST) \
760 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
761 CPURISCVState *env, uint32_t desc) \
762 { \
763 vext_ldff(vd, v0, base, env, desc, LOAD_FN_TLB, \
764 LOAD_FN_HOST, ctzl(sizeof(ETYPE)), GETPC()); \
765 }
766
767 GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b_tlb, lde_b_host)
768 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h_tlb, lde_h_host)
769 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w_tlb, lde_w_host)
770 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d_tlb, lde_d_host)
771
772 #define DO_SWAP(N, M) (M)
773 #define DO_AND(N, M) (N & M)
774 #define DO_XOR(N, M) (N ^ M)
775 #define DO_OR(N, M) (N | M)
776 #define DO_ADD(N, M) (N + M)
777
778 /* Signed min/max */
779 #define DO_MAX(N, M) ((N) >= (M) ? (N) : (M))
780 #define DO_MIN(N, M) ((N) >= (M) ? (M) : (N))
781
782 /*
783 * load and store whole register instructions
784 */
785 static inline QEMU_ALWAYS_INLINE void
786 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
787 vext_ldst_elem_fn_tlb *ldst_tlb,
788 vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz,
789 uintptr_t ra, bool is_load)
790 {
791 target_ulong page_split, elems, addr;
792 uint32_t nf = vext_nf(desc);
793 uint32_t vlenb = riscv_cpu_cfg(env)->vlenb;
794 uint32_t max_elems = vlenb >> log2_esz;
795 uint32_t evl = nf * max_elems;
796 uint32_t esz = 1 << log2_esz;
797 int mmu_index = riscv_env_mmu_index(env, false);
798
799 /* Calculate the page range of first page */
800 addr = base + (env->vstart << log2_esz);
801 page_split = -(addr | TARGET_PAGE_MASK);
802 /* Get number of elements */
803 elems = page_split / esz;
804 if (unlikely(env->vstart + elems >= evl)) {
805 elems = evl - env->vstart;
806 }
807
808 /* Load/store elements in the first page */
809 if (likely(elems)) {
810 vext_page_ldst_us(env, vd, addr, elems, 1, max_elems, log2_esz,
811 is_load, mmu_index, ldst_tlb, ldst_host, ra);
812 }
813
814 /* Load/store elements in the second page */
815 if (unlikely(env->vstart < evl)) {
816 /* Cross page element */
817 if (unlikely(page_split % esz)) {
818 addr = base + (env->vstart << log2_esz);
819 ldst_tlb(env, adjust_addr(env, addr), env->vstart, vd, ra);
820 env->vstart++;
821 }
822
823 addr = base + (env->vstart << log2_esz);
824 /* Get number of elements of second page */
825 elems = evl - env->vstart;
826
827 /* Load/store elements in the second page */
828 vext_page_ldst_us(env, vd, addr, elems, 1, max_elems, log2_esz,
829 is_load, mmu_index, ldst_tlb, ldst_host, ra);
830 }
831
832 env->vstart = 0;
833 }
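
/*
 * Example (illustrative): vl2re32_v with vlenb = 16 gives
 * max_elems = 16 >> 2 = 4 and evl = nf * max_elems = 8, i.e. two full
 * vector registers of 32-bit elements are transferred independent of
 * the current vl.
 */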
834
835 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST) \
836 void HELPER(NAME)(void *vd, target_ulong base, CPURISCVState *env, \
837 uint32_t desc) \
838 { \
839 vext_ldst_whole(vd, base, env, desc, LOAD_FN_TLB, LOAD_FN_HOST, \
840 ctzl(sizeof(ETYPE)), GETPC(), true); \
841 }
842
843 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b_tlb, lde_b_host)
844 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h_tlb, lde_h_host)
845 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w_tlb, lde_w_host)
846 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d_tlb, lde_d_host)
847 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b_tlb, lde_b_host)
848 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h_tlb, lde_h_host)
849 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w_tlb, lde_w_host)
850 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d_tlb, lde_d_host)
851 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b_tlb, lde_b_host)
852 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h_tlb, lde_h_host)
853 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w_tlb, lde_w_host)
854 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d_tlb, lde_d_host)
855 GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b_tlb, lde_b_host)
856 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h_tlb, lde_h_host)
857 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w_tlb, lde_w_host)
858 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d_tlb, lde_d_host)
859
860 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN_TLB, STORE_FN_HOST) \
861 void HELPER(NAME)(void *vd, target_ulong base, CPURISCVState *env, \
862 uint32_t desc) \
863 { \
864 vext_ldst_whole(vd, base, env, desc, STORE_FN_TLB, STORE_FN_HOST, \
865 ctzl(sizeof(ETYPE)), GETPC(), false); \
866 }
867
868 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b_tlb, ste_b_host)
869 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b_tlb, ste_b_host)
870 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b_tlb, ste_b_host)
871 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b_tlb, ste_b_host)
872
873 /*
874 * Vector Integer Arithmetic Instructions
875 */
876
877 /* (TD, T1, T2, TX1, TX2) */
878 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
879 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
880 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
881 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
882 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
883 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
884 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
885 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
886 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
887 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
888 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
889 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
890 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
891 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
892 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
893 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
894 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
895 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
896 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
897 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
898 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
899 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
900 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
901
902 #define DO_SUB(N, M) (N - M)
903 #define DO_RSUB(N, M) (M - N)
904
905 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
906 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
907 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
908 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
909 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
910 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
911 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
912 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
913
914 GEN_VEXT_VV(vadd_vv_b, 1)
915 GEN_VEXT_VV(vadd_vv_h, 2)
916 GEN_VEXT_VV(vadd_vv_w, 4)
917 GEN_VEXT_VV(vadd_vv_d, 8)
918 GEN_VEXT_VV(vsub_vv_b, 1)
919 GEN_VEXT_VV(vsub_vv_h, 2)
920 GEN_VEXT_VV(vsub_vv_w, 4)
921 GEN_VEXT_VV(vsub_vv_d, 8)
922
923
924 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
925 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
926 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
927 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
928 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
929 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
930 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
931 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
932 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
933 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
934 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
935 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
936
937 GEN_VEXT_VX(vadd_vx_b, 1)
938 GEN_VEXT_VX(vadd_vx_h, 2)
939 GEN_VEXT_VX(vadd_vx_w, 4)
940 GEN_VEXT_VX(vadd_vx_d, 8)
941 GEN_VEXT_VX(vsub_vx_b, 1)
942 GEN_VEXT_VX(vsub_vx_h, 2)
943 GEN_VEXT_VX(vsub_vx_w, 4)
944 GEN_VEXT_VX(vsub_vx_d, 8)
945 GEN_VEXT_VX(vrsub_vx_b, 1)
946 GEN_VEXT_VX(vrsub_vx_h, 2)
947 GEN_VEXT_VX(vrsub_vx_w, 4)
948 GEN_VEXT_VX(vrsub_vx_d, 8)
949
950 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
951 {
952 intptr_t oprsz = simd_oprsz(desc);
953 intptr_t i;
954
955 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
956 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
957 }
958 }
959
960 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
961 {
962 intptr_t oprsz = simd_oprsz(desc);
963 intptr_t i;
964
965 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
966 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
967 }
968 }
969
970 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
971 {
972 intptr_t oprsz = simd_oprsz(desc);
973 intptr_t i;
974
975 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
976 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
977 }
978 }
979
980 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
981 {
982 intptr_t oprsz = simd_oprsz(desc);
983 intptr_t i;
984
985 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
986 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
987 }
988 }
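
/*
 * The vec_rsubs* helpers above operate on the flat gvec layout: every
 * lane of d receives the scalar b minus the corresponding lane of a,
 * i.e. the reverse-subtract DO_RSUB applied with a scalar operand.
 */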
989
990 /* Vector Widening Integer Add/Subtract */
991 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
992 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
993 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
994 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
995 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
996 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
997 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
998 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
999 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
1000 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
1001 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
1002 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
1003 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
1004 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
1005 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
1006 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
1007 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
1008 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
1009 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
1010 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
1011 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
1012 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
1013 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
1014 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
1015 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
1016 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
1017 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
1018 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
1019 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
1020 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
1021 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
1022 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
1023 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
1024 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
1025 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
1026 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
1027 GEN_VEXT_VV(vwaddu_vv_b, 2)
1028 GEN_VEXT_VV(vwaddu_vv_h, 4)
1029 GEN_VEXT_VV(vwaddu_vv_w, 8)
1030 GEN_VEXT_VV(vwsubu_vv_b, 2)
1031 GEN_VEXT_VV(vwsubu_vv_h, 4)
1032 GEN_VEXT_VV(vwsubu_vv_w, 8)
1033 GEN_VEXT_VV(vwadd_vv_b, 2)
1034 GEN_VEXT_VV(vwadd_vv_h, 4)
1035 GEN_VEXT_VV(vwadd_vv_w, 8)
1036 GEN_VEXT_VV(vwsub_vv_b, 2)
1037 GEN_VEXT_VV(vwsub_vv_h, 4)
1038 GEN_VEXT_VV(vwsub_vv_w, 8)
1039 GEN_VEXT_VV(vwaddu_wv_b, 2)
1040 GEN_VEXT_VV(vwaddu_wv_h, 4)
1041 GEN_VEXT_VV(vwaddu_wv_w, 8)
1042 GEN_VEXT_VV(vwsubu_wv_b, 2)
1043 GEN_VEXT_VV(vwsubu_wv_h, 4)
1044 GEN_VEXT_VV(vwsubu_wv_w, 8)
1045 GEN_VEXT_VV(vwadd_wv_b, 2)
1046 GEN_VEXT_VV(vwadd_wv_h, 4)
1047 GEN_VEXT_VV(vwadd_wv_w, 8)
1048 GEN_VEXT_VV(vwsub_wv_b, 2)
1049 GEN_VEXT_VV(vwsub_wv_h, 4)
1050 GEN_VEXT_VV(vwsub_wv_w, 8)
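
/*
 * Widening example: for vwaddu.vv with SEW = 8, both uint8_t operands
 * are converted to uint16_t before the add, so 200 + 100 produces 300
 * in the 16-bit destination instead of wrapping to 44.
 */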
1051
1052 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
1053 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
1054 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
1055 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
1056 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
1057 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
1058 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
1059 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
1060 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
1061 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
1062 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
1063 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
1064 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
1065 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
1066 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
1067 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
1068 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
1069 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
1070 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
1071 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
1072 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
1073 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
1074 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
1075 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
1076 GEN_VEXT_VX(vwaddu_vx_b, 2)
1077 GEN_VEXT_VX(vwaddu_vx_h, 4)
1078 GEN_VEXT_VX(vwaddu_vx_w, 8)
1079 GEN_VEXT_VX(vwsubu_vx_b, 2)
1080 GEN_VEXT_VX(vwsubu_vx_h, 4)
1081 GEN_VEXT_VX(vwsubu_vx_w, 8)
1082 GEN_VEXT_VX(vwadd_vx_b, 2)
1083 GEN_VEXT_VX(vwadd_vx_h, 4)
1084 GEN_VEXT_VX(vwadd_vx_w, 8)
1085 GEN_VEXT_VX(vwsub_vx_b, 2)
1086 GEN_VEXT_VX(vwsub_vx_h, 4)
1087 GEN_VEXT_VX(vwsub_vx_w, 8)
1088 GEN_VEXT_VX(vwaddu_wx_b, 2)
1089 GEN_VEXT_VX(vwaddu_wx_h, 4)
1090 GEN_VEXT_VX(vwaddu_wx_w, 8)
1091 GEN_VEXT_VX(vwsubu_wx_b, 2)
1092 GEN_VEXT_VX(vwsubu_wx_h, 4)
1093 GEN_VEXT_VX(vwsubu_wx_w, 8)
1094 GEN_VEXT_VX(vwadd_wx_b, 2)
1095 GEN_VEXT_VX(vwadd_wx_h, 4)
1096 GEN_VEXT_VX(vwadd_wx_w, 8)
1097 GEN_VEXT_VX(vwsub_wx_b, 2)
1098 GEN_VEXT_VX(vwsub_wx_h, 4)
1099 GEN_VEXT_VX(vwsub_wx_w, 8)
1100
1101 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
1102 #define DO_VADC(N, M, C) (N + M + C)
1103 #define DO_VSBC(N, M, C) (N - M - C)
1104
1105 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \
1106 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1107 CPURISCVState *env, uint32_t desc) \
1108 { \
1109 uint32_t vl = env->vl; \
1110 uint32_t esz = sizeof(ETYPE); \
1111 uint32_t total_elems = \
1112 vext_get_total_elems(env, desc, esz); \
1113 uint32_t vta = vext_vta(desc); \
1114 uint32_t i; \
1115 \
1116 VSTART_CHECK_EARLY_EXIT(env, vl); \
1117 \
1118 for (i = env->vstart; i < vl; i++) { \
1119 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1120 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1121 ETYPE carry = vext_elem_mask(v0, i); \
1122 \
1123 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \
1124 } \
1125 env->vstart = 0; \
1126 /* set tail elements to 1s */ \
1127 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1128 }
1129
1130 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC)
1131 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
1132 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
1133 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)
1134
1135 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC)
1136 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
1137 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
1138 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
1139
1140 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \
1141 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
1142 CPURISCVState *env, uint32_t desc) \
1143 { \
1144 uint32_t vl = env->vl; \
1145 uint32_t esz = sizeof(ETYPE); \
1146 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1147 uint32_t vta = vext_vta(desc); \
1148 uint32_t i; \
1149 \
1150 VSTART_CHECK_EARLY_EXIT(env, vl); \
1151 \
1152 for (i = env->vstart; i < vl; i++) { \
1153 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1154 ETYPE carry = vext_elem_mask(v0, i); \
1155 \
1156 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
1157 } \
1158 env->vstart = 0; \
1159 /* set tail elements to 1s */ \
1160 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1161 }
1162
1163 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC)
1164 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
1165 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
1166 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)
1167
1168 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC)
1169 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
1170 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
1171 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
1172
1173 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \
1174 (__typeof(N))(N + M) < N)
1175 #define DO_MSBC(N, M, C) (C ? N <= M : N < M)
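
/*
 * DO_MADC derives the carry out of N + M + C using only ETYPE
 * arithmetic: with C = 0 the addition wrapped iff (N + M) < N, and with
 * C = 1 iff (N + M + 1) <= N.  E.g. for uint8_t, N = 200, M = 100,
 * C = 0: (uint8_t)(N + M) = 44 < 200, so the carry out is 1.
 * DO_MSBC likewise yields the borrow out of N - M - C.
 */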
1176
1177 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \
1178 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1179 CPURISCVState *env, uint32_t desc) \
1180 { \
1181 uint32_t vl = env->vl; \
1182 uint32_t vm = vext_vm(desc); \
1183 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
1184 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1185 uint32_t i; \
1186 \
1187 VSTART_CHECK_EARLY_EXIT(env, vl); \
1188 \
1189 for (i = env->vstart; i < vl; i++) { \
1190 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1191 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1192 ETYPE carry = !vm && vext_elem_mask(v0, i); \
1193 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \
1194 } \
1195 env->vstart = 0; \
1196 /*
1197 * the mask destination register is always tail-agnostic
1198 * set tail elements to 1s
1199 */ \
1200 if (vta_all_1s) { \
1201 for (; i < total_elems; i++) { \
1202 vext_set_elem_mask(vd, i, 1); \
1203 } \
1204 } \
1205 }
1206
1207 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC)
1208 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
1209 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
1210 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)
1211
1212 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC)
1213 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
1214 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
1215 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
1216
1217 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \
1218 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1219 void *vs2, CPURISCVState *env, uint32_t desc) \
1220 { \
1221 uint32_t vl = env->vl; \
1222 uint32_t vm = vext_vm(desc); \
1223 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
1224 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1225 uint32_t i; \
1226 \
1227 VSTART_CHECK_EARLY_EXIT(env, vl); \
1228 \
1229 for (i = env->vstart; i < vl; i++) { \
1230 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1231 ETYPE carry = !vm && vext_elem_mask(v0, i); \
1232 vext_set_elem_mask(vd, i, \
1233 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \
1234 } \
1235 env->vstart = 0; \
1236 /*
1237 * the mask destination register is always tail-agnostic
1238 * set tail elements to 1s
1239 */ \
1240 if (vta_all_1s) { \
1241 for (; i < total_elems; i++) { \
1242 vext_set_elem_mask(vd, i, 1); \
1243 } \
1244 } \
1245 }
1246
1247 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC)
1248 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
1249 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
1250 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)
1251
1252 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC)
1253 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
1254 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
1255 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
1256
1257 /* Vector Bitwise Logical Instructions */
1258 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
1259 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
1260 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
1261 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
1262 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
1263 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
1264 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
1265 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
1266 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
1267 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
1268 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
1269 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
1270 GEN_VEXT_VV(vand_vv_b, 1)
1271 GEN_VEXT_VV(vand_vv_h, 2)
1272 GEN_VEXT_VV(vand_vv_w, 4)
1273 GEN_VEXT_VV(vand_vv_d, 8)
1274 GEN_VEXT_VV(vor_vv_b, 1)
1275 GEN_VEXT_VV(vor_vv_h, 2)
1276 GEN_VEXT_VV(vor_vv_w, 4)
1277 GEN_VEXT_VV(vor_vv_d, 8)
1278 GEN_VEXT_VV(vxor_vv_b, 1)
1279 GEN_VEXT_VV(vxor_vv_h, 2)
1280 GEN_VEXT_VV(vxor_vv_w, 4)
1281 GEN_VEXT_VV(vxor_vv_d, 8)
1282
1283 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
1284 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
1285 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
1286 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
1287 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
1288 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
1289 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
1290 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
1291 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
1292 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
1293 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
1294 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
1295 GEN_VEXT_VX(vand_vx_b, 1)
1296 GEN_VEXT_VX(vand_vx_h, 2)
1297 GEN_VEXT_VX(vand_vx_w, 4)
1298 GEN_VEXT_VX(vand_vx_d, 8)
1299 GEN_VEXT_VX(vor_vx_b, 1)
1300 GEN_VEXT_VX(vor_vx_h, 2)
1301 GEN_VEXT_VX(vor_vx_w, 4)
1302 GEN_VEXT_VX(vor_vx_d, 8)
1303 GEN_VEXT_VX(vxor_vx_b, 1)
1304 GEN_VEXT_VX(vxor_vx_h, 2)
1305 GEN_VEXT_VX(vxor_vx_w, 4)
1306 GEN_VEXT_VX(vxor_vx_d, 8)
1307
1308 /* Vector Single-Width Bit Shift Instructions */
1309 #define DO_SLL(N, M) (N << (M))
1310 #define DO_SRL(N, M) (N >> (M))
1311
1312 /* generate the helpers for shift instructions with two vector operands */
1313 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \
1314 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
1315 void *vs2, CPURISCVState *env, uint32_t desc) \
1316 { \
1317 uint32_t vm = vext_vm(desc); \
1318 uint32_t vl = env->vl; \
1319 uint32_t esz = sizeof(TS1); \
1320 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1321 uint32_t vta = vext_vta(desc); \
1322 uint32_t vma = vext_vma(desc); \
1323 uint32_t i; \
1324 \
1325 VSTART_CHECK_EARLY_EXIT(env, vl); \
1326 \
1327 for (i = env->vstart; i < vl; i++) { \
1328 if (!vm && !vext_elem_mask(v0, i)) { \
1329 /* set masked-off elements to 1s */ \
1330 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
1331 continue; \
1332 } \
1333 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \
1334 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
1335 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \
1336 } \
1337 env->vstart = 0; \
1338 /* set tail elements to 1s */ \
1339 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1340 }
1341
1342 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7)
1343 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
1344 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
1345 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)
1346
1347 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1348 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1349 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1350 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1351
1352 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7)
1353 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
1354 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1355 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
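
/*
 * The MASK argument keeps only the low log2(SEW) bits of the shift
 * amount (e.g. 0x1f for SEW = 32), matching the RVV rule that vector
 * shifts use the shift amount modulo SEW; the narrowing shifts below
 * instead mask to the wider 2*SEW source width (e.g. 0x1f for
 * vnsrl.wv with SEW = 16).
 */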
1356
1357 /*
1358 * generate the helpers for shift instructions with one vector and one scalar
1359 */
1360 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
1361 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1362 void *vs2, CPURISCVState *env, \
1363 uint32_t desc) \
1364 { \
1365 uint32_t vm = vext_vm(desc); \
1366 uint32_t vl = env->vl; \
1367 uint32_t esz = sizeof(TD); \
1368 uint32_t total_elems = \
1369 vext_get_total_elems(env, desc, esz); \
1370 uint32_t vta = vext_vta(desc); \
1371 uint32_t vma = vext_vma(desc); \
1372 uint32_t i; \
1373 \
1374 VSTART_CHECK_EARLY_EXIT(env, vl); \
1375 \
1376 for (i = env->vstart; i < vl; i++) { \
1377 if (!vm && !vext_elem_mask(v0, i)) { \
1378 /* set masked-off elements to 1s */ \
1379 vext_set_elems_1s(vd, vma, i * esz, \
1380 (i + 1) * esz); \
1381 continue; \
1382 } \
1383 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
1384 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \
1385 } \
1386 env->vstart = 0; \
1387 /* set tail elements to 1s */ \
1388 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\
1389 }
1390
1391 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
1392 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
1393 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
1394 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)
1395
1396 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1397 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1398 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1399 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1400
1401 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
1402 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
1403 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1404 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1405
1406 /* Vector Narrowing Integer Right Shift Instructions */
1407 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1408 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1409 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1410 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf)
1411 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1412 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1413 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1414 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1415 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1416 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
1417 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1418 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1419
1420 /* Vector Integer Comparison Instructions */
1421 #define DO_MSEQ(N, M) (N == M)
1422 #define DO_MSNE(N, M) (N != M)
1423 #define DO_MSLT(N, M) (N < M)
1424 #define DO_MSLE(N, M) (N <= M)
1425 #define DO_MSGT(N, M) (N > M)
1426
1427 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \
1428 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1429 CPURISCVState *env, uint32_t desc) \
1430 { \
1431 uint32_t vm = vext_vm(desc); \
1432 uint32_t vl = env->vl; \
1433 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
1434 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1435 uint32_t vma = vext_vma(desc); \
1436 uint32_t i; \
1437 \
1438 VSTART_CHECK_EARLY_EXIT(env, vl); \
1439 \
1440 for (i = env->vstart; i < vl; i++) { \
1441 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1442 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1443 if (!vm && !vext_elem_mask(v0, i)) { \
1444 /* set masked-off elements to 1s */ \
1445 if (vma) { \
1446 vext_set_elem_mask(vd, i, 1); \
1447 } \
1448 continue; \
1449 } \
1450 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \
1451 } \
1452 env->vstart = 0; \
1453 /*
1454 * the mask destination register is always tail-agnostic
1455 * set tail elements to 1s
1456 */ \
1457 if (vta_all_1s) { \
1458 for (; i < total_elems; i++) { \
1459 vext_set_elem_mask(vd, i, 1); \
1460 } \
1461 } \
1462 }
1463
1464 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ)
1465 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
1466 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
1467 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
1468
1469 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE)
1470 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
1471 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
1472 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
1473
1474 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT)
1475 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
1476 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
1477 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
1478
1479 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT)
1480 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
1481 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
1482 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
1483
1484 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE)
1485 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
1486 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
1487 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
1488
1489 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE)
1490 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
1491 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
1492 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1493
1494 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \
1495 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
1496 CPURISCVState *env, uint32_t desc) \
1497 { \
1498 uint32_t vm = vext_vm(desc); \
1499 uint32_t vl = env->vl; \
1500 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
1501 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1502 uint32_t vma = vext_vma(desc); \
1503 uint32_t i; \
1504 \
1505 VSTART_CHECK_EARLY_EXIT(env, vl); \
1506 \
1507 for (i = env->vstart; i < vl; i++) { \
1508 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1509 if (!vm && !vext_elem_mask(v0, i)) { \
1510 /* set masked-off elements to 1s */ \
1511 if (vma) { \
1512 vext_set_elem_mask(vd, i, 1); \
1513 } \
1514 continue; \
1515 } \
1516 vext_set_elem_mask(vd, i, \
1517 DO_OP(s2, (ETYPE)(target_long)s1)); \
1518 } \
1519 env->vstart = 0; \
1520 /*
1521 * the mask destination register is always tail-agnostic
1522 * set tail elements to 1s
1523 */ \
1524 if (vta_all_1s) { \
1525 for (; i < total_elems; i++) { \
1526 vext_set_elem_mask(vd, i, 1); \
1527 } \
1528 } \
1529 }
1530
1531 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ)
1532 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
1533 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
1534 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
1535
1536 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE)
1537 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
1538 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
1539 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
1540
1541 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT)
1542 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
1543 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
1544 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
1545
1546 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT)
1547 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
1548 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
1549 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
1550
1551 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE)
1552 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
1553 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
1554 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
1555
1556 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE)
1557 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
1558 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
1559 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
1560
1561 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT)
1562 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
1563 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
1564 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
1565
1566 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT)
1567 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
1568 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
1569 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
1570
1571 /* Vector Integer Min/Max Instructions */
1572 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1573 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1574 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1575 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1576 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1577 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1578 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1579 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1580 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1581 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1582 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1583 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1584 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1585 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1586 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1587 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
1588 GEN_VEXT_VV(vminu_vv_b, 1)
1589 GEN_VEXT_VV(vminu_vv_h, 2)
1590 GEN_VEXT_VV(vminu_vv_w, 4)
1591 GEN_VEXT_VV(vminu_vv_d, 8)
1592 GEN_VEXT_VV(vmin_vv_b, 1)
1593 GEN_VEXT_VV(vmin_vv_h, 2)
1594 GEN_VEXT_VV(vmin_vv_w, 4)
1595 GEN_VEXT_VV(vmin_vv_d, 8)
1596 GEN_VEXT_VV(vmaxu_vv_b, 1)
1597 GEN_VEXT_VV(vmaxu_vv_h, 2)
1598 GEN_VEXT_VV(vmaxu_vv_w, 4)
1599 GEN_VEXT_VV(vmaxu_vv_d, 8)
1600 GEN_VEXT_VV(vmax_vv_b, 1)
1601 GEN_VEXT_VV(vmax_vv_h, 2)
1602 GEN_VEXT_VV(vmax_vv_w, 4)
1603 GEN_VEXT_VV(vmax_vv_d, 8)
1604
1605 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1606 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1607 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1608 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1609 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1610 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1611 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1612 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1613 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1614 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1615 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1616 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1617 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1618 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1619 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1620 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
1621 GEN_VEXT_VX(vminu_vx_b, 1)
1622 GEN_VEXT_VX(vminu_vx_h, 2)
1623 GEN_VEXT_VX(vminu_vx_w, 4)
1624 GEN_VEXT_VX(vminu_vx_d, 8)
1625 GEN_VEXT_VX(vmin_vx_b, 1)
1626 GEN_VEXT_VX(vmin_vx_h, 2)
1627 GEN_VEXT_VX(vmin_vx_w, 4)
1628 GEN_VEXT_VX(vmin_vx_d, 8)
1629 GEN_VEXT_VX(vmaxu_vx_b, 1)
1630 GEN_VEXT_VX(vmaxu_vx_h, 2)
1631 GEN_VEXT_VX(vmaxu_vx_w, 4)
1632 GEN_VEXT_VX(vmaxu_vx_d, 8)
1633 GEN_VEXT_VX(vmax_vx_b, 1)
1634 GEN_VEXT_VX(vmax_vx_h, 2)
1635 GEN_VEXT_VX(vmax_vx_w, 4)
1636 GEN_VEXT_VX(vmax_vx_d, 8)
1637
1638 /* Vector Single-Width Integer Multiply Instructions */
1639 #define DO_MUL(N, M) (N * M)
1640 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1641 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1642 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1643 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
1644 GEN_VEXT_VV(vmul_vv_b, 1)
1645 GEN_VEXT_VV(vmul_vv_h, 2)
1646 GEN_VEXT_VV(vmul_vv_w, 4)
1647 GEN_VEXT_VV(vmul_vv_d, 8)
1648
1649 static int8_t do_mulh_b(int8_t s2, int8_t s1)
1650 {
1651 return (int16_t)s2 * (int16_t)s1 >> 8;
1652 }
1653
1654 static int16_t do_mulh_h(int16_t s2, int16_t s1)
1655 {
1656 return (int32_t)s2 * (int32_t)s1 >> 16;
1657 }
1658
1659 static int32_t do_mulh_w(int32_t s2, int32_t s1)
1660 {
1661 return (int64_t)s2 * (int64_t)s1 >> 32;
1662 }
1663
1664 static int64_t do_mulh_d(int64_t s2, int64_t s1)
1665 {
1666 uint64_t hi_64, lo_64;
1667
1668 muls64(&lo_64, &hi_64, s1, s2);
1669 return hi_64;
1670 }
1671
1672 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1673 {
1674 return (uint16_t)s2 * (uint16_t)s1 >> 8;
1675 }
1676
1677 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1678 {
1679 return (uint32_t)s2 * (uint32_t)s1 >> 16;
1680 }
1681
1682 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1683 {
1684 return (uint64_t)s2 * (uint64_t)s1 >> 32;
1685 }
1686
1687 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1688 {
1689 uint64_t hi_64, lo_64;
1690
1691 mulu64(&lo_64, &hi_64, s2, s1);
1692 return hi_64;
1693 }
1694
1695 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1696 {
1697 return (int16_t)s2 * (uint16_t)s1 >> 8;
1698 }
1699
1700 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1701 {
1702 return (int32_t)s2 * (uint32_t)s1 >> 16;
1703 }
1704
1705 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1706 {
1707 return (int64_t)s2 * (uint64_t)s1 >> 32;
1708 }
1709
1710 /*
1711  * Let A = signed operand,
1712  *     B = unsigned operand,
1713  *     P = mulu64(A, B), the unsigned product of the raw bit patterns,
1714  *     SP = A * B, the desired signed * unsigned product.
1715  *
1716  * IF A < 0
1717  *     the unsigned interpretation of A's bits is A + 2 ** 64, so
1718  *     P  = (A + 2 ** 64) * B
1719  *        = A * B + 2 ** 64 * B
1720  *     SP = P - 2 ** 64 * B
1721  * ELSE
1722  *     SP = P
1723  *
1724  * Subtracting 2 ** 64 * B only changes the high half, hence
1725  *     HI_P -= (A < 0 ? B : 0)
1726  *
1727  */
1728
1729 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1730 {
1731 uint64_t hi_64, lo_64;
1732
1733 mulu64(&lo_64, &hi_64, s2, s1);
1734
1735 hi_64 -= s2 < 0 ? s1 : 0;
1736 return hi_64;
1737 }
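/*
 * Worked example of the fix-up above: s2 = -1, s1 = 2.
 * mulu64() sees s2 as 2 ** 64 - 1, so the unsigned product is
 * (2 ** 64 - 1) * 2 = 2 ** 65 - 2, giving hi_64 = 1, lo_64 = 2 ** 64 - 2.
 * Since s2 < 0 we subtract s1: hi_64 = 1 - 2 = -1, which matches the true
 * signed-by-unsigned product -1 * 2 = -2 (high half all ones).
 */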
1738
1739 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1740 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1741 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1742 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1743 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1744 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1745 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1746 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1747 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1748 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1749 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1750 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
1751 GEN_VEXT_VV(vmulh_vv_b, 1)
1752 GEN_VEXT_VV(vmulh_vv_h, 2)
1753 GEN_VEXT_VV(vmulh_vv_w, 4)
1754 GEN_VEXT_VV(vmulh_vv_d, 8)
1755 GEN_VEXT_VV(vmulhu_vv_b, 1)
1756 GEN_VEXT_VV(vmulhu_vv_h, 2)
1757 GEN_VEXT_VV(vmulhu_vv_w, 4)
1758 GEN_VEXT_VV(vmulhu_vv_d, 8)
1759 GEN_VEXT_VV(vmulhsu_vv_b, 1)
1760 GEN_VEXT_VV(vmulhsu_vv_h, 2)
1761 GEN_VEXT_VV(vmulhsu_vv_w, 4)
1762 GEN_VEXT_VV(vmulhsu_vv_d, 8)
1763
1764 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1765 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1766 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1767 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1768 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1769 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1770 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1771 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1772 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1773 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1774 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1775 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1776 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1777 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1778 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1779 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
1780 GEN_VEXT_VX(vmul_vx_b, 1)
1781 GEN_VEXT_VX(vmul_vx_h, 2)
1782 GEN_VEXT_VX(vmul_vx_w, 4)
1783 GEN_VEXT_VX(vmul_vx_d, 8)
1784 GEN_VEXT_VX(vmulh_vx_b, 1)
1785 GEN_VEXT_VX(vmulh_vx_h, 2)
1786 GEN_VEXT_VX(vmulh_vx_w, 4)
1787 GEN_VEXT_VX(vmulh_vx_d, 8)
1788 GEN_VEXT_VX(vmulhu_vx_b, 1)
1789 GEN_VEXT_VX(vmulhu_vx_h, 2)
1790 GEN_VEXT_VX(vmulhu_vx_w, 4)
1791 GEN_VEXT_VX(vmulhu_vx_d, 8)
1792 GEN_VEXT_VX(vmulhsu_vx_b, 1)
1793 GEN_VEXT_VX(vmulhsu_vx_h, 2)
1794 GEN_VEXT_VX(vmulhsu_vx_w, 4)
1795 GEN_VEXT_VX(vmulhsu_vx_d, 8)
1796
1797 /* Vector Integer Divide Instructions */
1798 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1799 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
1800 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : \
1801 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
1802 #define DO_REM(N, M) (unlikely(M == 0) ? N : \
1803 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
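/*
 * The (N == -N) test is true only for the most negative value of the type
 * (e.g. INT8_MIN, since -INT8_MIN wraps back to INT8_MIN), so these macros
 * implement the RISC-V division special cases: division by zero yields
 * all-ones (-1) for div[u] and the dividend for rem[u], while the signed
 * overflow case gives INT_MIN / -1 = INT_MIN and INT_MIN % -1 = 0.
 */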
1804
1805 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1806 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1807 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1808 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1809 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1810 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1811 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1812 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1813 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1814 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1815 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1816 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1817 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1818 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1819 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1820 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
1821 GEN_VEXT_VV(vdivu_vv_b, 1)
1822 GEN_VEXT_VV(vdivu_vv_h, 2)
1823 GEN_VEXT_VV(vdivu_vv_w, 4)
1824 GEN_VEXT_VV(vdivu_vv_d, 8)
1825 GEN_VEXT_VV(vdiv_vv_b, 1)
1826 GEN_VEXT_VV(vdiv_vv_h, 2)
1827 GEN_VEXT_VV(vdiv_vv_w, 4)
1828 GEN_VEXT_VV(vdiv_vv_d, 8)
1829 GEN_VEXT_VV(vremu_vv_b, 1)
1830 GEN_VEXT_VV(vremu_vv_h, 2)
1831 GEN_VEXT_VV(vremu_vv_w, 4)
1832 GEN_VEXT_VV(vremu_vv_d, 8)
1833 GEN_VEXT_VV(vrem_vv_b, 1)
1834 GEN_VEXT_VV(vrem_vv_h, 2)
1835 GEN_VEXT_VV(vrem_vv_w, 4)
1836 GEN_VEXT_VV(vrem_vv_d, 8)
1837
1838 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1839 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1840 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1841 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1842 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1843 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1844 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1845 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1846 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1847 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1848 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1849 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1850 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1851 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1852 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1853 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
1854 GEN_VEXT_VX(vdivu_vx_b, 1)
1855 GEN_VEXT_VX(vdivu_vx_h, 2)
1856 GEN_VEXT_VX(vdivu_vx_w, 4)
1857 GEN_VEXT_VX(vdivu_vx_d, 8)
1858 GEN_VEXT_VX(vdiv_vx_b, 1)
1859 GEN_VEXT_VX(vdiv_vx_h, 2)
1860 GEN_VEXT_VX(vdiv_vx_w, 4)
1861 GEN_VEXT_VX(vdiv_vx_d, 8)
1862 GEN_VEXT_VX(vremu_vx_b, 1)
1863 GEN_VEXT_VX(vremu_vx_h, 2)
1864 GEN_VEXT_VX(vremu_vx_w, 4)
1865 GEN_VEXT_VX(vremu_vx_d, 8)
1866 GEN_VEXT_VX(vrem_vx_b, 1)
1867 GEN_VEXT_VX(vrem_vx_h, 2)
1868 GEN_VEXT_VX(vrem_vx_w, 4)
1869 GEN_VEXT_VX(vrem_vx_d, 8)
1870
1871 /* Vector Widening Integer Multiply Instructions */
1872 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1873 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1874 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1875 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1876 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1877 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1878 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1879 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1880 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
1881 GEN_VEXT_VV(vwmul_vv_b, 2)
1882 GEN_VEXT_VV(vwmul_vv_h, 4)
1883 GEN_VEXT_VV(vwmul_vv_w, 8)
1884 GEN_VEXT_VV(vwmulu_vv_b, 2)
1885 GEN_VEXT_VV(vwmulu_vv_h, 4)
1886 GEN_VEXT_VV(vwmulu_vv_w, 8)
1887 GEN_VEXT_VV(vwmulsu_vv_b, 2)
1888 GEN_VEXT_VV(vwmulsu_vv_h, 4)
1889 GEN_VEXT_VV(vwmulsu_vv_w, 8)
1890
1891 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1892 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1893 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1894 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1895 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1896 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1897 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1898 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1899 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
1900 GEN_VEXT_VX(vwmul_vx_b, 2)
1901 GEN_VEXT_VX(vwmul_vx_h, 4)
1902 GEN_VEXT_VX(vwmul_vx_w, 8)
1903 GEN_VEXT_VX(vwmulu_vx_b, 2)
1904 GEN_VEXT_VX(vwmulu_vx_h, 4)
1905 GEN_VEXT_VX(vwmulu_vx_w, 8)
1906 GEN_VEXT_VX(vwmulsu_vx_b, 2)
1907 GEN_VEXT_VX(vwmulsu_vx_h, 4)
1908 GEN_VEXT_VX(vwmulsu_vx_w, 8)
1909
1910 /* Vector Single-Width Integer Multiply-Add Instructions */
1911 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
1912 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
1913 { \
1914 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
1915 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1916 TD d = *((TD *)vd + HD(i)); \
1917 *((TD *)vd + HD(i)) = OP(s2, s1, d); \
1918 }
1919
1920 #define DO_MACC(N, M, D) (M * N + D)
1921 #define DO_NMSAC(N, M, D) (-(M * N) + D)
1922 #define DO_MADD(N, M, D) (M * D + N)
1923 #define DO_NMSUB(N, M, D) (-(M * D) + N)
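/*
 * With the OPIVV3/OPIVX3 wrappers passing OP(s2, s1, d), the macros above
 * expand to the multiply-add forms defined by the spec:
 *   vmacc:  vd[i] = (vs1[i] * vs2[i]) + vd[i]
 *   vnmsac: vd[i] = -(vs1[i] * vs2[i]) + vd[i]
 *   vmadd:  vd[i] = (vs1[i] * vd[i]) + vs2[i]
 *   vnmsub: vd[i] = -(vs1[i] * vd[i]) + vs2[i]
 * (the .vx forms replace vs1[i] with the scalar rs1)
 */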
1924 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1925 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1926 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1927 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1928 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1929 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1930 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1931 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1932 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1933 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1934 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1935 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1936 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1937 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1938 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1939 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
1940 GEN_VEXT_VV(vmacc_vv_b, 1)
1941 GEN_VEXT_VV(vmacc_vv_h, 2)
1942 GEN_VEXT_VV(vmacc_vv_w, 4)
1943 GEN_VEXT_VV(vmacc_vv_d, 8)
1944 GEN_VEXT_VV(vnmsac_vv_b, 1)
1945 GEN_VEXT_VV(vnmsac_vv_h, 2)
1946 GEN_VEXT_VV(vnmsac_vv_w, 4)
1947 GEN_VEXT_VV(vnmsac_vv_d, 8)
1948 GEN_VEXT_VV(vmadd_vv_b, 1)
1949 GEN_VEXT_VV(vmadd_vv_h, 2)
1950 GEN_VEXT_VV(vmadd_vv_w, 4)
1951 GEN_VEXT_VV(vmadd_vv_d, 8)
1952 GEN_VEXT_VV(vnmsub_vv_b, 1)
1953 GEN_VEXT_VV(vnmsub_vv_h, 2)
1954 GEN_VEXT_VV(vnmsub_vv_w, 4)
1955 GEN_VEXT_VV(vnmsub_vv_d, 8)
1956
1957 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
1958 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
1959 { \
1960 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1961 TD d = *((TD *)vd + HD(i)); \
1962 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \
1963 }
1964
1965 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1966 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1967 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1968 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1969 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1970 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1971 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1972 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1973 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1974 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1975 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1976 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1977 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1978 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1979 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1980 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
1981 GEN_VEXT_VX(vmacc_vx_b, 1)
1982 GEN_VEXT_VX(vmacc_vx_h, 2)
1983 GEN_VEXT_VX(vmacc_vx_w, 4)
1984 GEN_VEXT_VX(vmacc_vx_d, 8)
1985 GEN_VEXT_VX(vnmsac_vx_b, 1)
1986 GEN_VEXT_VX(vnmsac_vx_h, 2)
1987 GEN_VEXT_VX(vnmsac_vx_w, 4)
1988 GEN_VEXT_VX(vnmsac_vx_d, 8)
1989 GEN_VEXT_VX(vmadd_vx_b, 1)
1990 GEN_VEXT_VX(vmadd_vx_h, 2)
1991 GEN_VEXT_VX(vmadd_vx_w, 4)
1992 GEN_VEXT_VX(vmadd_vx_d, 8)
1993 GEN_VEXT_VX(vnmsub_vx_b, 1)
1994 GEN_VEXT_VX(vnmsub_vx_h, 2)
1995 GEN_VEXT_VX(vnmsub_vx_w, 4)
1996 GEN_VEXT_VX(vnmsub_vx_d, 8)
1997
1998 /* Vector Widening Integer Multiply-Add Instructions */
1999 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
2000 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
2001 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
2002 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
2003 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
2004 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
2005 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
2006 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
2007 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
2008 GEN_VEXT_VV(vwmaccu_vv_b, 2)
2009 GEN_VEXT_VV(vwmaccu_vv_h, 4)
2010 GEN_VEXT_VV(vwmaccu_vv_w, 8)
2011 GEN_VEXT_VV(vwmacc_vv_b, 2)
2012 GEN_VEXT_VV(vwmacc_vv_h, 4)
2013 GEN_VEXT_VV(vwmacc_vv_w, 8)
2014 GEN_VEXT_VV(vwmaccsu_vv_b, 2)
2015 GEN_VEXT_VV(vwmaccsu_vv_h, 4)
2016 GEN_VEXT_VV(vwmaccsu_vv_w, 8)
2017
2018 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
2019 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
2020 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
2021 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
2022 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
2023 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
2024 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
2025 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
2026 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
2027 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
2028 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
2029 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
2030 GEN_VEXT_VX(vwmaccu_vx_b, 2)
2031 GEN_VEXT_VX(vwmaccu_vx_h, 4)
2032 GEN_VEXT_VX(vwmaccu_vx_w, 8)
2033 GEN_VEXT_VX(vwmacc_vx_b, 2)
2034 GEN_VEXT_VX(vwmacc_vx_h, 4)
2035 GEN_VEXT_VX(vwmacc_vx_w, 8)
2036 GEN_VEXT_VX(vwmaccsu_vx_b, 2)
2037 GEN_VEXT_VX(vwmaccsu_vx_h, 4)
2038 GEN_VEXT_VX(vwmaccsu_vx_w, 8)
2039 GEN_VEXT_VX(vwmaccus_vx_b, 2)
2040 GEN_VEXT_VX(vwmaccus_vx_h, 4)
2041 GEN_VEXT_VX(vwmaccus_vx_w, 8)
2042
2043 /* Vector Integer Merge and Move Instructions */
2044 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \
2045 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \
2046 uint32_t desc) \
2047 { \
2048 uint32_t vl = env->vl; \
2049 uint32_t esz = sizeof(ETYPE); \
2050 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2051 uint32_t vta = vext_vta(desc); \
2052 uint32_t i; \
2053 \
2054 VSTART_CHECK_EARLY_EXIT(env, vl); \
2055 \
2056 for (i = env->vstart; i < vl; i++) { \
2057 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
2058 *((ETYPE *)vd + H(i)) = s1; \
2059 } \
2060 env->vstart = 0; \
2061 /* set tail elements to 1s */ \
2062 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
2063 }
2064
2065 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1)
2066 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
2067 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
2068 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
2069
2070 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \
2071 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \
2072 uint32_t desc) \
2073 { \
2074 uint32_t vl = env->vl; \
2075 uint32_t esz = sizeof(ETYPE); \
2076 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2077 uint32_t vta = vext_vta(desc); \
2078 uint32_t i; \
2079 \
2080 VSTART_CHECK_EARLY_EXIT(env, vl); \
2081 \
2082 for (i = env->vstart; i < vl; i++) { \
2083 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \
2084 } \
2085 env->vstart = 0; \
2086 /* set tail elements to 1s */ \
2087 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
2088 }
2089
2090 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1)
2091 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
2092 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
2093 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
2094
2095 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \
2096 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
2097 CPURISCVState *env, uint32_t desc) \
2098 { \
2099 uint32_t vl = env->vl; \
2100 uint32_t esz = sizeof(ETYPE); \
2101 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2102 uint32_t vta = vext_vta(desc); \
2103 uint32_t i; \
2104 \
2105 VSTART_CHECK_EARLY_EXIT(env, vl); \
2106 \
2107 for (i = env->vstart; i < vl; i++) { \
2108 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \
2109 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \
2110 } \
2111 env->vstart = 0; \
2112 /* set tail elements to 1s */ \
2113 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
2114 }
2115
2116 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1)
2117 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
2118 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
2119 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
2120
2121 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \
2122 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
2123 void *vs2, CPURISCVState *env, uint32_t desc) \
2124 { \
2125 uint32_t vl = env->vl; \
2126 uint32_t esz = sizeof(ETYPE); \
2127 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2128 uint32_t vta = vext_vta(desc); \
2129 uint32_t i; \
2130 \
2131 VSTART_CHECK_EARLY_EXIT(env, vl); \
2132 \
2133 for (i = env->vstart; i < vl; i++) { \
2134 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
2135 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \
2136 (ETYPE)(target_long)s1); \
2137 *((ETYPE *)vd + H(i)) = d; \
2138 } \
2139 env->vstart = 0; \
2140 /* set tail elements to 1s */ \
2141 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
2142 }
2143
2144 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1)
2145 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
2146 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
2147 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
2148
2149 /*
2150 * Vector Fixed-Point Arithmetic Instructions
2151 */
2152
2153 /* Vector Single-Width Saturating Add and Subtract */
2154
2155 /*
2156  * Fixed-point instructions carry a rounding mode and may saturate,
2157  * so define the common fixed-point helper macros here.
2158 */
2159 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
2160 CPURISCVState *env, int vxrm);
2161
2162 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
2163 static inline void \
2164 do_##NAME(void *vd, void *vs1, void *vs2, int i, \
2165 CPURISCVState *env, int vxrm) \
2166 { \
2167 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
2168 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2169 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \
2170 }
2171
2172 static inline void
2173 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
2174 CPURISCVState *env,
2175 uint32_t vl, uint32_t vm, int vxrm,
2176 opivv2_rm_fn *fn, uint32_t vma, uint32_t esz)
2177 {
2178 for (uint32_t i = env->vstart; i < vl; i++) {
2179 if (!vm && !vext_elem_mask(v0, i)) {
2180 /* set masked-off elements to 1s */
2181 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
2182 continue;
2183 }
2184 fn(vd, vs1, vs2, i, env, vxrm);
2185 }
2186 env->vstart = 0;
2187 }
2188
2189 static inline void
2190 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
2191 CPURISCVState *env,
2192 uint32_t desc,
2193 opivv2_rm_fn *fn, uint32_t esz)
2194 {
2195 uint32_t vm = vext_vm(desc);
2196 uint32_t vl = env->vl;
2197 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
2198 uint32_t vta = vext_vta(desc);
2199 uint32_t vma = vext_vma(desc);
2200
2201 VSTART_CHECK_EARLY_EXIT(env, vl);
2202
2203 switch (env->vxrm) {
2204 case 0: /* rnu */
2205 vext_vv_rm_1(vd, v0, vs1, vs2,
2206 env, vl, vm, 0, fn, vma, esz);
2207 break;
2208 case 1: /* rne */
2209 vext_vv_rm_1(vd, v0, vs1, vs2,
2210 env, vl, vm, 1, fn, vma, esz);
2211 break;
2212 case 2: /* rdn */
2213 vext_vv_rm_1(vd, v0, vs1, vs2,
2214 env, vl, vm, 2, fn, vma, esz);
2215 break;
2216 default: /* rod */
2217 vext_vv_rm_1(vd, v0, vs1, vs2,
2218 env, vl, vm, 3, fn, vma, esz);
2219 break;
2220 }
2221 /* set tail elements to 1s */
2222 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
2223 }
2224
2225 /* generate helpers for fixed point instructions with OPIVV format */
2226 #define GEN_VEXT_VV_RM(NAME, ESZ) \
2227 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
2228 CPURISCVState *env, uint32_t desc) \
2229 { \
2230 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \
2231 do_##NAME, ESZ); \
2232 }
2233
2234 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a,
2235 uint8_t b)
2236 {
2237 uint8_t res = a + b;
2238 if (res < a) {
2239 res = UINT8_MAX;
2240 env->vxsat = 0x1;
2241 }
2242 return res;
2243 }
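/*
 * Unsigned saturating add: wraparound is detected by res < a.
 * Example: a = 200, b = 100 -> res = 44 (300 mod 256), and 44 < 200, so the
 * result saturates to UINT8_MAX = 255 and vxsat is set.
 */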
2244
2245 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
2246 uint16_t b)
2247 {
2248 uint16_t res = a + b;
2249 if (res < a) {
2250 res = UINT16_MAX;
2251 env->vxsat = 0x1;
2252 }
2253 return res;
2254 }
2255
2256 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
2257 uint32_t b)
2258 {
2259 uint32_t res = a + b;
2260 if (res < a) {
2261 res = UINT32_MAX;
2262 env->vxsat = 0x1;
2263 }
2264 return res;
2265 }
2266
2267 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
2268 uint64_t b)
2269 {
2270 uint64_t res = a + b;
2271 if (res < a) {
2272 res = UINT64_MAX;
2273 env->vxsat = 0x1;
2274 }
2275 return res;
2276 }
2277
2278 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
2279 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
2280 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
2281 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
2282 GEN_VEXT_VV_RM(vsaddu_vv_b, 1)
2283 GEN_VEXT_VV_RM(vsaddu_vv_h, 2)
2284 GEN_VEXT_VV_RM(vsaddu_vv_w, 4)
2285 GEN_VEXT_VV_RM(vsaddu_vv_d, 8)
2286
2287 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
2288 CPURISCVState *env, int vxrm);
2289
2290 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
2291 static inline void \
2292 do_##NAME(void *vd, target_long s1, void *vs2, int i, \
2293 CPURISCVState *env, int vxrm) \
2294 { \
2295 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2296 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \
2297 }
2298
2299 static inline void
2300 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
2301 CPURISCVState *env,
2302 uint32_t vl, uint32_t vm, int vxrm,
2303 opivx2_rm_fn *fn, uint32_t vma, uint32_t esz)
2304 {
2305 for (uint32_t i = env->vstart; i < vl; i++) {
2306 if (!vm && !vext_elem_mask(v0, i)) {
2307 /* set masked-off elements to 1s */
2308 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
2309 continue;
2310 }
2311 fn(vd, s1, vs2, i, env, vxrm);
2312 }
2313 env->vstart = 0;
2314 }
2315
2316 static inline void
2317 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2318 CPURISCVState *env,
2319 uint32_t desc,
2320 opivx2_rm_fn *fn, uint32_t esz)
2321 {
2322 uint32_t vm = vext_vm(desc);
2323 uint32_t vl = env->vl;
2324 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
2325 uint32_t vta = vext_vta(desc);
2326 uint32_t vma = vext_vma(desc);
2327
2328 VSTART_CHECK_EARLY_EXIT(env, vl);
2329
2330 switch (env->vxrm) {
2331 case 0: /* rnu */
2332 vext_vx_rm_1(vd, v0, s1, vs2,
2333 env, vl, vm, 0, fn, vma, esz);
2334 break;
2335 case 1: /* rne */
2336 vext_vx_rm_1(vd, v0, s1, vs2,
2337 env, vl, vm, 1, fn, vma, esz);
2338 break;
2339 case 2: /* rdn */
2340 vext_vx_rm_1(vd, v0, s1, vs2,
2341 env, vl, vm, 2, fn, vma, esz);
2342 break;
2343 default: /* rod */
2344 vext_vx_rm_1(vd, v0, s1, vs2,
2345 env, vl, vm, 3, fn, vma, esz);
2346 break;
2347 }
2348 /* set tail elements to 1s */
2349 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
2350 }
2351
2352 /* generate helpers for fixed point instructions with OPIVX format */
2353 #define GEN_VEXT_VX_RM(NAME, ESZ) \
2354 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
2355 void *vs2, CPURISCVState *env, \
2356 uint32_t desc) \
2357 { \
2358 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \
2359 do_##NAME, ESZ); \
2360 }
2361
2362 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2363 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2364 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2365 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
2366 GEN_VEXT_VX_RM(vsaddu_vx_b, 1)
2367 GEN_VEXT_VX_RM(vsaddu_vx_h, 2)
2368 GEN_VEXT_VX_RM(vsaddu_vx_w, 4)
2369 GEN_VEXT_VX_RM(vsaddu_vx_d, 8)
2370
2371 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2372 {
2373 int8_t res = a + b;
2374 if ((res ^ a) & (res ^ b) & INT8_MIN) {
2375 res = a > 0 ? INT8_MAX : INT8_MIN;
2376 env->vxsat = 0x1;
2377 }
2378 return res;
2379 }
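/*
 * Signed overflow occurred iff both operands have the same sign and the
 * result has the opposite sign, i.e. the sign bit of
 * (res ^ a) & (res ^ b) is set. Example: a = b = 100 -> res = -56
 * (200 wraps), both XORs have the sign bit set, and a > 0, so the result
 * saturates to INT8_MAX = 127 with vxsat set.
 */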
2380
2381 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a,
2382 int16_t b)
2383 {
2384 int16_t res = a + b;
2385 if ((res ^ a) & (res ^ b) & INT16_MIN) {
2386 res = a > 0 ? INT16_MAX : INT16_MIN;
2387 env->vxsat = 0x1;
2388 }
2389 return res;
2390 }
2391
2392 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a,
2393 int32_t b)
2394 {
2395 int32_t res = a + b;
2396 if ((res ^ a) & (res ^ b) & INT32_MIN) {
2397 res = a > 0 ? INT32_MAX : INT32_MIN;
2398 env->vxsat = 0x1;
2399 }
2400 return res;
2401 }
2402
2403 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a,
2404 int64_t b)
2405 {
2406 int64_t res = a + b;
2407 if ((res ^ a) & (res ^ b) & INT64_MIN) {
2408 res = a > 0 ? INT64_MAX : INT64_MIN;
2409 env->vxsat = 0x1;
2410 }
2411 return res;
2412 }
2413
2414 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2415 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2416 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2417 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
2418 GEN_VEXT_VV_RM(vsadd_vv_b, 1)
2419 GEN_VEXT_VV_RM(vsadd_vv_h, 2)
2420 GEN_VEXT_VV_RM(vsadd_vv_w, 4)
2421 GEN_VEXT_VV_RM(vsadd_vv_d, 8)
2422
2423 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2424 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2425 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2426 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
2427 GEN_VEXT_VX_RM(vsadd_vx_b, 1)
2428 GEN_VEXT_VX_RM(vsadd_vx_h, 2)
2429 GEN_VEXT_VX_RM(vsadd_vx_w, 4)
2430 GEN_VEXT_VX_RM(vsadd_vx_d, 8)
2431
2432 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a,
2433 uint8_t b)
2434 {
2435 uint8_t res = a - b;
2436 if (res > a) {
2437 res = 0;
2438 env->vxsat = 0x1;
2439 }
2440 return res;
2441 }
2442
2443 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2444 uint16_t b)
2445 {
2446 uint16_t res = a - b;
2447 if (res > a) {
2448 res = 0;
2449 env->vxsat = 0x1;
2450 }
2451 return res;
2452 }
2453
2454 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2455 uint32_t b)
2456 {
2457 uint32_t res = a - b;
2458 if (res > a) {
2459 res = 0;
2460 env->vxsat = 0x1;
2461 }
2462 return res;
2463 }
2464
2465 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2466 uint64_t b)
2467 {
2468 uint64_t res = a - b;
2469 if (res > a) {
2470 res = 0;
2471 env->vxsat = 0x1;
2472 }
2473 return res;
2474 }
2475
2476 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2477 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2478 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2479 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
2480 GEN_VEXT_VV_RM(vssubu_vv_b, 1)
2481 GEN_VEXT_VV_RM(vssubu_vv_h, 2)
2482 GEN_VEXT_VV_RM(vssubu_vv_w, 4)
2483 GEN_VEXT_VV_RM(vssubu_vv_d, 8)
2484
2485 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2486 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2487 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2488 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
2489 GEN_VEXT_VX_RM(vssubu_vx_b, 1)
2490 GEN_VEXT_VX_RM(vssubu_vx_h, 2)
2491 GEN_VEXT_VX_RM(vssubu_vx_w, 4)
2492 GEN_VEXT_VX_RM(vssubu_vx_d, 8)
2493
2494 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2495 {
2496 int8_t res = a - b;
2497 if ((res ^ a) & (a ^ b) & INT8_MIN) {
2498 res = a >= 0 ? INT8_MAX : INT8_MIN;
2499 env->vxsat = 0x1;
2500 }
2501 return res;
2502 }
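/*
 * For subtraction, overflow occurred iff the operands have different signs
 * and the result's sign differs from a's, i.e. the sign bit of
 * (res ^ a) & (a ^ b) is set. Example: a = 100, b = -100 -> res = -56
 * (200 wraps), and since a >= 0 the result saturates to INT8_MAX = 127.
 */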
2503
2504 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a,
2505 int16_t b)
2506 {
2507 int16_t res = a - b;
2508 if ((res ^ a) & (a ^ b) & INT16_MIN) {
2509 res = a >= 0 ? INT16_MAX : INT16_MIN;
2510 env->vxsat = 0x1;
2511 }
2512 return res;
2513 }
2514
2515 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a,
2516 int32_t b)
2517 {
2518 int32_t res = a - b;
2519 if ((res ^ a) & (a ^ b) & INT32_MIN) {
2520 res = a >= 0 ? INT32_MAX : INT32_MIN;
2521 env->vxsat = 0x1;
2522 }
2523 return res;
2524 }
2525
2526 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a,
2527 int64_t b)
2528 {
2529 int64_t res = a - b;
2530 if ((res ^ a) & (a ^ b) & INT64_MIN) {
2531 res = a >= 0 ? INT64_MAX : INT64_MIN;
2532 env->vxsat = 0x1;
2533 }
2534 return res;
2535 }
2536
2537 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2538 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2539 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2540 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
2541 GEN_VEXT_VV_RM(vssub_vv_b, 1)
2542 GEN_VEXT_VV_RM(vssub_vv_h, 2)
2543 GEN_VEXT_VV_RM(vssub_vv_w, 4)
2544 GEN_VEXT_VV_RM(vssub_vv_d, 8)
2545
2546 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2547 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2548 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2549 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
2550 GEN_VEXT_VX_RM(vssub_vx_b, 1)
2551 GEN_VEXT_VX_RM(vssub_vx_h, 2)
2552 GEN_VEXT_VX_RM(vssub_vx_w, 4)
2553 GEN_VEXT_VX_RM(vssub_vx_d, 8)
2554
2555 /* Vector Single-Width Averaging Add and Subtract */
2556 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2557 {
2558 uint8_t d = extract64(v, shift, 1);
2559 uint8_t d1;
2560 uint64_t D1, D2;
2561
2562 if (shift == 0 || shift > 64) {
2563 return 0;
2564 }
2565
2566 d1 = extract64(v, shift - 1, 1);
2567 D1 = extract64(v, 0, shift);
2568 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2569 return d1;
2570 } else if (vxrm == 1) { /* round-to-nearest-even */
2571 if (shift > 1) {
2572 D2 = extract64(v, 0, shift - 1);
2573 return d1 & ((D2 != 0) | d);
2574 } else {
2575 return d1 & d;
2576 }
2577 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2578 return !d & (D1 != 0);
2579 }
2580 return 0; /* round-down (truncate) */
2581 }
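/*
 * get_round() returns the increment to add after shifting v right by
 * 'shift'. Example with v = 0b1010 and shift = 2 (i.e. 2.5):
 *   rnu: bit 1 of v is set               -> round = 1, result 3
 *   rne: tie with an even truncation     -> round = 0, result 2
 *   rdn: always truncate                 -> round = 0, result 2
 *   rod: inexact and bit 2 of v is clear -> round = 1, result 3 (odd)
 */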
2582
2583 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a,
2584 int32_t b)
2585 {
2586 int64_t res = (int64_t)a + b;
2587 uint8_t round = get_round(vxrm, res, 1);
2588
2589 return (res >> 1) + round;
2590 }
2591
2592 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a,
2593 int64_t b)
2594 {
2595 int64_t res = a + b;
2596 uint8_t round = get_round(vxrm, res, 1);
2597 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2598
2599 /* With signed overflow, bit 64 is inverse of bit 63. */
2600 return ((res >> 1) ^ over) + round;
2601 }
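/*
 * Example of the overflow fix-up: a = b = INT64_MAX. res wraps to -2 and
 * over = INT64_MIN. (res >> 1) is -1; XOR-ing with over flips bit 63 back,
 * giving INT64_MAX, which is indeed (INT64_MAX + INT64_MAX) / 2.
 */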
2602
2603 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2604 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2605 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2606 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
2607 GEN_VEXT_VV_RM(vaadd_vv_b, 1)
2608 GEN_VEXT_VV_RM(vaadd_vv_h, 2)
2609 GEN_VEXT_VV_RM(vaadd_vv_w, 4)
2610 GEN_VEXT_VV_RM(vaadd_vv_d, 8)
2611
2612 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2613 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2614 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2615 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
2616 GEN_VEXT_VX_RM(vaadd_vx_b, 1)
2617 GEN_VEXT_VX_RM(vaadd_vx_h, 2)
2618 GEN_VEXT_VX_RM(vaadd_vx_w, 4)
2619 GEN_VEXT_VX_RM(vaadd_vx_d, 8)
2620
2621 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2622 uint32_t a, uint32_t b)
2623 {
2624 uint64_t res = (uint64_t)a + b;
2625 uint8_t round = get_round(vxrm, res, 1);
2626
2627 return (res >> 1) + round;
2628 }
2629
2630 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
2631 uint64_t a, uint64_t b)
2632 {
2633 uint64_t res = a + b;
2634 uint8_t round = get_round(vxrm, res, 1);
2635 uint64_t over = (uint64_t)(res < a) << 63;
2636
2637 return ((res >> 1) | over) + round;
2638 }
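/*
 * For the unsigned variant the lost information is the carry out of bit 63,
 * recovered as (res < a) and re-inserted as bit 63 of the halved sum.
 * Example: a = b = UINT64_MAX averages to UINT64_MAX.
 */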
2639
2640 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
2641 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
2642 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
2643 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
2644 GEN_VEXT_VV_RM(vaaddu_vv_b, 1)
2645 GEN_VEXT_VV_RM(vaaddu_vv_h, 2)
2646 GEN_VEXT_VV_RM(vaaddu_vv_w, 4)
2647 GEN_VEXT_VV_RM(vaaddu_vv_d, 8)
2648
2649 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
2650 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
2651 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
2652 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
2653 GEN_VEXT_VX_RM(vaaddu_vx_b, 1)
2654 GEN_VEXT_VX_RM(vaaddu_vx_h, 2)
2655 GEN_VEXT_VX_RM(vaaddu_vx_w, 4)
2656 GEN_VEXT_VX_RM(vaaddu_vx_d, 8)
2657
2658 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a,
2659 int32_t b)
2660 {
2661 int64_t res = (int64_t)a - b;
2662 uint8_t round = get_round(vxrm, res, 1);
2663
2664 return (res >> 1) + round;
2665 }
2666
2667 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a,
2668 int64_t b)
2669 {
2670 int64_t res = (int64_t)a - b;
2671 uint8_t round = get_round(vxrm, res, 1);
2672 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2673
2674 /* With signed overflow, bit 64 is inverse of bit 63. */
2675 return ((res >> 1) ^ over) + round;
2676 }
2677
2678 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2679 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2680 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2681 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
2682 GEN_VEXT_VV_RM(vasub_vv_b, 1)
2683 GEN_VEXT_VV_RM(vasub_vv_h, 2)
2684 GEN_VEXT_VV_RM(vasub_vv_w, 4)
2685 GEN_VEXT_VV_RM(vasub_vv_d, 8)
2686
2687 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2688 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2689 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2690 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
2691 GEN_VEXT_VX_RM(vasub_vx_b, 1)
2692 GEN_VEXT_VX_RM(vasub_vx_h, 2)
2693 GEN_VEXT_VX_RM(vasub_vx_w, 4)
2694 GEN_VEXT_VX_RM(vasub_vx_d, 8)
2695
2696 static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2697 uint32_t a, uint32_t b)
2698 {
2699 int64_t res = (int64_t)a - b;
2700 uint8_t round = get_round(vxrm, res, 1);
2701
2702 return (res >> 1) + round;
2703 }
2704
2705 static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
2706 uint64_t a, uint64_t b)
2707 {
2708 uint64_t res = (uint64_t)a - b;
2709 uint8_t round = get_round(vxrm, res, 1);
2710 uint64_t over = (uint64_t)(res > a) << 63;
2711
2712 return ((res >> 1) | over) + round;
2713 }
2714
2715 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
2716 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
2717 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
2718 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
2719 GEN_VEXT_VV_RM(vasubu_vv_b, 1)
2720 GEN_VEXT_VV_RM(vasubu_vv_h, 2)
2721 GEN_VEXT_VV_RM(vasubu_vv_w, 4)
2722 GEN_VEXT_VV_RM(vasubu_vv_d, 8)
2723
2724 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
2725 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
2726 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
2727 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
2728 GEN_VEXT_VX_RM(vasubu_vx_b, 1)
2729 GEN_VEXT_VX_RM(vasubu_vx_h, 2)
2730 GEN_VEXT_VX_RM(vasubu_vx_w, 4)
2731 GEN_VEXT_VX_RM(vasubu_vx_d, 8)
2732
2733 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2734 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2735 {
2736 uint8_t round;
2737 int16_t res;
2738
2739 res = (int16_t)a * (int16_t)b;
2740 round = get_round(vxrm, res, 7);
2741 res = (res >> 7) + round;
2742
2743 if (res > INT8_MAX) {
2744 env->vxsat = 0x1;
2745 return INT8_MAX;
2746 } else if (res < INT8_MIN) {
2747 env->vxsat = 0x1;
2748 return INT8_MIN;
2749 } else {
2750 return res;
2751 }
2752 }
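/*
 * vsmul treats the operands as fixed-point fractions and returns
 * (a * b) >> (SEW - 1) with rounding. Saturation is only ever needed for
 * the (-1.0) * (-1.0) case: e.g. a = b = INT8_MIN gives 16384 >> 7 = 128,
 * which is clipped to INT8_MAX = 127 with vxsat set.
 */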
2753
2754 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2755 {
2756 uint8_t round;
2757 int32_t res;
2758
2759 res = (int32_t)a * (int32_t)b;
2760 round = get_round(vxrm, res, 15);
2761 res = (res >> 15) + round;
2762
2763 if (res > INT16_MAX) {
2764 env->vxsat = 0x1;
2765 return INT16_MAX;
2766 } else if (res < INT16_MIN) {
2767 env->vxsat = 0x1;
2768 return INT16_MIN;
2769 } else {
2770 return res;
2771 }
2772 }
2773
2774 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2775 {
2776 uint8_t round;
2777 int64_t res;
2778
2779 res = (int64_t)a * (int64_t)b;
2780 round = get_round(vxrm, res, 31);
2781 res = (res >> 31) + round;
2782
2783 if (res > INT32_MAX) {
2784 env->vxsat = 0x1;
2785 return INT32_MAX;
2786 } else if (res < INT32_MIN) {
2787 env->vxsat = 0x1;
2788 return INT32_MIN;
2789 } else {
2790 return res;
2791 }
2792 }
2793
2794 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2795 {
2796 uint8_t round;
2797 uint64_t hi_64, lo_64;
2798 int64_t res;
2799
2800 if (a == INT64_MIN && b == INT64_MIN) {
2801 env->vxsat = 1;
2802 return INT64_MAX;
2803 }
2804
2805 muls64(&lo_64, &hi_64, a, b);
2806 round = get_round(vxrm, lo_64, 63);
2807 /*
2808 * Cannot overflow, as there are always
2809 * 2 sign bits after multiply.
2810 */
2811 res = (hi_64 << 1) | (lo_64 >> 63);
2812 if (round) {
2813 if (res == INT64_MAX) {
2814 env->vxsat = 1;
2815 } else {
2816 res += 1;
2817 }
2818 }
2819 return res;
2820 }
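/*
 * In the 64-bit case above, once INT64_MIN * INT64_MIN has been excluded,
 * |a * b| <= 2 ** 126, so bits 127 and 126 of the 128-bit product are both
 * sign bits and (hi_64 << 1) | (lo_64 >> 63) loses nothing; only the
 * rounding increment can still overflow, hence the explicit INT64_MAX check.
 */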
2821
2822 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2823 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2824 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2825 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
2826 GEN_VEXT_VV_RM(vsmul_vv_b, 1)
2827 GEN_VEXT_VV_RM(vsmul_vv_h, 2)
2828 GEN_VEXT_VV_RM(vsmul_vv_w, 4)
2829 GEN_VEXT_VV_RM(vsmul_vv_d, 8)
2830
2831 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2832 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2833 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2834 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
2835 GEN_VEXT_VX_RM(vsmul_vx_b, 1)
2836 GEN_VEXT_VX_RM(vsmul_vx_h, 2)
2837 GEN_VEXT_VX_RM(vsmul_vx_w, 4)
2838 GEN_VEXT_VX_RM(vsmul_vx_d, 8)
2839
2840 /* Vector Single-Width Scaling Shift Instructions */
2841 static inline uint8_t
2842 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2843 {
2844 uint8_t round, shift = b & 0x7;
2845 uint8_t res;
2846
2847 round = get_round(vxrm, a, shift);
2848 res = (a >> shift) + round;
2849 return res;
2850 }
2851 static inline uint16_t
2852 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2853 {
2854 uint8_t round, shift = b & 0xf;
2855
2856 round = get_round(vxrm, a, shift);
2857 return (a >> shift) + round;
2858 }
2859 static inline uint32_t
2860 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2861 {
2862 uint8_t round, shift = b & 0x1f;
2863
2864 round = get_round(vxrm, a, shift);
2865 return (a >> shift) + round;
2866 }
2867 static inline uint64_t
2868 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2869 {
2870 uint8_t round, shift = b & 0x3f;
2871
2872 round = get_round(vxrm, a, shift);
2873 return (a >> shift) + round;
2874 }
2875 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2876 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2877 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2878 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
2879 GEN_VEXT_VV_RM(vssrl_vv_b, 1)
2880 GEN_VEXT_VV_RM(vssrl_vv_h, 2)
2881 GEN_VEXT_VV_RM(vssrl_vv_w, 4)
2882 GEN_VEXT_VV_RM(vssrl_vv_d, 8)
2883
2884 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2885 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2886 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2887 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
2888 GEN_VEXT_VX_RM(vssrl_vx_b, 1)
2889 GEN_VEXT_VX_RM(vssrl_vx_h, 2)
2890 GEN_VEXT_VX_RM(vssrl_vx_w, 4)
2891 GEN_VEXT_VX_RM(vssrl_vx_d, 8)
2892
2893 static inline int8_t
2894 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2895 {
2896 uint8_t round, shift = b & 0x7;
2897
2898 round = get_round(vxrm, a, shift);
2899 return (a >> shift) + round;
2900 }
2901 static inline int16_t
2902 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2903 {
2904 uint8_t round, shift = b & 0xf;
2905
2906 round = get_round(vxrm, a, shift);
2907 return (a >> shift) + round;
2908 }
2909 static inline int32_t
2910 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2911 {
2912 uint8_t round, shift = b & 0x1f;
2913
2914 round = get_round(vxrm, a, shift);
2915 return (a >> shift) + round;
2916 }
2917 static inline int64_t
2918 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2919 {
2920 uint8_t round, shift = b & 0x3f;
2921
2922 round = get_round(vxrm, a, shift);
2923 return (a >> shift) + round;
2924 }
2925
2926 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2927 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2928 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2929 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
2930 GEN_VEXT_VV_RM(vssra_vv_b, 1)
2931 GEN_VEXT_VV_RM(vssra_vv_h, 2)
2932 GEN_VEXT_VV_RM(vssra_vv_w, 4)
2933 GEN_VEXT_VV_RM(vssra_vv_d, 8)
2934
2935 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2936 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2937 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2938 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
2939 GEN_VEXT_VX_RM(vssra_vx_b, 1)
2940 GEN_VEXT_VX_RM(vssra_vx_h, 2)
2941 GEN_VEXT_VX_RM(vssra_vx_w, 4)
2942 GEN_VEXT_VX_RM(vssra_vx_d, 8)
2943
2944 /* Vector Narrowing Fixed-Point Clip Instructions */
2945 static inline int8_t
2946 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2947 {
2948 uint8_t round, shift = b & 0xf;
2949 int16_t res;
2950
2951 round = get_round(vxrm, a, shift);
2952 res = (a >> shift) + round;
2953 if (res > INT8_MAX) {
2954 env->vxsat = 0x1;
2955 return INT8_MAX;
2956 } else if (res < INT8_MIN) {
2957 env->vxsat = 0x1;
2958 return INT8_MIN;
2959 } else {
2960 return res;
2961 }
2962 }
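/*
 * Narrowing clip: shift the 2*SEW-wide source right with rounding, then
 * saturate to SEW bits. Example for vnclip8: a = 0x1234, shift = 4 gives
 * 0x123 = 291, which exceeds INT8_MAX and is clipped to 127 with vxsat set.
 */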
2963
2964 static inline int16_t
2965 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2966 {
2967 uint8_t round, shift = b & 0x1f;
2968 int32_t res;
2969
2970 round = get_round(vxrm, a, shift);
2971 res = (a >> shift) + round;
2972 if (res > INT16_MAX) {
2973 env->vxsat = 0x1;
2974 return INT16_MAX;
2975 } else if (res < INT16_MIN) {
2976 env->vxsat = 0x1;
2977 return INT16_MIN;
2978 } else {
2979 return res;
2980 }
2981 }
2982
2983 static inline int32_t
2984 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2985 {
2986 uint8_t round, shift = b & 0x3f;
2987 int64_t res;
2988
2989 round = get_round(vxrm, a, shift);
2990 res = (a >> shift) + round;
2991 if (res > INT32_MAX) {
2992 env->vxsat = 0x1;
2993 return INT32_MAX;
2994 } else if (res < INT32_MIN) {
2995 env->vxsat = 0x1;
2996 return INT32_MIN;
2997 } else {
2998 return res;
2999 }
3000 }
3001
3002 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
3003 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
3004 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
3005 GEN_VEXT_VV_RM(vnclip_wv_b, 1)
3006 GEN_VEXT_VV_RM(vnclip_wv_h, 2)
3007 GEN_VEXT_VV_RM(vnclip_wv_w, 4)
3008
3009 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
3010 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
3011 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
3012 GEN_VEXT_VX_RM(vnclip_wx_b, 1)
3013 GEN_VEXT_VX_RM(vnclip_wx_h, 2)
3014 GEN_VEXT_VX_RM(vnclip_wx_w, 4)
3015
3016 static inline uint8_t
3017 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
3018 {
3019 uint8_t round, shift = b & 0xf;
3020 uint16_t res;
3021
3022 round = get_round(vxrm, a, shift);
3023 res = (a >> shift) + round;
3024 if (res > UINT8_MAX) {
3025 env->vxsat = 0x1;
3026 return UINT8_MAX;
3027 } else {
3028 return res;
3029 }
3030 }
3031
3032 static inline uint16_t
3033 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
3034 {
3035 uint8_t round, shift = b & 0x1f;
3036 uint32_t res;
3037
3038 round = get_round(vxrm, a, shift);
3039 res = (a >> shift) + round;
3040 if (res > UINT16_MAX) {
3041 env->vxsat = 0x1;
3042 return UINT16_MAX;
3043 } else {
3044 return res;
3045 }
3046 }
3047
3048 static inline uint32_t
3049 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
3050 {
3051 uint8_t round, shift = b & 0x3f;
3052 uint64_t res;
3053
3054 round = get_round(vxrm, a, shift);
3055 res = (a >> shift) + round;
3056 if (res > UINT32_MAX) {
3057 env->vxsat = 0x1;
3058 return UINT32_MAX;
3059 } else {
3060 return res;
3061 }
3062 }
3063
3064 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
3065 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
3066 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
3067 GEN_VEXT_VV_RM(vnclipu_wv_b, 1)
3068 GEN_VEXT_VV_RM(vnclipu_wv_h, 2)
3069 GEN_VEXT_VV_RM(vnclipu_wv_w, 4)
3070
3071 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
3072 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
3073 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
3074 GEN_VEXT_VX_RM(vnclipu_wx_b, 1)
3075 GEN_VEXT_VX_RM(vnclipu_wx_h, 2)
3076 GEN_VEXT_VX_RM(vnclipu_wx_w, 4)
3077
3078 /*
3079  * Vector Floating-Point Arithmetic Instructions
3080 */
3081 /* Vector Single-Width Floating-Point Add/Subtract Instructions */
3082 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
3083 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
3084 CPURISCVState *env) \
3085 { \
3086 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
3087 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3088 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \
3089 }
3090
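/*
 * GEN_VEXT_VV_ENV wraps the per-element worker in the full helper: elements
 * below vstart are skipped, masked-off elements are filled with 1s when the
 * mask policy is agnostic (vma), and tail elements past vl are filled with
 * 1s according to the tail policy (vta).
 */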
3091 #define GEN_VEXT_VV_ENV(NAME, ESZ) \
3092 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
3093 void *vs2, CPURISCVState *env, \
3094 uint32_t desc) \
3095 { \
3096 uint32_t vm = vext_vm(desc); \
3097 uint32_t vl = env->vl; \
3098 uint32_t total_elems = \
3099 vext_get_total_elems(env, desc, ESZ); \
3100 uint32_t vta = vext_vta(desc); \
3101 uint32_t vma = vext_vma(desc); \
3102 uint32_t i; \
3103 \
3104 VSTART_CHECK_EARLY_EXIT(env, vl); \
3105 \
3106 for (i = env->vstart; i < vl; i++) { \
3107 if (!vm && !vext_elem_mask(v0, i)) { \
3108 /* set masked-off elements to 1s */ \
3109 vext_set_elems_1s(vd, vma, i * ESZ, \
3110 (i + 1) * ESZ); \
3111 continue; \
3112 } \
3113 do_##NAME(vd, vs1, vs2, i, env); \
3114 } \
3115 env->vstart = 0; \
3116 /* set tail elements to 1s */ \
3117 vext_set_elems_1s(vd, vta, vl * ESZ, \
3118 total_elems * ESZ); \
3119 }
3120
3121 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
3122 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
3123 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
3124 GEN_VEXT_VV_ENV(vfadd_vv_h, 2)
3125 GEN_VEXT_VV_ENV(vfadd_vv_w, 4)
3126 GEN_VEXT_VV_ENV(vfadd_vv_d, 8)
3127
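/*
 * OPFVF2/GEN_VEXT_VF are the vector-scalar variants: the scalar FP operand
 * arrives in the uint64_t s1 and is narrowed to T1 before being passed to
 * OP together with the element loaded from vs2.
 */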
3128 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
3129 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
3130 CPURISCVState *env) \
3131 { \
3132 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3133 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
3134 }
3135
3136 #define GEN_VEXT_VF(NAME, ESZ) \
3137 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \
3138 void *vs2, CPURISCVState *env, \
3139 uint32_t desc) \
3140 { \
3141 uint32_t vm = vext_vm(desc); \
3142 uint32_t vl = env->vl; \
3143 uint32_t total_elems = \
3144 vext_get_total_elems(env, desc, ESZ); \
3145 uint32_t vta = vext_vta(desc); \
3146 uint32_t vma = vext_vma(desc); \
3147 uint32_t i; \
3148 \
3149 VSTART_CHECK_EARLY_EXIT(env, vl); \
3150 \
3151 for (i = env->vstart; i < vl; i++) { \
3152 if (!vm && !vext_elem_mask(v0, i)) { \
3153 /* set masked-off elements to 1s */ \
3154 vext_set_elems_1s(vd, vma, i * ESZ, \
3155 (i + 1) * ESZ); \
3156 continue; \
3157 } \
3158 do_##NAME(vd, s1, vs2, i, env); \
3159 } \
3160 env->vstart = 0; \
3161 /* set tail elements to 1s */ \
3162 vext_set_elems_1s(vd, vta, vl * ESZ, \
3163 total_elems * ESZ); \
3164 }
3165
3166 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
3167 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
3168 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
3169 GEN_VEXT_VF(vfadd_vf_h, 2)
3170 GEN_VEXT_VF(vfadd_vf_w, 4)
3171 GEN_VEXT_VF(vfadd_vf_d, 8)
3172
3173 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
3174 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
3175 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
3176 GEN_VEXT_VV_ENV(vfsub_vv_h, 2)
3177 GEN_VEXT_VV_ENV(vfsub_vv_w, 4)
3178 GEN_VEXT_VV_ENV(vfsub_vv_d, 8)
3179 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
3180 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
3181 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
3182 GEN_VEXT_VF(vfsub_vf_h, 2)
3183 GEN_VEXT_VF(vfsub_vf_w, 4)
3184 GEN_VEXT_VF(vfsub_vf_d, 8)
3185
3186 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
3187 {
3188 return float16_sub(b, a, s);
3189 }
3190
3191 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
3192 {
3193 return float32_sub(b, a, s);
3194 }
3195
3196 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
3197 {
3198 return float64_sub(b, a, s);
3199 }
3200
3201 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
3202 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
3203 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
3204 GEN_VEXT_VF(vfrsub_vf_h, 2)
3205 GEN_VEXT_VF(vfrsub_vf_w, 4)
3206 GEN_VEXT_VF(vfrsub_vf_d, 8)
3207
3208 /* Vector Widening Floating-Point Add/Subtract Instructions */
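/*
 * The widening helpers below convert both operands up (f16 -> f32 or
 * f32 -> f64) before operating; the up-conversion is exact, so only the
 * final wide-format operation rounds.
 */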
3209 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
3210 {
3211 return float32_add(float16_to_float32(a, true, s),
3212 float16_to_float32(b, true, s), s);
3213 }
3214
3215 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
3216 {
3217 return float64_add(float32_to_float64(a, s),
3218 float32_to_float64(b, s), s);
3219
3220 }
3221
3222 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
3223 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
3224 GEN_VEXT_VV_ENV(vfwadd_vv_h, 4)
3225 GEN_VEXT_VV_ENV(vfwadd_vv_w, 8)
3226 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
3227 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
3228 GEN_VEXT_VF(vfwadd_vf_h, 4)
3229 GEN_VEXT_VF(vfwadd_vf_w, 8)
3230
3231 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
3232 {
3233 return float32_sub(float16_to_float32(a, true, s),
3234 float16_to_float32(b, true, s), s);
3235 }
3236
3237 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
3238 {
3239 return float64_sub(float32_to_float64(a, s),
3240 float32_to_float64(b, s), s);
3241
3242 }
3243
3244 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
3245 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
3246 GEN_VEXT_VV_ENV(vfwsub_vv_h, 4)
3247 GEN_VEXT_VV_ENV(vfwsub_vv_w, 8)
3248 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
3249 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
3250 GEN_VEXT_VF(vfwsub_vf_h, 4)
3251 GEN_VEXT_VF(vfwsub_vf_w, 8)
3252
3253 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
3254 {
3255 return float32_add(a, float16_to_float32(b, true, s), s);
3256 }
3257
3258 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
3259 {
3260 return float64_add(a, float32_to_float64(b, s), s);
3261 }
3262
3263 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
3264 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
3265 GEN_VEXT_VV_ENV(vfwadd_wv_h, 4)
3266 GEN_VEXT_VV_ENV(vfwadd_wv_w, 8)
3267 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
3268 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
3269 GEN_VEXT_VF(vfwadd_wf_h, 4)
3270 GEN_VEXT_VF(vfwadd_wf_w, 8)
3271
3272 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
3273 {
3274 return float32_sub(a, float16_to_float32(b, true, s), s);
3275 }
3276
3277 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
3278 {
3279 return float64_sub(a, float32_to_float64(b, s), s);
3280 }
3281
3282 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
3283 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
3284 GEN_VEXT_VV_ENV(vfwsub_wv_h, 4)
3285 GEN_VEXT_VV_ENV(vfwsub_wv_w, 8)
3286 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
3287 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
3288 GEN_VEXT_VF(vfwsub_wf_h, 4)
3289 GEN_VEXT_VF(vfwsub_wf_w, 8)
3290
3291 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */
3292 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
3293 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
3294 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
3295 GEN_VEXT_VV_ENV(vfmul_vv_h, 2)
3296 GEN_VEXT_VV_ENV(vfmul_vv_w, 4)
3297 GEN_VEXT_VV_ENV(vfmul_vv_d, 8)
3298 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
3299 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
3300 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
3301 GEN_VEXT_VF(vfmul_vf_h, 2)
3302 GEN_VEXT_VF(vfmul_vf_w, 4)
3303 GEN_VEXT_VF(vfmul_vf_d, 8)
3304
3305 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
3306 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
3307 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
3308 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2)
3309 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4)
3310 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8)
3311 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
3312 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
3313 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
3314 GEN_VEXT_VF(vfdiv_vf_h, 2)
3315 GEN_VEXT_VF(vfdiv_vf_w, 4)
3316 GEN_VEXT_VF(vfdiv_vf_d, 8)
3317
3318 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3319 {
3320 return float16_div(b, a, s);
3321 }
3322
3323 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
3324 {
3325 return float32_div(b, a, s);
3326 }
3327
3328 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
3329 {
3330 return float64_div(b, a, s);
3331 }
3332
3333 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
3334 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
3335 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
3336 GEN_VEXT_VF(vfrdiv_vf_h, 2)
3337 GEN_VEXT_VF(vfrdiv_vf_w, 4)
3338 GEN_VEXT_VF(vfrdiv_vf_d, 8)
3339
3340 /* Vector Widening Floating-Point Multiply */
3341 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3342 {
3343 return float32_mul(float16_to_float32(a, true, s),
3344 float16_to_float32(b, true, s), s);
3345 }
3346
3347 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3348 {
3349 return float64_mul(float32_to_float64(a, s),
3350 float32_to_float64(b, s), s);
3351
3352 }
3353 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3354 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
3355 GEN_VEXT_VV_ENV(vfwmul_vv_h, 4)
3356 GEN_VEXT_VV_ENV(vfwmul_vv_w, 8)
3357 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3358 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
3359 GEN_VEXT_VF(vfwmul_vf_h, 4)
3360 GEN_VEXT_VF(vfwmul_vf_w, 8)
3361
3362 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
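/*
 * OPFVV3/OPFVF3 additionally read the destination element d (the
 * accumulator), calling OP(vs2[i], vs1[i]/rs1, vd[i], &env->fp_status); the
 * f*macc/f*sac/f*madd/f*sub wrappers below pick the operand order and the
 * softfloat muladd negation flags for each variant.
 */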
3363 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
3364 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
3365 CPURISCVState *env) \
3366 { \
3367 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
3368 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3369 TD d = *((TD *)vd + HD(i)); \
3370 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \
3371 }
3372
3373 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3374 {
3375 return float16_muladd(a, b, d, 0, s);
3376 }
3377
3378 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3379 {
3380 return float32_muladd(a, b, d, 0, s);
3381 }
3382
3383 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3384 {
3385 return float64_muladd(a, b, d, 0, s);
3386 }
3387
3388 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3389 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3390 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
3391 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2)
3392 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4)
3393 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8)
3394
3395 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
3396 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
3397 CPURISCVState *env) \
3398 { \
3399 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3400 TD d = *((TD *)vd + HD(i)); \
3401 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
3402 }
3403
3404 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3405 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3406 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
3407 GEN_VEXT_VF(vfmacc_vf_h, 2)
3408 GEN_VEXT_VF(vfmacc_vf_w, 4)
3409 GEN_VEXT_VF(vfmacc_vf_d, 8)
3410
3411 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3412 {
3413 return float16_muladd(a, b, d, float_muladd_negate_c |
3414 float_muladd_negate_product, s);
3415 }
3416
3417 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3418 {
3419 return float32_muladd(a, b, d, float_muladd_negate_c |
3420 float_muladd_negate_product, s);
3421 }
3422
3423 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3424 {
3425 return float64_muladd(a, b, d, float_muladd_negate_c |
3426 float_muladd_negate_product, s);
3427 }
3428
3429 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3430 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3431 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
3432 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2)
3433 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4)
3434 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8)
3435 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3436 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3437 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
3438 GEN_VEXT_VF(vfnmacc_vf_h, 2)
3439 GEN_VEXT_VF(vfnmacc_vf_w, 4)
3440 GEN_VEXT_VF(vfnmacc_vf_d, 8)
3441
3442 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3443 {
3444 return float16_muladd(a, b, d, float_muladd_negate_c, s);
3445 }
3446
3447 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3448 {
3449 return float32_muladd(a, b, d, float_muladd_negate_c, s);
3450 }
3451
3452 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3453 {
3454 return float64_muladd(a, b, d, float_muladd_negate_c, s);
3455 }
3456
3457 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3458 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3459 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
3460 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2)
3461 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4)
3462 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8)
3463 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3464 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3465 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
3466 GEN_VEXT_VF(vfmsac_vf_h, 2)
3467 GEN_VEXT_VF(vfmsac_vf_w, 4)
3468 GEN_VEXT_VF(vfmsac_vf_d, 8)
3469
3470 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3471 {
3472 return float16_muladd(a, b, d, float_muladd_negate_product, s);
3473 }
3474
3475 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3476 {
3477 return float32_muladd(a, b, d, float_muladd_negate_product, s);
3478 }
3479
3480 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3481 {
3482 return float64_muladd(a, b, d, float_muladd_negate_product, s);
3483 }
3484
3485 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3486 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3487 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
3488 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2)
3489 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4)
3490 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8)
3491 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3492 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3493 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
3494 GEN_VEXT_VF(vfnmsac_vf_h, 2)
3495 GEN_VEXT_VF(vfnmsac_vf_w, 4)
3496 GEN_VEXT_VF(vfnmsac_vf_d, 8)
3497
3498 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3499 {
3500 return float16_muladd(d, b, a, 0, s);
3501 }
3502
3503 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3504 {
3505 return float32_muladd(d, b, a, 0, s);
3506 }
3507
3508 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3509 {
3510 return float64_muladd(d, b, a, 0, s);
3511 }
3512
3513 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3514 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3515 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
3516 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2)
3517 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4)
3518 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8)
3519 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3520 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3521 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
3522 GEN_VEXT_VF(vfmadd_vf_h, 2)
3523 GEN_VEXT_VF(vfmadd_vf_w, 4)
3524 GEN_VEXT_VF(vfmadd_vf_d, 8)
3525
3526 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3527 {
3528 return float16_muladd(d, b, a, float_muladd_negate_c |
3529 float_muladd_negate_product, s);
3530 }
3531
3532 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3533 {
3534 return float32_muladd(d, b, a, float_muladd_negate_c |
3535 float_muladd_negate_product, s);
3536 }
3537
3538 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3539 {
3540 return float64_muladd(d, b, a, float_muladd_negate_c |
3541 float_muladd_negate_product, s);
3542 }
3543
3544 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3545 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3546 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
3547 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2)
3548 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4)
3549 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8)
3550 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3551 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3552 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
3553 GEN_VEXT_VF(vfnmadd_vf_h, 2)
3554 GEN_VEXT_VF(vfnmadd_vf_w, 4)
3555 GEN_VEXT_VF(vfnmadd_vf_d, 8)
3556
3557 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3558 {
3559 return float16_muladd(d, b, a, float_muladd_negate_c, s);
3560 }
3561
3562 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3563 {
3564 return float32_muladd(d, b, a, float_muladd_negate_c, s);
3565 }
3566
3567 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3568 {
3569 return float64_muladd(d, b, a, float_muladd_negate_c, s);
3570 }
3571
3572 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3573 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3574 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
3575 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2)
3576 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4)
3577 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8)
3578 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3579 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3580 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
3581 GEN_VEXT_VF(vfmsub_vf_h, 2)
3582 GEN_VEXT_VF(vfmsub_vf_w, 4)
3583 GEN_VEXT_VF(vfmsub_vf_d, 8)
3584
3585 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3586 {
3587 return float16_muladd(d, b, a, float_muladd_negate_product, s);
3588 }
3589
3590 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3591 {
3592 return float32_muladd(d, b, a, float_muladd_negate_product, s);
3593 }
3594
3595 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3596 {
3597 return float64_muladd(d, b, a, float_muladd_negate_product, s);
3598 }
3599
3600 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3601 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3602 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
3603 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2)
3604 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4)
3605 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8)
3606 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3607 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3608 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
3609 GEN_VEXT_VF(vfnmsub_vf_h, 2)
3610 GEN_VEXT_VF(vfnmsub_vf_w, 4)
3611 GEN_VEXT_VF(vfnmsub_vf_d, 8)
3612
3613 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3614 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3615 {
3616 return float32_muladd(float16_to_float32(a, true, s),
3617 float16_to_float32(b, true, s), d, 0, s);
3618 }
3619
3620 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3621 {
3622 return float64_muladd(float32_to_float64(a, s),
3623 float32_to_float64(b, s), d, 0, s);
3624 }
3625
3626 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3627 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
3628 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4)
3629 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8)
3630 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3631 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
3632 GEN_VEXT_VF(vfwmacc_vf_h, 4)
3633 GEN_VEXT_VF(vfwmacc_vf_w, 8)
3634
3635 static uint32_t fwmaccbf16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3636 {
3637 return float32_muladd(bfloat16_to_float32(a, s),
3638 bfloat16_to_float32(b, s), d, 0, s);
3639 }
3640
3641 RVVCALL(OPFVV3, vfwmaccbf16_vv, WOP_UUU_H, H4, H2, H2, fwmaccbf16)
3642 GEN_VEXT_VV_ENV(vfwmaccbf16_vv, 4)
3643 RVVCALL(OPFVF3, vfwmaccbf16_vf, WOP_UUU_H, H4, H2, fwmaccbf16)
3644 GEN_VEXT_VF(vfwmaccbf16_vf, 4)
3645
3646 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3647 {
3648 return float32_muladd(float16_to_float32(a, true, s),
3649 float16_to_float32(b, true, s), d,
3650 float_muladd_negate_c | float_muladd_negate_product,
3651 s);
3652 }
3653
3654 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3655 {
3656 return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s),
3657 d, float_muladd_negate_c |
3658 float_muladd_negate_product, s);
3659 }
3660
3661 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3662 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
3663 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4)
3664 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8)
3665 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3666 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
3667 GEN_VEXT_VF(vfwnmacc_vf_h, 4)
3668 GEN_VEXT_VF(vfwnmacc_vf_w, 8)
3669
3670 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3671 {
3672 return float32_muladd(float16_to_float32(a, true, s),
3673 float16_to_float32(b, true, s), d,
3674 float_muladd_negate_c, s);
3675 }
3676
3677 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3678 {
3679 return float64_muladd(float32_to_float64(a, s),
3680 float32_to_float64(b, s), d,
3681 float_muladd_negate_c, s);
3682 }
3683
3684 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3685 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
3686 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4)
3687 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8)
3688 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3689 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
3690 GEN_VEXT_VF(vfwmsac_vf_h, 4)
3691 GEN_VEXT_VF(vfwmsac_vf_w, 8)
3692
3693 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3694 {
3695 return float32_muladd(float16_to_float32(a, true, s),
3696 float16_to_float32(b, true, s), d,
3697 float_muladd_negate_product, s);
3698 }
3699
3700 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3701 {
3702 return float64_muladd(float32_to_float64(a, s),
3703 float32_to_float64(b, s), d,
3704 float_muladd_negate_product, s);
3705 }
3706
3707 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3708 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
3709 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4)
3710 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8)
3711 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3712 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
3713 GEN_VEXT_VF(vfwnmsac_vf_h, 4)
3714 GEN_VEXT_VF(vfwnmsac_vf_w, 8)
3715
3716 /* Vector Floating-Point Square-Root Instruction */
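/*
 * OPFVV1/GEN_VEXT_V_ENV handle unary FP ops (square-root, the estimate
 * instructions and the conversions further down): only vs2 is read and
 * OP(vs2[i], &env->fp_status) is stored to vd[i].
 */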
3717 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
3718 static void do_##NAME(void *vd, void *vs2, int i, \
3719 CPURISCVState *env) \
3720 { \
3721 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3722 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \
3723 }
3724
3725 #define GEN_VEXT_V_ENV(NAME, ESZ) \
3726 void HELPER(NAME)(void *vd, void *v0, void *vs2, \
3727 CPURISCVState *env, uint32_t desc) \
3728 { \
3729 uint32_t vm = vext_vm(desc); \
3730 uint32_t vl = env->vl; \
3731 uint32_t total_elems = \
3732 vext_get_total_elems(env, desc, ESZ); \
3733 uint32_t vta = vext_vta(desc); \
3734 uint32_t vma = vext_vma(desc); \
3735 uint32_t i; \
3736 \
3737 VSTART_CHECK_EARLY_EXIT(env, vl); \
3738 \
3739 if (vl == 0) { \
3740 return; \
3741 } \
3742 for (i = env->vstart; i < vl; i++) { \
3743 if (!vm && !vext_elem_mask(v0, i)) { \
3744 /* set masked-off elements to 1s */ \
3745 vext_set_elems_1s(vd, vma, i * ESZ, \
3746 (i + 1) * ESZ); \
3747 continue; \
3748 } \
3749 do_##NAME(vd, vs2, i, env); \
3750 } \
3751 env->vstart = 0; \
3752 vext_set_elems_1s(vd, vta, vl * ESZ, \
3753 total_elems * ESZ); \
3754 }
3755
3756 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3757 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3758 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
3759 GEN_VEXT_V_ENV(vfsqrt_v_h, 2)
3760 GEN_VEXT_V_ENV(vfsqrt_v_w, 4)
3761 GEN_VEXT_V_ENV(vfsqrt_v_d, 8)
3762
3763 /*
3764 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
3765 *
3766 * Adapted from riscv-v-spec recip.c:
3767 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3768 */
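/*
 * In short (see the code below): the input is split into sign/exponent/
 * fraction and subnormals are normalized first; a 7-bit table index is
 * built from the exponent's low bit and the top 6 fraction bits, the table
 * entry supplies the top 7 fraction bits of the result, and the output
 * exponent is roughly halved and negated around the bias:
 * out_exp = (3 * bias - 1 - exp) / 2.
 */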
3769 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
3770 {
3771 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3772 uint64_t exp = extract64(f, frac_size, exp_size);
3773 uint64_t frac = extract64(f, 0, frac_size);
3774
3775 const uint8_t lookup_table[] = {
3776 52, 51, 50, 48, 47, 46, 44, 43,
3777 42, 41, 40, 39, 38, 36, 35, 34,
3778 33, 32, 31, 30, 30, 29, 28, 27,
3779 26, 25, 24, 23, 23, 22, 21, 20,
3780 19, 19, 18, 17, 16, 16, 15, 14,
3781 14, 13, 12, 12, 11, 10, 10, 9,
3782 9, 8, 7, 7, 6, 6, 5, 4,
3783 4, 3, 3, 2, 2, 1, 1, 0,
3784 127, 125, 123, 121, 119, 118, 116, 114,
3785 113, 111, 109, 108, 106, 105, 103, 102,
3786 100, 99, 97, 96, 95, 93, 92, 91,
3787 90, 88, 87, 86, 85, 84, 83, 82,
3788 80, 79, 78, 77, 76, 75, 74, 73,
3789 72, 71, 70, 70, 69, 68, 67, 66,
3790 65, 64, 63, 63, 62, 61, 60, 59,
3791 59, 58, 57, 56, 56, 55, 54, 53
3792 };
3793 const int precision = 7;
3794
3795 if (exp == 0 && frac != 0) { /* subnormal */
3796 /* Normalize the subnormal. */
3797 while (extract64(frac, frac_size - 1, 1) == 0) {
3798 exp--;
3799 frac <<= 1;
3800 }
3801
3802 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3803 }
3804
3805 int idx = ((exp & 1) << (precision - 1)) |
3806 (frac >> (frac_size - precision + 1));
3807 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3808 (frac_size - precision);
3809 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;
3810
3811 uint64_t val = 0;
3812 val = deposit64(val, 0, frac_size, out_frac);
3813 val = deposit64(val, frac_size, exp_size, out_exp);
3814 val = deposit64(val, frac_size + exp_size, 1, sign);
3815 return val;
3816 }
3817
3818 static float16 frsqrt7_h(float16 f, float_status *s)
3819 {
3820 int exp_size = 5, frac_size = 10;
3821 bool sign = float16_is_neg(f);
3822
3823 /*
3824 * frsqrt7(sNaN) = canonical NaN
3825 * frsqrt7(-inf) = canonical NaN
3826 * frsqrt7(-normal) = canonical NaN
3827 * frsqrt7(-subnormal) = canonical NaN
3828 */
3829 if (float16_is_signaling_nan(f, s) ||
3830 (float16_is_infinity(f) && sign) ||
3831 (float16_is_normal(f) && sign) ||
3832 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
3833 s->float_exception_flags |= float_flag_invalid;
3834 return float16_default_nan(s);
3835 }
3836
3837 /* frsqrt7(qNaN) = canonical NaN */
3838 if (float16_is_quiet_nan(f, s)) {
3839 return float16_default_nan(s);
3840 }
3841
3842 /* frsqrt7(+-0) = +-inf */
3843 if (float16_is_zero(f)) {
3844 s->float_exception_flags |= float_flag_divbyzero;
3845 return float16_set_sign(float16_infinity, sign);
3846 }
3847
3848 /* frsqrt7(+inf) = +0 */
3849 if (float16_is_infinity(f) && !sign) {
3850 return float16_set_sign(float16_zero, sign);
3851 }
3852
3853 /* +normal, +subnormal */
3854 uint64_t val = frsqrt7(f, exp_size, frac_size);
3855 return make_float16(val);
3856 }
3857
3858 static float32 frsqrt7_s(float32 f, float_status *s)
3859 {
3860 int exp_size = 8, frac_size = 23;
3861 bool sign = float32_is_neg(f);
3862
3863 /*
3864 * frsqrt7(sNaN) = canonical NaN
3865 * frsqrt7(-inf) = canonical NaN
3866 * frsqrt7(-normal) = canonical NaN
3867 * frsqrt7(-subnormal) = canonical NaN
3868 */
3869 if (float32_is_signaling_nan(f, s) ||
3870 (float32_is_infinity(f) && sign) ||
3871 (float32_is_normal(f) && sign) ||
3872 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
3873 s->float_exception_flags |= float_flag_invalid;
3874 return float32_default_nan(s);
3875 }
3876
3877 /* frsqrt7(qNaN) = canonical NaN */
3878 if (float32_is_quiet_nan(f, s)) {
3879 return float32_default_nan(s);
3880 }
3881
3882 /* frsqrt7(+-0) = +-inf */
3883 if (float32_is_zero(f)) {
3884 s->float_exception_flags |= float_flag_divbyzero;
3885 return float32_set_sign(float32_infinity, sign);
3886 }
3887
3888 /* frsqrt7(+inf) = +0 */
3889 if (float32_is_infinity(f) && !sign) {
3890 return float32_set_sign(float32_zero, sign);
3891 }
3892
3893 /* +normal, +subnormal */
3894 uint64_t val = frsqrt7(f, exp_size, frac_size);
3895 return make_float32(val);
3896 }
3897
3898 static float64 frsqrt7_d(float64 f, float_status *s)
3899 {
3900 int exp_size = 11, frac_size = 52;
3901 bool sign = float64_is_neg(f);
3902
3903 /*
3904 * frsqrt7(sNaN) = canonical NaN
3905 * frsqrt7(-inf) = canonical NaN
3906 * frsqrt7(-normal) = canonical NaN
3907 * frsqrt7(-subnormal) = canonical NaN
3908 */
3909 if (float64_is_signaling_nan(f, s) ||
3910 (float64_is_infinity(f) && sign) ||
3911 (float64_is_normal(f) && sign) ||
3912 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
3913 s->float_exception_flags |= float_flag_invalid;
3914 return float64_default_nan(s);
3915 }
3916
3917 /* frsqrt7(qNaN) = canonical NaN */
3918 if (float64_is_quiet_nan(f, s)) {
3919 return float64_default_nan(s);
3920 }
3921
3922 /* frsqrt7(+-0) = +-inf */
3923 if (float64_is_zero(f)) {
3924 s->float_exception_flags |= float_flag_divbyzero;
3925 return float64_set_sign(float64_infinity, sign);
3926 }
3927
3928 /* frsqrt7(+inf) = +0 */
3929 if (float64_is_infinity(f) && !sign) {
3930 return float64_set_sign(float64_zero, sign);
3931 }
3932
3933 /* +normal, +subnormal */
3934 uint64_t val = frsqrt7(f, exp_size, frac_size);
3935 return make_float64(val);
3936 }
3937
3938 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
3939 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
3940 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
3941 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2)
3942 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4)
3943 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8)
3944
3945 /*
3946 * Vector Floating-Point Reciprocal Estimate Instruction
3947 *
3948 * Adapted from riscv-v-spec recip.c:
3949 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3950 */
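/*
 * In short (see the code below): the table index is the top 7 fraction
 * bits, the table entry supplies the top 7 fraction bits of the result, and
 * the output exponent is reflected around the bias,
 * out_exp = 2 * bias - 1 - exp, with extra paths for inputs whose
 * reciprocal overflows to infinity/max and for results that fall into the
 * subnormal range.
 */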
3951 static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
3952 float_status *s)
3953 {
3954 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3955 uint64_t exp = extract64(f, frac_size, exp_size);
3956 uint64_t frac = extract64(f, 0, frac_size);
3957
3958 const uint8_t lookup_table[] = {
3959 127, 125, 123, 121, 119, 117, 116, 114,
3960 112, 110, 109, 107, 105, 104, 102, 100,
3961 99, 97, 96, 94, 93, 91, 90, 88,
3962 87, 85, 84, 83, 81, 80, 79, 77,
3963 76, 75, 74, 72, 71, 70, 69, 68,
3964 66, 65, 64, 63, 62, 61, 60, 59,
3965 58, 57, 56, 55, 54, 53, 52, 51,
3966 50, 49, 48, 47, 46, 45, 44, 43,
3967 42, 41, 40, 40, 39, 38, 37, 36,
3968 35, 35, 34, 33, 32, 31, 31, 30,
3969 29, 28, 28, 27, 26, 25, 25, 24,
3970 23, 23, 22, 21, 21, 20, 19, 19,
3971 18, 17, 17, 16, 15, 15, 14, 14,
3972 13, 12, 12, 11, 11, 10, 9, 9,
3973 8, 8, 7, 7, 6, 5, 5, 4,
3974 4, 3, 3, 2, 2, 1, 1, 0
3975 };
3976 const int precision = 7;
3977
3978 if (exp == 0 && frac != 0) { /* subnormal */
3979 /* Normalize the subnormal. */
3980 while (extract64(frac, frac_size - 1, 1) == 0) {
3981 exp--;
3982 frac <<= 1;
3983 }
3984
3985 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3986
3987 if (exp != 0 && exp != UINT64_MAX) {
3988 /*
3989 * Overflow to inf or max value of same sign,
3990 * depending on sign and rounding mode.
3991 */
3992 s->float_exception_flags |= (float_flag_inexact |
3993 float_flag_overflow);
3994
3995 if ((s->float_rounding_mode == float_round_to_zero) ||
3996 ((s->float_rounding_mode == float_round_down) && !sign) ||
3997 ((s->float_rounding_mode == float_round_up) && sign)) {
3998 /* Return greatest/negative finite value. */
3999 return (sign << (exp_size + frac_size)) |
4000 (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
4001 } else {
4002 /* Return +-inf. */
4003 return (sign << (exp_size + frac_size)) |
4004 MAKE_64BIT_MASK(frac_size, exp_size);
4005 }
4006 }
4007 }
4008
4009 int idx = frac >> (frac_size - precision);
4010 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
4011 (frac_size - precision);
4012 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;
4013
4014 if (out_exp == 0 || out_exp == UINT64_MAX) {
4015 /*
4016 * The result is subnormal, but don't raise the underflow exception,
4017 * because there's no additional loss of precision.
4018 */
4019 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
4020 if (out_exp == UINT64_MAX) {
4021 out_frac >>= 1;
4022 out_exp = 0;
4023 }
4024 }
4025
4026 uint64_t val = 0;
4027 val = deposit64(val, 0, frac_size, out_frac);
4028 val = deposit64(val, frac_size, exp_size, out_exp);
4029 val = deposit64(val, frac_size + exp_size, 1, sign);
4030 return val;
4031 }
4032
4033 static float16 frec7_h(float16 f, float_status *s)
4034 {
4035 int exp_size = 5, frac_size = 10;
4036 bool sign = float16_is_neg(f);
4037
4038 /* frec7(+-inf) = +-0 */
4039 if (float16_is_infinity(f)) {
4040 return float16_set_sign(float16_zero, sign);
4041 }
4042
4043 /* frec7(+-0) = +-inf */
4044 if (float16_is_zero(f)) {
4045 s->float_exception_flags |= float_flag_divbyzero;
4046 return float16_set_sign(float16_infinity, sign);
4047 }
4048
4049 /* frec7(sNaN) = canonical NaN */
4050 if (float16_is_signaling_nan(f, s)) {
4051 s->float_exception_flags |= float_flag_invalid;
4052 return float16_default_nan(s);
4053 }
4054
4055 /* frec7(qNaN) = canonical NaN */
4056 if (float16_is_quiet_nan(f, s)) {
4057 return float16_default_nan(s);
4058 }
4059
4060 /* +-normal, +-subnormal */
4061 uint64_t val = frec7(f, exp_size, frac_size, s);
4062 return make_float16(val);
4063 }
4064
4065 static float32 frec7_s(float32 f, float_status *s)
4066 {
4067 int exp_size = 8, frac_size = 23;
4068 bool sign = float32_is_neg(f);
4069
4070 /* frec7(+-inf) = +-0 */
4071 if (float32_is_infinity(f)) {
4072 return float32_set_sign(float32_zero, sign);
4073 }
4074
4075 /* frec7(+-0) = +-inf */
4076 if (float32_is_zero(f)) {
4077 s->float_exception_flags |= float_flag_divbyzero;
4078 return float32_set_sign(float32_infinity, sign);
4079 }
4080
4081 /* frec7(sNaN) = canonical NaN */
4082 if (float32_is_signaling_nan(f, s)) {
4083 s->float_exception_flags |= float_flag_invalid;
4084 return float32_default_nan(s);
4085 }
4086
4087 /* frec7(qNaN) = canonical NaN */
4088 if (float32_is_quiet_nan(f, s)) {
4089 return float32_default_nan(s);
4090 }
4091
4092 /* +-normal, +-subnormal */
4093 uint64_t val = frec7(f, exp_size, frac_size, s);
4094 return make_float32(val);
4095 }
4096
4097 static float64 frec7_d(float64 f, float_status *s)
4098 {
4099 int exp_size = 11, frac_size = 52;
4100 bool sign = float64_is_neg(f);
4101
4102 /* frec7(+-inf) = +-0 */
4103 if (float64_is_infinity(f)) {
4104 return float64_set_sign(float64_zero, sign);
4105 }
4106
4107 /* frec7(+-0) = +-inf */
4108 if (float64_is_zero(f)) {
4109 s->float_exception_flags |= float_flag_divbyzero;
4110 return float64_set_sign(float64_infinity, sign);
4111 }
4112
4113 /* frec7(sNaN) = canonical NaN */
4114 if (float64_is_signaling_nan(f, s)) {
4115 s->float_exception_flags |= float_flag_invalid;
4116 return float64_default_nan(s);
4117 }
4118
4119 /* frec7(qNaN) = canonical NaN */
4120 if (float64_is_quiet_nan(f, s)) {
4121 return float64_default_nan(s);
4122 }
4123
4124 /* +-normal, +-subnormal */
4125 uint64_t val = frec7(f, exp_size, frac_size, s);
4126 return make_float64(val);
4127 }
4128
4129 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
4130 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
4131 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
4132 GEN_VEXT_V_ENV(vfrec7_v_h, 2)
4133 GEN_VEXT_V_ENV(vfrec7_v_w, 4)
4134 GEN_VEXT_V_ENV(vfrec7_v_d, 8)
4135
4136 /* Vector Floating-Point MIN/MAX Instructions */
4137 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
4138 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
4139 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
4140 GEN_VEXT_VV_ENV(vfmin_vv_h, 2)
4141 GEN_VEXT_VV_ENV(vfmin_vv_w, 4)
4142 GEN_VEXT_VV_ENV(vfmin_vv_d, 8)
4143 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
4144 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
4145 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
4146 GEN_VEXT_VF(vfmin_vf_h, 2)
4147 GEN_VEXT_VF(vfmin_vf_w, 4)
4148 GEN_VEXT_VF(vfmin_vf_d, 8)
4149
4150 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
4151 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
4152 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
4153 GEN_VEXT_VV_ENV(vfmax_vv_h, 2)
4154 GEN_VEXT_VV_ENV(vfmax_vv_w, 4)
4155 GEN_VEXT_VV_ENV(vfmax_vv_d, 8)
4156 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
4157 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
4158 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
4159 GEN_VEXT_VF(vfmax_vf_h, 2)
4160 GEN_VEXT_VF(vfmax_vf_w, 4)
4161 GEN_VEXT_VF(vfmax_vf_d, 8)
4162
4163 /* Vector Floating-Point Sign-Injection Instructions */
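/*
 * The helpers below are called as OP(a, b) = OP(vs2[i], vs1[i] or rs1): the
 * magnitude bits come from a while the sign is taken from b (vfsgnj), from
 * the inverse of b (vfsgnjn), or from the XOR of both signs (vfsgnjx).
 */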
4164 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
4165 {
4166 return deposit64(b, 0, 15, a);
4167 }
4168
4169 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
4170 {
4171 return deposit64(b, 0, 31, a);
4172 }
4173
4174 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
4175 {
4176 return deposit64(b, 0, 63, a);
4177 }
4178
4179 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
4180 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
4181 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
4182 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2)
4183 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4)
4184 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8)
4185 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
4186 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
4187 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
4188 GEN_VEXT_VF(vfsgnj_vf_h, 2)
4189 GEN_VEXT_VF(vfsgnj_vf_w, 4)
4190 GEN_VEXT_VF(vfsgnj_vf_d, 8)
4191
4192 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
4193 {
4194 return deposit64(~b, 0, 15, a);
4195 }
4196
4197 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
4198 {
4199 return deposit64(~b, 0, 31, a);
4200 }
4201
4202 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
4203 {
4204 return deposit64(~b, 0, 63, a);
4205 }
4206
4207 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
4208 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
4209 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
4210 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2)
4211 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4)
4212 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8)
4213 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
4214 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
4215 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
4216 GEN_VEXT_VF(vfsgnjn_vf_h, 2)
4217 GEN_VEXT_VF(vfsgnjn_vf_w, 4)
4218 GEN_VEXT_VF(vfsgnjn_vf_d, 8)
4219
4220 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
4221 {
4222 return deposit64(b ^ a, 0, 15, a);
4223 }
4224
4225 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
4226 {
4227 return deposit64(b ^ a, 0, 31, a);
4228 }
4229
4230 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
4231 {
4232 return deposit64(b ^ a, 0, 63, a);
4233 }
4234
4235 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
4236 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
4237 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
4238 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2)
4239 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4)
4240 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8)
4241 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
4242 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
4243 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
4244 GEN_VEXT_VF(vfsgnjx_vf_h, 2)
4245 GEN_VEXT_VF(vfsgnjx_vf_w, 4)
4246 GEN_VEXT_VF(vfsgnjx_vf_d, 8)
4247
4248 /* Vector Floating-Point Compare Instructions */
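/*
 * The compare helpers produce one mask bit per element.  Masked-off
 * elements are written as 1 when the mask policy is agnostic, and since
 * mask destinations are always treated as tail-agnostic the bits past vl
 * may be filled with 1s as well.
 */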
4249 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \
4250 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
4251 CPURISCVState *env, uint32_t desc) \
4252 { \
4253 uint32_t vm = vext_vm(desc); \
4254 uint32_t vl = env->vl; \
4255 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
4256 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
4257 uint32_t vma = vext_vma(desc); \
4258 uint32_t i; \
4259 \
4260 VSTART_CHECK_EARLY_EXIT(env, vl); \
4261 \
4262 for (i = env->vstart; i < vl; i++) { \
4263 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
4264 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
4265 if (!vm && !vext_elem_mask(v0, i)) { \
4266 /* set masked-off elements to 1s */ \
4267 if (vma) { \
4268 vext_set_elem_mask(vd, i, 1); \
4269 } \
4270 continue; \
4271 } \
4272 vext_set_elem_mask(vd, i, \
4273 DO_OP(s2, s1, &env->fp_status)); \
4274 } \
4275 env->vstart = 0; \
4276 /*
4277 * mask destination registers are always tail-agnostic
4278 * set tail elements to 1s
4279 */ \
4280 if (vta_all_1s) { \
4281 for (; i < total_elems; i++) { \
4282 vext_set_elem_mask(vd, i, 1); \
4283 } \
4284 } \
4285 }
4286
4287 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
4288 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
4289 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
4290
4291 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \
4292 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4293 CPURISCVState *env, uint32_t desc) \
4294 { \
4295 uint32_t vm = vext_vm(desc); \
4296 uint32_t vl = env->vl; \
4297 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
4298 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
4299 uint32_t vma = vext_vma(desc); \
4300 uint32_t i; \
4301 \
4302 VSTART_CHECK_EARLY_EXIT(env, vl); \
4303 \
4304 for (i = env->vstart; i < vl; i++) { \
4305 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
4306 if (!vm && !vext_elem_mask(v0, i)) { \
4307 /* set masked-off elements to 1s */ \
4308 if (vma) { \
4309 vext_set_elem_mask(vd, i, 1); \
4310 } \
4311 continue; \
4312 } \
4313 vext_set_elem_mask(vd, i, \
4314 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \
4315 } \
4316 env->vstart = 0; \
4317 /*
4318 * mask destination registers are always tail-agnostic
4319 * set tail elements to 1s
4320 */ \
4321 if (vta_all_1s) { \
4322 for (; i < total_elems; i++) { \
4323 vext_set_elem_mask(vd, i, 1); \
4324 } \
4325 } \
4326 }
4327
4328 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
4329 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
4330 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
4331
4332 static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
4333 {
4334 FloatRelation compare = float16_compare_quiet(a, b, s);
4335 return compare != float_relation_equal;
4336 }
4337
4338 static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
4339 {
4340 FloatRelation compare = float32_compare_quiet(a, b, s);
4341 return compare != float_relation_equal;
4342 }
4343
4344 static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
4345 {
4346 FloatRelation compare = float64_compare_quiet(a, b, s);
4347 return compare != float_relation_equal;
4348 }
4349
4350 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
4351 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
4352 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
4353 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
4354 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
4355 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
4356
4357 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
4358 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
4359 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
4360 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
4361 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
4362 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
4363
4364 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
4365 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
4366 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
4367 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
4368 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
4369 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
4370
4371 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
4372 {
4373 FloatRelation compare = float16_compare(a, b, s);
4374 return compare == float_relation_greater;
4375 }
4376
4377 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
4378 {
4379 FloatRelation compare = float32_compare(a, b, s);
4380 return compare == float_relation_greater;
4381 }
4382
4383 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
4384 {
4385 FloatRelation compare = float64_compare(a, b, s);
4386 return compare == float_relation_greater;
4387 }
4388
4389 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
4390 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
4391 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4392
4393 static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4394 {
4395 FloatRelation compare = float16_compare(a, b, s);
4396 return compare == float_relation_greater ||
4397 compare == float_relation_equal;
4398 }
4399
4400 static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
4401 {
4402 FloatRelation compare = float32_compare(a, b, s);
4403 return compare == float_relation_greater ||
4404 compare == float_relation_equal;
4405 }
4406
4407 static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
4408 {
4409 FloatRelation compare = float64_compare(a, b, s);
4410 return compare == float_relation_greater ||
4411 compare == float_relation_equal;
4412 }
4413
4414 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
4415 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
4416 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
4417
4418 /* Vector Floating-Point Classify Instruction */
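/*
 * fclass_h/s/d return the standard 10-bit class mask:
 * bit 0: -inf, 1: -normal, 2: -subnormal, 3: -0, 4: +0, 5: +subnormal,
 * 6: +normal, 7: +inf, 8: signaling NaN, 9: quiet NaN.
 */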
4419 target_ulong fclass_h(uint64_t frs1)
4420 {
4421 float16 f = frs1;
4422 bool sign = float16_is_neg(f);
4423
4424 if (float16_is_infinity(f)) {
4425 return sign ? 1 << 0 : 1 << 7;
4426 } else if (float16_is_zero(f)) {
4427 return sign ? 1 << 3 : 1 << 4;
4428 } else if (float16_is_zero_or_denormal(f)) {
4429 return sign ? 1 << 2 : 1 << 5;
4430 } else if (float16_is_any_nan(f)) {
4431 float_status s = { }; /* for snan_bit_is_one */
4432 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4433 } else {
4434 return sign ? 1 << 1 : 1 << 6;
4435 }
4436 }
4437
4438 target_ulong fclass_s(uint64_t frs1)
4439 {
4440 float32 f = frs1;
4441 bool sign = float32_is_neg(f);
4442
4443 if (float32_is_infinity(f)) {
4444 return sign ? 1 << 0 : 1 << 7;
4445 } else if (float32_is_zero(f)) {
4446 return sign ? 1 << 3 : 1 << 4;
4447 } else if (float32_is_zero_or_denormal(f)) {
4448 return sign ? 1 << 2 : 1 << 5;
4449 } else if (float32_is_any_nan(f)) {
4450 float_status s = { }; /* for snan_bit_is_one */
4451 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4452 } else {
4453 return sign ? 1 << 1 : 1 << 6;
4454 }
4455 }
4456
4457 target_ulong fclass_d(uint64_t frs1)
4458 {
4459 float64 f = frs1;
4460 bool sign = float64_is_neg(f);
4461
4462 if (float64_is_infinity(f)) {
4463 return sign ? 1 << 0 : 1 << 7;
4464 } else if (float64_is_zero(f)) {
4465 return sign ? 1 << 3 : 1 << 4;
4466 } else if (float64_is_zero_or_denormal(f)) {
4467 return sign ? 1 << 2 : 1 << 5;
4468 } else if (float64_is_any_nan(f)) {
4469 float_status s = { }; /* for snan_bit_is_one */
4470 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4471 } else {
4472 return sign ? 1 << 1 : 1 << 6;
4473 }
4474 }
4475
4476 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
4477 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
4478 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
4479 GEN_VEXT_V(vfclass_v_h, 2)
4480 GEN_VEXT_V(vfclass_v_w, 4)
4481 GEN_VEXT_V(vfclass_v_d, 8)
4482
4483 /* Vector Floating-Point Merge Instruction */
4484
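/*
 * For a masked vfmerge, vd[i] takes the scalar f[rs1] where the mask bit is
 * set and the vs2 element where it is clear; the tail follows the usual
 * tail-agnostic handling.
 */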
4485 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \
4486 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4487 CPURISCVState *env, uint32_t desc) \
4488 { \
4489 uint32_t vm = vext_vm(desc); \
4490 uint32_t vl = env->vl; \
4491 uint32_t esz = sizeof(ETYPE); \
4492 uint32_t total_elems = \
4493 vext_get_total_elems(env, desc, esz); \
4494 uint32_t vta = vext_vta(desc); \
4495 uint32_t i; \
4496 \
4497 VSTART_CHECK_EARLY_EXIT(env, vl); \
4498 \
4499 for (i = env->vstart; i < vl; i++) { \
4500 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
4501 *((ETYPE *)vd + H(i)) = \
4502 (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \
4503 } \
4504 env->vstart = 0; \
4505 /* set tail elements to 1s */ \
4506 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
4507 }
4508
4509 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
4510 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
4511 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
4512
4513 /* Single-Width Floating-Point/Integer Type-Convert Instructions */
4514 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
4515 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
4516 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
4517 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
4518 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2)
4519 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4)
4520 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8)
4521
4522 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
4523 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
4524 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
4525 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
4526 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2)
4527 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4)
4528 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8)
4529
4530 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
4531 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
4532 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
4533 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
4534 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2)
4535 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4)
4536 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8)
4537
4538 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
4539 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
4540 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
4541 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
4542 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2)
4543 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4)
4544 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8)
4545
4546 /* Widening Floating-Point/Integer Type-Convert Instructions */
4547 /* (TD, T2, TX2) */
4548 #define WOP_UU_B uint16_t, uint8_t, uint8_t
4549 #define WOP_UU_H uint32_t, uint16_t, uint16_t
4550 #define WOP_UU_W uint64_t, uint32_t, uint32_t
4551 /*
4552 * vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.
4553 */
4554 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
4555 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
4556 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4)
4557 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8)
4558
4559 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
4560 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
4561 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
4562 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4)
4563 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8)
4564
4565 /*
4566 * vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float.
4567 */
4568 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
4569 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
4570 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
4571 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2)
4572 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4)
4573 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8)
4574
4575 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
4576 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
4577 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
4578 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
4579 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2)
4580 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4)
4581 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8)
4582
4583 /*
4584 * vfwcvt.f.f.v vd, vs2, vm # Convert single-width float to double-width float.
4585 */
4586 static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4587 {
4588 return float16_to_float32(a, true, s);
4589 }
4590
4591 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
4592 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
4593 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4)
4594 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8)
4595
4596 RVVCALL(OPFVV1, vfwcvtbf16_f_f_v, WOP_UU_H, H4, H2, bfloat16_to_float32)
4597 GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v, 4)
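/*
 * vfwcvtffv16() exists only to pin the ieee=true flag on
 * float16_to_float32(), so the half-precision input is interpreted as
 * IEEE binary16 rather than the alternative half format. The bfloat16
 * widening convert above reuses the same OPFVV1 plumbing with
 * bfloat16_to_float32().
 */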
4598
4599 /* Narrowing Floating-Point/Integer Type-Convert Instructions */
4600 /* (TD, T2, TX2) */
4601 #define NOP_UU_B uint8_t, uint16_t, uint32_t
4602 #define NOP_UU_H uint16_t, uint32_t, uint32_t
4603 #define NOP_UU_W uint32_t, uint64_t, uint64_t
4604 /* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
4605 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
4606 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
4607 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
4608 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1)
4609 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2)
4610 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4)
4611
4612 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
4613 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
4614 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
4615 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
4616 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1)
4617 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2)
4618 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4)
4619
4620 /*
4621 * vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float.
4622 */
4623 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
4624 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
4625 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2)
4626 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4)
4627
4628 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
4629 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
4630 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
4631 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2)
4632 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4)
4633
4634 /* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float. */
4635 static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4636 {
4637 return float32_to_float16(a, true, s);
4638 }
4639
4640 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
4641 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
4642 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2)
4643 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4)
4644
4645 RVVCALL(OPFVV1, vfncvtbf16_f_f_w, NOP_UU_H, H2, H4, float32_to_bfloat16)
4646 GEN_VEXT_V_ENV(vfncvtbf16_f_f_w, 2)
4647
4648 /*
4649 * Vector Reduction Operations
4650 */
4651 /* Vector Single-Width Integer Reduction Instructions */
4652 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \
4653 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4654 void *vs2, CPURISCVState *env, \
4655 uint32_t desc) \
4656 { \
4657 uint32_t vm = vext_vm(desc); \
4658 uint32_t vl = env->vl; \
4659 uint32_t esz = sizeof(TD); \
4660 uint32_t vlenb = simd_maxsz(desc); \
4661 uint32_t vta = vext_vta(desc); \
4662 uint32_t i; \
4663 TD s1 = *((TD *)vs1 + HD(0)); \
4664 \
4665 VSTART_CHECK_EARLY_EXIT(env, vl); \
4666 \
4667 for (i = env->vstart; i < vl; i++) { \
4668 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
4669 if (!vm && !vext_elem_mask(v0, i)) { \
4670 continue; \
4671 } \
4672 s1 = OP(s1, (TD)s2); \
4673 } \
4674 if (vl > 0) { \
4675 *((TD *)vd + HD(0)) = s1; \
4676 } \
4677 env->vstart = 0; \
4678 /* set tail elements to 1s */ \
4679 vext_set_elems_1s(vd, vta, esz, vlenb); \
4680 }
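/*
 * The reduction skeleton above folds the active elements of vs2 into a
 * scalar accumulator seeded from vs1[0] and writes the result to vd[0]
 * (only when vl > 0, since the spec leaves vd unchanged for vl == 0).
 * Roughly, GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD)
 * behaves like:
 *
 *     int8_t acc = vs1[0];
 *     for (i = vstart; i < vl; i++) {
 *         if (active(i)) {
 *             acc = acc + vs2[i];
 *         }
 *     }
 *     if (vl > 0) {
 *         vd[0] = acc;
 *     }
 *
 * Everything past the single result element (bytes esz..vlenb) is
 * treated as tail and set to 1s when the tail-agnostic policy asks for it.
 */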
4681
4682 /* vd[0] = sum(vs1[0], vs2[*]) */
4683 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD)
4684 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
4685 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
4686 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)
4687
4688 /* vd[0] = maxu(vs1[0], vs2[*]) */
4689 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX)
4690 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
4691 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
4692 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)
4693
4694 /* vd[0] = max(vs1[0], vs2[*]) */
4695 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX)
4696 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
4697 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
4698 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)
4699
4700 /* vd[0] = minu(vs1[0], vs2[*]) */
4701 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN)
4702 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
4703 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
4704 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)
4705
4706 /* vd[0] = min(vs1[0], vs2[*]) */
4707 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN)
4708 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
4709 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
4710 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)
4711
4712 /* vd[0] = and(vs1[0], vs2[*]) */
4713 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND)
4714 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
4715 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
4716 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)
4717
4718 /* vd[0] = or(vs1[0], vs2[*]) */
4719 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR)
4720 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
4721 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
4722 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)
4723
4724 /* vd[0] = xor(vs1[0], vs2[*]) */
4725 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR)
4726 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
4727 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
4728 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)
4729
4730 /* Vector Widening Integer Reduction Instructions */
4731 /* signed sum reduction into double-width accumulator */
4732 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD)
4733 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
4734 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)
4735
4736 /* Unsigned sum reduction into double-width accumulator */
4737 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD)
4738 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
4739 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
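/*
 * The widening integer reductions reuse GEN_VEXT_RED with TD twice as
 * wide as TS2, so the (TD)s2 cast in the loop performs the widening:
 * sign-extension for the vwredsum forms, zero-extension for vwredsumu.
 */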
4740
4741 /* Vector Single-Width Floating-Point Reduction Instructions */
4742 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \
4743 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4744 void *vs2, CPURISCVState *env, \
4745 uint32_t desc) \
4746 { \
4747 uint32_t vm = vext_vm(desc); \
4748 uint32_t vl = env->vl; \
4749 uint32_t esz = sizeof(TD); \
4750 uint32_t vlenb = simd_maxsz(desc); \
4751 uint32_t vta = vext_vta(desc); \
4752 uint32_t i; \
4753 TD s1 = *((TD *)vs1 + HD(0)); \
4754 \
4755 VSTART_CHECK_EARLY_EXIT(env, vl); \
4756 \
4757 for (i = env->vstart; i < vl; i++) { \
4758 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
4759 if (!vm && !vext_elem_mask(v0, i)) { \
4760 continue; \
4761 } \
4762 s1 = OP(s1, (TD)s2, &env->fp_status); \
4763 } \
4764 if (vl > 0) { \
4765 *((TD *)vd + HD(0)) = s1; \
4766 } \
4767 env->vstart = 0; \
4768 /* set tail elements to 1s */ \
4769 vext_set_elems_1s(vd, vta, esz, vlenb); \
4770 }
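/*
 * Same skeleton as GEN_VEXT_RED, but OP is a softfloat routine that
 * takes &env->fp_status, so the rounding mode and accrued exception
 * flags come from the CPU's FP state. Note that with this strictly
 * element-ordered loop the "unordered" and "ordered" sums below end up
 * with identical behaviour; both are instantiated with floatN_add.
 */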
4771
4772 /* Unordered sum */
4773 GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4774 GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4775 GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
4776
4777 /* Ordered sum */
4778 GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4779 GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4780 GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
4781
4782 /* Maximum value */
4783 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2,
4784 float16_maximum_number)
4785 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4,
4786 float32_maximum_number)
4787 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8,
4788 float64_maximum_number)
4789
4790 /* Minimum value */
4791 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2,
4792 float16_minimum_number)
4793 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4,
4794 float32_minimum_number)
4795 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8,
4796 float64_minimum_number)
4797
4798 /* Vector Widening Floating-Point Add Instructions */
4799 static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s)
4800 {
4801 return float32_add(a, float16_to_float32(b, true, s), s);
4802 }
4803
4804 static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s)
4805 {
4806 return float64_add(a, float32_to_float64(b, s), s);
4807 }
4808
4809 /* Vector Widening Floating-Point Reduction Instructions */
4810 /* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
4811 GEN_VEXT_FRED(vfwredusum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
4812 GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
4813 GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
4814 GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
4815
4816 /*
4817 * Vector Mask Operations
4818 */
4819 /* Vector Mask-Register Logical Instructions */
4820 #define GEN_VEXT_MASK_VV(NAME, OP) \
4821 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4822 void *vs2, CPURISCVState *env, \
4823 uint32_t desc) \
4824 { \
4825 uint32_t vl = env->vl; \
4826 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;\
4827 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
4828 uint32_t i; \
4829 int a, b; \
4830 \
4831 VSTART_CHECK_EARLY_EXIT(env, vl); \
4832 \
4833 for (i = env->vstart; i < vl; i++) { \
4834 a = vext_elem_mask(vs1, i); \
4835 b = vext_elem_mask(vs2, i); \
4836 vext_set_elem_mask(vd, i, OP(b, a)); \
4837 } \
4838 env->vstart = 0; \
4839 /*
4840 * mask destination register is always tail-agnostic
4841 * set tail elements to 1s
4842 */ \
4843 if (vta_all_1s) { \
4844 for (; i < total_elems; i++) { \
4845 vext_set_elem_mask(vd, i, 1); \
4846 } \
4847 } \
4848 }
4849
4850 #define DO_NAND(N, M) (!(N & M))
4851 #define DO_ANDNOT(N, M) (N & !M)
4852 #define DO_NOR(N, M) (!(N | M))
4853 #define DO_ORNOT(N, M) (N | !M)
4854 #define DO_XNOR(N, M) (!(N ^ M))
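/*
 * The mask-logical helpers operate on single mask bits, so a and b are
 * always 0 or 1 and the logical negation in DO_NAND/DO_ANDNOT/DO_ORNOT/
 * DO_XNOR is well defined. The op is applied as OP(b, a), i.e. vs2
 * first: vmandn_mm, for example, computes vd[i] = vs2[i] & !vs1[i].
 */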
4855
4856 GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
4857 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
4858 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
4859 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
4860 GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
4861 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
4862 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
4863 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
4864
4865 /* Vector count population in mask vcpop */
4866 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
4867 uint32_t desc)
4868 {
4869 target_ulong cnt = 0;
4870 uint32_t vm = vext_vm(desc);
4871 uint32_t vl = env->vl;
4872 int i;
4873
4874 for (i = env->vstart; i < vl; i++) {
4875 if (vm || vext_elem_mask(v0, i)) {
4876 if (vext_elem_mask(vs2, i)) {
4877 cnt++;
4878 }
4879 }
4880 }
4881 env->vstart = 0;
4882 return cnt;
4883 }
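/*
 * vcpop.m counts, for i in [vstart, vl), the elements whose v0 mask bit
 * is set (or all of them when vm) and whose vs2 bit is also set; the
 * count is returned to the caller for writeback to rd. Unlike the
 * element-wise helpers it simply starts at env->vstart instead of going
 * through VSTART_CHECK_EARLY_EXIT().
 */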
4884
4885 /* vfirst find-first-set mask bit */
4886 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4887 uint32_t desc)
4888 {
4889 uint32_t vm = vext_vm(desc);
4890 uint32_t vl = env->vl;
4891 int i;
4892
4893 for (i = env->vstart; i < vl; i++) {
4894 if (vm || vext_elem_mask(v0, i)) {
4895 if (vext_elem_mask(vs2, i)) {
4896 return i;
4897 }
4898 }
4899 }
4900 env->vstart = 0;
4901 return -1LL;
4902 }
4903
4904 enum set_mask_type {
4905 ONLY_FIRST = 1,
4906 INCLUDE_FIRST,
4907 BEFORE_FIRST,
4908 };
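/*
 * The three set_mask_type values select which of the vmsbf/vmsif/vmsof
 * behaviours the shared vmsetm() worker below implements:
 *   BEFORE_FIRST  - set-before-first    (vmsbf.m)
 *   INCLUDE_FIRST - set-including-first (vmsif.m)
 *   ONLY_FIRST    - set-only-first      (vmsof.m)
 */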
4909
4910 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
4911 uint32_t desc, enum set_mask_type type)
4912 {
4913 uint32_t vm = vext_vm(desc);
4914 uint32_t vl = env->vl;
4915 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;
4916 uint32_t vta_all_1s = vext_vta_all_1s(desc);
4917 uint32_t vma = vext_vma(desc);
4918 int i;
4919 bool first_mask_bit = false;
4920
4921 VSTART_CHECK_EARLY_EXIT(env, vl);
4922
4923 for (i = env->vstart; i < vl; i++) {
4924 if (!vm && !vext_elem_mask(v0, i)) {
4925 /* set masked-off elements to 1s */
4926 if (vma) {
4927 vext_set_elem_mask(vd, i, 1);
4928 }
4929 continue;
4930 }
4931 /* write a zero to all following active elements */
4932 if (first_mask_bit) {
4933 vext_set_elem_mask(vd, i, 0);
4934 continue;
4935 }
4936 if (vext_elem_mask(vs2, i)) {
4937 first_mask_bit = true;
4938 if (type == BEFORE_FIRST) {
4939 vext_set_elem_mask(vd, i, 0);
4940 } else {
4941 vext_set_elem_mask(vd, i, 1);
4942 }
4943 } else {
4944 if (type == ONLY_FIRST) {
4945 vext_set_elem_mask(vd, i, 0);
4946 } else {
4947 vext_set_elem_mask(vd, i, 1);
4948 }
4949 }
4950 }
4951 env->vstart = 0;
4952 /*
4953 * mask destination register is always tail-agnostic
4954 * set tail elements to 1s
4955 */
4956 if (vta_all_1s) {
4957 for (; i < total_elems; i++) {
4958 vext_set_elem_mask(vd, i, 1);
4959 }
4960 }
4961 }
4962
4963 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4964 uint32_t desc)
4965 {
4966 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
4967 }
4968
4969 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4970 uint32_t desc)
4971 {
4972 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
4973 }
4974
4975 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4976 uint32_t desc)
4977 {
4978 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
4979 }
4980
4981 /* Vector Iota Instruction */
4982 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \
4983 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \
4984 uint32_t desc) \
4985 { \
4986 uint32_t vm = vext_vm(desc); \
4987 uint32_t vl = env->vl; \
4988 uint32_t esz = sizeof(ETYPE); \
4989 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4990 uint32_t vta = vext_vta(desc); \
4991 uint32_t vma = vext_vma(desc); \
4992 uint32_t sum = 0; \
4993 int i; \
4994 \
4995 VSTART_CHECK_EARLY_EXIT(env, vl); \
4996 \
4997 for (i = env->vstart; i < vl; i++) { \
4998 if (!vm && !vext_elem_mask(v0, i)) { \
4999 /* set masked-off elements to 1s */ \
5000 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5001 continue; \
5002 } \
5003 *((ETYPE *)vd + H(i)) = sum; \
5004 if (vext_elem_mask(vs2, i)) { \
5005 sum++; \
5006 } \
5007 } \
5008 env->vstart = 0; \
5009 /* set tail elements to 1s */ \
5010 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5011 }
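/*
 * viota writes to each active element the number of set vs2 mask bits
 * seen so far among the preceding active elements, i.e. an exclusive
 * prefix sum: the running count is written before it is incremented,
 * and masked-off elements neither receive a count nor contribute one.
 */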
5012
5013 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
5014 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
5015 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
5016 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
5017
5018 /* Vector Element Index Instruction */
5019 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \
5020 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \
5021 { \
5022 uint32_t vm = vext_vm(desc); \
5023 uint32_t vl = env->vl; \
5024 uint32_t esz = sizeof(ETYPE); \
5025 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5026 uint32_t vta = vext_vta(desc); \
5027 uint32_t vma = vext_vma(desc); \
5028 int i; \
5029 \
5030 VSTART_CHECK_EARLY_EXIT(env, vl); \
5031 \
5032 for (i = env->vstart; i < vl; i++) { \
5033 if (!vm && !vext_elem_mask(v0, i)) { \
5034 /* set masked-off elements to 1s */ \
5035 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5036 continue; \
5037 } \
5038 *((ETYPE *)vd + H(i)) = i; \
5039 } \
5040 env->vstart = 0; \
5041 /* set tail elements to 1s */ \
5042 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5043 }
5044
5045 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
5046 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
5047 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
5048 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
5049
5050 /*
5051 * Vector Permutation Instructions
5052 */
5053
5054 /* Vector Slide Instructions */
5055 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \
5056 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5057 CPURISCVState *env, uint32_t desc) \
5058 { \
5059 uint32_t vm = vext_vm(desc); \
5060 uint32_t vl = env->vl; \
5061 uint32_t esz = sizeof(ETYPE); \
5062 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5063 uint32_t vta = vext_vta(desc); \
5064 uint32_t vma = vext_vma(desc); \
5065 target_ulong offset = s1, i_min, i; \
5066 \
5067 VSTART_CHECK_EARLY_EXIT(env, vl); \
5068 \
5069 i_min = MAX(env->vstart, offset); \
5070 for (i = i_min; i < vl; i++) { \
5071 if (!vm && !vext_elem_mask(v0, i)) { \
5072 /* set masked-off elements to 1s */ \
5073 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5074 continue; \
5075 } \
5076 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \
5077 } \
5078 env->vstart = 0; \
5079 /* set tail elements to 1s */ \
5080 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5081 }
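/*
 * For vslideup the loop starts at MAX(vstart, offset), so destination
 * elements with index < offset are left untouched, as the spec requires;
 * each active element i >= offset then receives vs2[i - offset].
 */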
5082
5083 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
5084 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
5085 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
5086 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
5087 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
5088
5089 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \
5090 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5091 CPURISCVState *env, uint32_t desc) \
5092 { \
5093 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
5094 uint32_t vm = vext_vm(desc); \
5095 uint32_t vl = env->vl; \
5096 uint32_t esz = sizeof(ETYPE); \
5097 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5098 uint32_t vta = vext_vta(desc); \
5099 uint32_t vma = vext_vma(desc); \
5100 target_ulong i_max, i_min, i; \
5101 \
5102 VSTART_CHECK_EARLY_EXIT(env, vl); \
5103 \
5104 i_min = MIN(s1 < vlmax ? vlmax - s1 : 0, vl); \
5105 i_max = MAX(i_min, env->vstart); \
5106 for (i = env->vstart; i < i_max; ++i) { \
5107 if (!vm && !vext_elem_mask(v0, i)) { \
5108 /* set masked-off elements to 1s */ \
5109 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5110 continue; \
5111 } \
5112 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \
5113 } \
5114 \
5115 for (i = i_max; i < vl; ++i) { \
5116 if (!vm && !vext_elem_mask(v0, i)) { \
5117 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5118 continue; \
5119 } \
5120 *((ETYPE *)vd + H(i)) = 0; \
5121 } \
5122 \
5123 env->vstart = 0; \
5124 /* set tail elements to 1s */ \
5125 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5126 }
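/*
 * For vslidedown, i_min is the number of destination elements that still
 * have an in-range source (vlmax - s1, clamped to [0, vl]), and i_max
 * additionally accounts for vstart. The first loop copies vs2[i + s1]
 * for those elements; from i_max onward the second loop zeroes the
 * remaining active elements, which would otherwise read past the source
 * register group.
 */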
5127
5128 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
5129 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
5130 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
5131 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
5132 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
5133
5134 #define GEN_VEXT_VSLIE1UP(BITWIDTH, H) \
5135 static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1, \
5136 void *vs2, CPURISCVState *env, \
5137 uint32_t desc) \
5138 { \
5139 typedef uint##BITWIDTH##_t ETYPE; \
5140 uint32_t vm = vext_vm(desc); \
5141 uint32_t vl = env->vl; \
5142 uint32_t esz = sizeof(ETYPE); \
5143 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5144 uint32_t vta = vext_vta(desc); \
5145 uint32_t vma = vext_vma(desc); \
5146 uint32_t i; \
5147 \
5148 VSTART_CHECK_EARLY_EXIT(env, vl); \
5149 \
5150 for (i = env->vstart; i < vl; i++) { \
5151 if (!vm && !vext_elem_mask(v0, i)) { \
5152 /* set masked-off elements to 1s */ \
5153 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5154 continue; \
5155 } \
5156 if (i == 0) { \
5157 *((ETYPE *)vd + H(i)) = s1; \
5158 } else { \
5159 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \
5160 } \
5161 } \
5162 env->vstart = 0; \
5163 /* set tail elements to 1s */ \
5164 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5165 }
5166
5167 GEN_VEXT_VSLIE1UP(8, H1)
5168 GEN_VEXT_VSLIE1UP(16, H2)
5169 GEN_VEXT_VSLIE1UP(32, H4)
5170 GEN_VEXT_VSLIE1UP(64, H8)
5171
5172 #define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \
5173 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5174 CPURISCVState *env, uint32_t desc) \
5175 { \
5176 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
5177 }
5178
5179 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
5180 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
5181 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
5182 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
5183 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
5184
5185 #define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \
5186 static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1, \
5187 void *vs2, CPURISCVState *env, \
5188 uint32_t desc) \
5189 { \
5190 typedef uint##BITWIDTH##_t ETYPE; \
5191 uint32_t vm = vext_vm(desc); \
5192 uint32_t vl = env->vl; \
5193 uint32_t esz = sizeof(ETYPE); \
5194 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5195 uint32_t vta = vext_vta(desc); \
5196 uint32_t vma = vext_vma(desc); \
5197 uint32_t i; \
5198 \
5199 VSTART_CHECK_EARLY_EXIT(env, vl); \
5200 \
5201 for (i = env->vstart; i < vl; i++) { \
5202 if (!vm && !vext_elem_mask(v0, i)) { \
5203 /* set masked-off elements to 1s */ \
5204 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5205 continue; \
5206 } \
5207 if (i == vl - 1) { \
5208 *((ETYPE *)vd + H(i)) = s1; \
5209 } else { \
5210 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \
5211 } \
5212 } \
5213 env->vstart = 0; \
5214 /* set tail elements to 1s */ \
5215 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5216 }
5217
5218 GEN_VEXT_VSLIDE1DOWN(8, H1)
5219 GEN_VEXT_VSLIDE1DOWN(16, H2)
5220 GEN_VEXT_VSLIDE1DOWN(32, H4)
5221 GEN_VEXT_VSLIDE1DOWN(64, H8)
5222
5223 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \
5224 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5225 CPURISCVState *env, uint32_t desc) \
5226 { \
5227 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
5228 }
5229
5230 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
5231 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
5232 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
5233 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
5234 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
5235
5236 /* Vector Floating-Point Slide Instructions */
5237 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \
5238 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
5239 CPURISCVState *env, uint32_t desc) \
5240 { \
5241 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
5242 }
5243
5244 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
5245 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
5246 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
5247 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)
5248
5249 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \
5250 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
5251 CPURISCVState *env, uint32_t desc) \
5252 { \
5253 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
5254 }
5255
5256 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
5257 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
5258 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
5259 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
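/*
 * The floating-point slide1 helpers simply reuse the integer
 * vslide1up/vslide1down bodies: the scalar arrives as a raw uint64_t bit
 * pattern (presumably already NaN-boxed by the translator), so no FP
 * interpretation is needed at this level.
 */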
5260
5261 /* Vector Register Gather Instruction */
5262 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \
5263 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
5264 CPURISCVState *env, uint32_t desc) \
5265 { \
5266 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \
5267 uint32_t vm = vext_vm(desc); \
5268 uint32_t vl = env->vl; \
5269 uint32_t esz = sizeof(TS2); \
5270 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5271 uint32_t vta = vext_vta(desc); \
5272 uint32_t vma = vext_vma(desc); \
5273 uint64_t index; \
5274 uint32_t i; \
5275 \
5276 VSTART_CHECK_EARLY_EXIT(env, vl); \
5277 \
5278 for (i = env->vstart; i < vl; i++) { \
5279 if (!vm && !vext_elem_mask(v0, i)) { \
5280 /* set masked-off elements to 1s */ \
5281 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5282 continue; \
5283 } \
5284 index = *((TS1 *)vs1 + HS1(i)); \
5285 if (index >= vlmax) { \
5286 *((TS2 *)vd + HS2(i)) = 0; \
5287 } else { \
5288 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \
5289 } \
5290 } \
5291 env->vstart = 0; \
5292 /* set tail elements to 1s */ \
5293 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5294 }
5295
5296 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
5297 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
5298 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
5299 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
5300 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)
5301
5302 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
5303 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
5304 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
5305 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
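/*
 * Plain vrgather uses index elements of the same width as the data,
 * while the vrgatherei16 forms always read 16-bit indices (TS1 is
 * uint16_t, HS1 is H2) regardless of SEW. In both cases an index >=
 * vlmax selects zero, per the comment above.
 */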
5306
5307 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \
5308 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5309 CPURISCVState *env, uint32_t desc) \
5310 { \
5311 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
5312 uint32_t vm = vext_vm(desc); \
5313 uint32_t vl = env->vl; \
5314 uint32_t esz = sizeof(ETYPE); \
5315 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5316 uint32_t vta = vext_vta(desc); \
5317 uint32_t vma = vext_vma(desc); \
5318 uint64_t index = s1; \
5319 uint32_t i; \
5320 \
5321 VSTART_CHECK_EARLY_EXIT(env, vl); \
5322 \
5323 for (i = env->vstart; i < vl; i++) { \
5324 if (!vm && !vext_elem_mask(v0, i)) { \
5325 /* set masked-off elements to 1s */ \
5326 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5327 continue; \
5328 } \
5329 if (index >= vlmax) { \
5330 *((ETYPE *)vd + H(i)) = 0; \
5331 } else { \
5332 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \
5333 } \
5334 } \
5335 env->vstart = 0; \
5336 /* set tail elements to 1s */ \
5337 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5338 }
5339
5340 /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
5341 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
5342 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
5343 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
5344 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
5345
5346 /* Vector Compress Instruction */
5347 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \
5348 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
5349 CPURISCVState *env, uint32_t desc) \
5350 { \
5351 uint32_t vl = env->vl; \
5352 uint32_t esz = sizeof(ETYPE); \
5353 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5354 uint32_t vta = vext_vta(desc); \
5355 uint32_t num = 0, i; \
5356 \
5357 VSTART_CHECK_EARLY_EXIT(env, vl); \
5358 \
5359 for (i = env->vstart; i < vl; i++) { \
5360 if (!vext_elem_mask(vs1, i)) { \
5361 continue; \
5362 } \
5363 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \
5364 num++; \
5365 } \
5366 env->vstart = 0; \
5367 /* set tail elements to 1s */ \
5368 vext_set_elems_1s(vd, vta, num * esz, total_elems * esz); \
5369 }
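/*
 * vcompress is unmasked (the v0 argument is unused): elements of vs2
 * whose vs1 mask bit is set are packed contiguously into vd starting at
 * element 0. num ends up as the count of packed elements, and the
 * tail-agnostic region starts immediately after them.
 */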
5370
5371 /* Compress into vd elements of vs2 where vs1 is enabled */
5372 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
5373 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
5374 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
5375 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
5376
5377 /* Vector Whole Register Move */
5378 void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
5379 {
5380 /* EEW = SEW */
5381 uint32_t maxsz = simd_maxsz(desc);
5382 uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
5383 uint32_t startb = env->vstart * sewb;
5384 uint32_t i = startb;
5385
5386 if (startb >= maxsz) {
5387 env->vstart = 0;
5388 return;
5389 }
5390
5391 if (HOST_BIG_ENDIAN && i % 8 != 0) {
5392 uint32_t j = ROUND_UP(i, 8);
5393 memcpy((uint8_t *)vd + H1(j - 1),
5394 (uint8_t *)vs2 + H1(j - 1),
5395 j - i);
5396 i = j;
5397 }
5398
5399 memcpy((uint8_t *)vd + H1(i),
5400 (uint8_t *)vs2 + H1(i),
5401 maxsz - i);
5402
5403 env->vstart = 0;
5404 }
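/*
 * vstart is in elements, so startb is the byte offset at which the copy
 * resumes. On big-endian hosts the element bytes are swizzled within
 * 8-byte host words (H1), so a partial leading 8-byte group is copied
 * from the host address of its lowest byte, H1(j - 1), before the
 * 8-byte-aligned remainder is copied with a single memcpy.
 */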
5405
5406 /* Vector Integer Extension */
5407 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \
5408 void HELPER(NAME)(void *vd, void *v0, void *vs2, \
5409 CPURISCVState *env, uint32_t desc) \
5410 { \
5411 uint32_t vl = env->vl; \
5412 uint32_t vm = vext_vm(desc); \
5413 uint32_t esz = sizeof(ETYPE); \
5414 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5415 uint32_t vta = vext_vta(desc); \
5416 uint32_t vma = vext_vma(desc); \
5417 uint32_t i; \
5418 \
5419 VSTART_CHECK_EARLY_EXIT(env, vl); \
5420 \
5421 for (i = env->vstart; i < vl; i++) { \
5422 if (!vm && !vext_elem_mask(v0, i)) { \
5423 /* set masked-off elements to 1s */ \
5424 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5425 continue; \
5426 } \
5427 *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \
5428 } \
5429 env->vstart = 0; \
5430 /* set tail elements to 1s */ \
5431 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5432 }
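/*
 * The integer extension helpers read the narrower DTYPE source element
 * and widen it to ETYPE on store. In the instantiations below, vf2/vf4/
 * vf8 give the ratio between destination SEW and source EEW, and the
 * choice of unsigned vs signed type pairs selects zero- vs
 * sign-extension.
 */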
5433
5434 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
5435 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
5436 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
5437 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
5438 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
5439 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)
5440
5441 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
5442 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
5443 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
5444 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
5445 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
5446 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
5447