1 /*
2 * RISC-V Vector Extension Helpers for QEMU.
3 *
4 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2 or later, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19 #include "qemu/osdep.h"
20 #include "qemu/host-utils.h"
21 #include "qemu/bitops.h"
22 #include "cpu.h"
23 #include "exec/memop.h"
24 #include "accel/tcg/cpu-ldst.h"
25 #include "accel/tcg/probe.h"
26 #include "exec/page-protection.h"
27 #include "exec/helper-proto.h"
28 #include "exec/tlb-flags.h"
29 #include "exec/target_page.h"
30 #include "fpu/softfloat.h"
31 #include "tcg/tcg-gvec-desc.h"
32 #include "internals.h"
33 #include "vector_internals.h"
34 #include <math.h>
35
36 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
37 target_ulong s2, target_ulong x0)
38 {
39 int vlmax, vl;
40 RISCVCPU *cpu = env_archcpu(env);
41 uint64_t vlmul = FIELD_EX64(s2, VTYPE, VLMUL);
42 uint8_t vsew = FIELD_EX64(s2, VTYPE, VSEW);
43 uint16_t sew = 8 << vsew;
44 uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
45 int xlen = riscv_cpu_xlen(env);
46 bool vill = (s2 >> (xlen - 1)) & 0x1;
47 target_ulong reserved = s2 &
48 MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
49 xlen - 1 - R_VTYPE_RESERVED_SHIFT);
50 uint16_t vlen = cpu->cfg.vlenb << 3;
51 int8_t lmul;
52
53 if (vlmul & 4) {
54 /*
55 * Fractional LMUL, check:
56 *
57 * VLEN * LMUL >= SEW
58 * VLEN >> (8 - lmul) >= sew
59 * (vlenb << 3) >> (8 - lmul) >= sew
60 */
61 if (vlmul == 4 || (vlen >> (8 - vlmul)) < sew) {
62 vill = true;
63 }
64 }
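/*
 * Worked example of the fractional-LMUL check above (illustrative only):
 * with VLEN = 128 and a vtype requesting SEW = 64, LMUL = 1/4
 * (vlmul = 6, so 8 - vlmul = 2):
 *
 *   vlen >> (8 - vlmul) = 128 >> 2 = 32 < 64
 *
 * so the configuration is rejected and vill is set.  vlmul == 4 is the
 * reserved encoding and is always rejected.
 */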
65
66 if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
67 /* only set vill bit. */
68 env->vill = 1;
69 env->vtype = 0;
70 env->vl = 0;
71 env->vstart = 0;
72 return 0;
73 }
74
75 /* lmul encoded as in DisasContext::lmul */
76 lmul = sextract32(FIELD_EX64(s2, VTYPE, VLMUL), 0, 3);
77 vlmax = vext_get_vlmax(cpu->cfg.vlenb, vsew, lmul);
78 if (s1 <= vlmax) {
79 vl = s1;
80 } else if (s1 < 2 * vlmax && cpu->cfg.rvv_vl_half_avl) {
81 vl = (s1 + 1) >> 1;
82 } else {
83 vl = vlmax;
84 }
85
86 if (cpu->cfg.rvv_vsetvl_x0_vill && x0 && (env->vl != vl)) {
87 /* only set vill bit. */
88 env->vill = 1;
89 env->vtype = 0;
90 env->vl = 0;
91 env->vstart = 0;
92 return 0;
93 }
94
95 env->vl = vl;
96 env->vtype = s2;
97 env->vstart = 0;
98 env->vill = 0;
99 return vl;
100 }
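/*
 * Worked example of the vl selection above (illustrative only): with
 * vlenb = 16 (VLEN = 128), SEW = 32 and LMUL = 2, vext_get_vlmax() gives
 * VLMAX = (128 / 32) * 2 = 8.  A requested AVL of s1 = 10 then yields:
 *   - vl = 10 if 10 <= VLMAX (not the case here);
 *   - vl = (10 + 1) >> 1 = 5 if rvv_vl_half_avl is enabled, since 10 < 2 * 8;
 *   - vl = VLMAX = 8 otherwise.
 */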
101
102 /*
103 * Get the maximum number of elements that can be operated on.
104 *
105 * log2_esz: log2 of element size in bytes.
106 */
107 static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
108 {
109 /*
110 * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
111 * so vlen in bytes (vlenb) is encoded as maxsz.
112 */
113 uint32_t vlenb = simd_maxsz(desc);
114
115 /* Return VLMAX */
116 int scale = vext_lmul(desc) - log2_esz;
117 return scale < 0 ? vlenb >> -scale : vlenb << scale;
118 }
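/*
 * Worked example (illustrative only): with vlenb = simd_maxsz(desc) = 16
 * (VLEN = 128), an lmul field of 1 (LMUL = 2) and log2_esz = 2 (SEW = 32):
 *
 *   scale = 1 - 2 = -1, so VLMAX = 16 >> 1 = 8 elements
 *
 * which matches VLEN / SEW * LMUL = (128 / 32) * 2.
 */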
119
120 /*
121 * This function checks for watchpoints before the real memory access.
122 *
123 * In system mode, the TLB API probe_access is enough for the watchpoint check.
124 * In user mode, there is no watchpoint support for now.
125 *
126 * It will trigger an exception if there is no mapping in the TLB
127 * and the page table walk can't fill the TLB entry. The guest
128 * software can then return here after processing the exception, or never return.
129 *
130 * This function can also be used when direct access to probe_access_flags is
131 * needed in order to access the flags. If a pointer to a flags operand is
132 * provided, the function calls probe_access_flags instead, honouring nonfault
133 * and updating host and flags.
134 */
135 static void probe_pages(CPURISCVState *env, target_ulong addr, target_ulong len,
136 uintptr_t ra, MMUAccessType access_type, int mmu_index,
137 void **host, int *flags, bool nonfault)
138 {
139 target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
140 target_ulong curlen = MIN(pagelen, len);
141
142 if (flags != NULL) {
143 *flags = probe_access_flags(env, adjust_addr(env, addr), curlen,
144 access_type, mmu_index, nonfault, host, ra);
145 } else {
146 probe_access(env, adjust_addr(env, addr), curlen, access_type,
147 mmu_index, ra);
148 }
149
150 if (len > curlen) {
151 addr += curlen;
152 curlen = len - curlen;
153 if (flags != NULL) {
154 *flags = probe_access_flags(env, adjust_addr(env, addr), curlen,
155 access_type, mmu_index, nonfault,
156 host, ra);
157 } else {
158 probe_access(env, adjust_addr(env, addr), curlen, access_type,
159 mmu_index, ra);
160 }
161 }
162 }
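/*
 * A minimal usage sketch (illustrative only), mirroring the two calling
 * conventions used below:
 *
 *   void *host;
 *   int flags;
 *
 *   // Hard probe: no flags pointer, faults are raised immediately.
 *   probe_pages(env, addr, size, ra, MMU_DATA_LOAD, mmu_index,
 *               &host, NULL, false);
 *
 *   // Soft probe: returns the TLB_* flag bits instead of faulting.
 *   probe_pages(env, addr, size, ra, MMU_DATA_LOAD, mmu_index,
 *               &host, &flags, true);
 */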
163
164
165 static inline void vext_set_elem_mask(void *v0, int index,
166 uint8_t value)
167 {
168 int idx = index / 64;
169 int pos = index % 64;
170 uint64_t old = ((uint64_t *)v0)[idx];
171 ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
172 }
173
174 /* elements operations for load and store */
175 typedef void vext_ldst_elem_fn_tlb(CPURISCVState *env, abi_ptr addr,
176 uint32_t idx, void *vd, uintptr_t retaddr);
177 typedef void vext_ldst_elem_fn_host(void *vd, uint32_t idx, void *host);
178
179 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \
180 static inline QEMU_ALWAYS_INLINE \
181 void NAME##_tlb(CPURISCVState *env, abi_ptr addr, \
182 uint32_t idx, void *vd, uintptr_t retaddr) \
183 { \
184 ETYPE *cur = ((ETYPE *)vd + H(idx)); \
185 *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \
186 } \
187 \
188 static inline QEMU_ALWAYS_INLINE \
189 void NAME##_host(void *vd, uint32_t idx, void *host) \
190 { \
191 ETYPE *cur = ((ETYPE *)vd + H(idx)); \
192 *cur = (ETYPE)LDSUF##_p(host); \
193 }
194
195 GEN_VEXT_LD_ELEM(lde_b, uint8_t, H1, ldub)
196 GEN_VEXT_LD_ELEM(lde_h, uint16_t, H2, lduw)
197 GEN_VEXT_LD_ELEM(lde_w, uint32_t, H4, ldl)
198 GEN_VEXT_LD_ELEM(lde_d, uint64_t, H8, ldq)
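/*
 * For reference, GEN_VEXT_LD_ELEM(lde_b, uint8_t, H1, ldub) above expands
 * (roughly) to the following pair of accessors:
 *
 *   static void lde_b_tlb(CPURISCVState *env, abi_ptr addr,
 *                         uint32_t idx, void *vd, uintptr_t retaddr)
 *   {
 *       uint8_t *cur = ((uint8_t *)vd + H1(idx));
 *       *cur = cpu_ldub_data_ra(env, addr, retaddr);
 *   }
 *
 *   static void lde_b_host(void *vd, uint32_t idx, void *host)
 *   {
 *       uint8_t *cur = ((uint8_t *)vd + H1(idx));
 *       *cur = (uint8_t)ldub_p(host);
 *   }
 */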
199
200 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \
201 static inline QEMU_ALWAYS_INLINE \
202 void NAME##_tlb(CPURISCVState *env, abi_ptr addr, \
203 uint32_t idx, void *vd, uintptr_t retaddr) \
204 { \
205 ETYPE data = *((ETYPE *)vd + H(idx)); \
206 cpu_##STSUF##_data_ra(env, addr, data, retaddr); \
207 } \
208 \
209 static inline QEMU_ALWAYS_INLINE \
210 void NAME##_host(void *vd, uint32_t idx, void *host) \
211 { \
212 ETYPE data = *((ETYPE *)vd + H(idx)); \
213 STSUF##_p(host, data); \
214 }
215
216 GEN_VEXT_ST_ELEM(ste_b, uint8_t, H1, stb)
217 GEN_VEXT_ST_ELEM(ste_h, uint16_t, H2, stw)
218 GEN_VEXT_ST_ELEM(ste_w, uint32_t, H4, stl)
219 GEN_VEXT_ST_ELEM(ste_d, uint64_t, H8, stq)
220
221 static inline QEMU_ALWAYS_INLINE void
222 vext_continuous_ldst_tlb(CPURISCVState *env, vext_ldst_elem_fn_tlb *ldst_tlb,
223 void *vd, uint32_t evl, target_ulong addr,
224 uint32_t reg_start, uintptr_t ra, uint32_t esz,
225 bool is_load)
226 {
227 uint32_t i;
228 for (i = env->vstart; i < evl; env->vstart = ++i, addr += esz) {
229 ldst_tlb(env, adjust_addr(env, addr), i, vd, ra);
230 }
231 }
232
233 static inline QEMU_ALWAYS_INLINE void
234 vext_continuous_ldst_host(CPURISCVState *env, vext_ldst_elem_fn_host *ldst_host,
235 void *vd, uint32_t evl, uint32_t reg_start, void *host,
236 uint32_t esz, bool is_load)
237 {
238 #if HOST_BIG_ENDIAN
239 for (; reg_start < evl; reg_start++, host += esz) {
240 ldst_host(vd, reg_start, host);
241 }
242 #else
243 if (esz == 1) {
244 uint32_t byte_offset = reg_start * esz;
245 uint32_t size = (evl - reg_start) * esz;
246
247 if (is_load) {
248 memcpy(vd + byte_offset, host, size);
249 } else {
250 memcpy(host, vd + byte_offset, size);
251 }
252 } else {
253 for (; reg_start < evl; reg_start++, host += esz) {
254 ldst_host(vd, reg_start, host);
255 }
256 }
257 #endif
258 }
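/*
 * On a little-endian host H1() is an identity mapping, so for esz == 1 the
 * memcpy fast path above is equivalent to (illustrative, load case):
 *
 *   for (; reg_start < evl; reg_start++, host += 1) {
 *       lde_b_host(vd, reg_start, host);
 *   }
 *
 * i.e. the byte elements already sit contiguously in both vd and host.
 */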
259
260 static void vext_set_tail_elems_1s(target_ulong vl, void *vd,
261 uint32_t desc, uint32_t nf,
262 uint32_t esz, uint32_t max_elems)
263 {
264 uint32_t vta = vext_vta(desc);
265 int k;
266
267 if (vta == 0) {
268 return;
269 }
270
271 for (k = 0; k < nf; ++k) {
272 vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz,
273 (k * max_elems + max_elems) * esz);
274 }
275 }
276
277 /*
278 * stride: access vector element from strided memory
279 */
280 static void
281 vext_ldst_stride(void *vd, void *v0, target_ulong base, target_ulong stride,
282 CPURISCVState *env, uint32_t desc, uint32_t vm,
283 vext_ldst_elem_fn_tlb *ldst_elem, uint32_t log2_esz,
284 uintptr_t ra)
285 {
286 uint32_t i, k;
287 uint32_t nf = vext_nf(desc);
288 uint32_t max_elems = vext_max_elems(desc, log2_esz);
289 uint32_t esz = 1 << log2_esz;
290 uint32_t vma = vext_vma(desc);
291
292 VSTART_CHECK_EARLY_EXIT(env, env->vl);
293
294 for (i = env->vstart; i < env->vl; env->vstart = ++i) {
295 k = 0;
296 while (k < nf) {
297 if (!vm && !vext_elem_mask(v0, i)) {
298 /* set masked-off elements to 1s */
299 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
300 (i + k * max_elems + 1) * esz);
301 k++;
302 continue;
303 }
304 target_ulong addr = base + stride * i + (k << log2_esz);
305 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
306 k++;
307 }
308 }
309 env->vstart = 0;
310
311 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
312 }
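/*
 * Address pattern, for reference: element i of field k of a (possibly
 * segmented) strided access is read from or written to
 *
 *   base + stride * i + (k << log2_esz)
 *
 * e.g. vlse32_v with stride = 12 touches base, base + 12, base + 24, ...
 * (illustrative values).
 */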
313
314 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \
315 void HELPER(NAME)(void *vd, void * v0, target_ulong base, \
316 target_ulong stride, CPURISCVState *env, \
317 uint32_t desc) \
318 { \
319 uint32_t vm = vext_vm(desc); \
320 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \
321 ctzl(sizeof(ETYPE)), GETPC()); \
322 }
323
324 GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b_tlb)
325 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h_tlb)
326 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w_tlb)
327 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d_tlb)
328
329 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \
330 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
331 target_ulong stride, CPURISCVState *env, \
332 uint32_t desc) \
333 { \
334 uint32_t vm = vext_vm(desc); \
335 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \
336 ctzl(sizeof(ETYPE)), GETPC()); \
337 }
338
339 GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b_tlb)
340 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h_tlb)
341 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w_tlb)
342 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d_tlb)
343
344 /*
345 * unit-stride: access elements stored contiguously in memory
346 */
347
348 /* unmasked unit-stride load and store operation */
349 static inline QEMU_ALWAYS_INLINE void
350 vext_page_ldst_us(CPURISCVState *env, void *vd, target_ulong addr,
351 uint32_t elems, uint32_t nf, uint32_t max_elems,
352 uint32_t log2_esz, bool is_load, int mmu_index,
353 vext_ldst_elem_fn_tlb *ldst_tlb,
354 vext_ldst_elem_fn_host *ldst_host, uintptr_t ra)
355 {
356 void *host;
357 int i, k, flags;
358 uint32_t esz = 1 << log2_esz;
359 uint32_t size = (elems * nf) << log2_esz;
360 uint32_t evl = env->vstart + elems;
361 MMUAccessType access_type = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE;
362
363 /* Check page permission/pmp/watchpoint/etc. */
364 probe_pages(env, addr, size, ra, access_type, mmu_index, &host, &flags,
365 true);
366
367 if (flags == 0) {
368 if (nf == 1) {
369 vext_continuous_ldst_host(env, ldst_host, vd, evl, env->vstart,
370 host, esz, is_load);
371 } else {
372 for (i = env->vstart; i < evl; ++i) {
373 k = 0;
374 while (k < nf) {
375 ldst_host(vd, i + k * max_elems, host);
376 host += esz;
377 k++;
378 }
379 }
380 }
381 env->vstart += elems;
382 } else {
383 if (nf == 1) {
384 vext_continuous_ldst_tlb(env, ldst_tlb, vd, evl, addr, env->vstart,
385 ra, esz, is_load);
386 } else {
387 /* load bytes from guest memory */
388 for (i = env->vstart; i < evl; env->vstart = ++i) {
389 k = 0;
390 while (k < nf) {
391 ldst_tlb(env, adjust_addr(env, addr), i + k * max_elems,
392 vd, ra);
393 addr += esz;
394 k++;
395 }
396 }
397 }
398 }
399 }
400
401 static inline QEMU_ALWAYS_INLINE void
402 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
403 vext_ldst_elem_fn_tlb *ldst_tlb,
404 vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz,
405 uint32_t evl, uintptr_t ra, bool is_load)
406 {
407 uint32_t k;
408 target_ulong page_split, elems, addr;
409 uint32_t nf = vext_nf(desc);
410 uint32_t max_elems = vext_max_elems(desc, log2_esz);
411 uint32_t esz = 1 << log2_esz;
412 uint32_t msize = nf * esz;
413 int mmu_index = riscv_env_mmu_index(env, false);
414
415 VSTART_CHECK_EARLY_EXIT(env, evl);
416
417 #if defined(CONFIG_USER_ONLY)
418 /*
419 * For data sizes <= 6 bytes we get better performance by simply calling
420 * vext_continuous_ldst_tlb
421 */
422 if (nf == 1 && (evl << log2_esz) <= 6) {
423 addr = base + (env->vstart << log2_esz);
424 vext_continuous_ldst_tlb(env, ldst_tlb, vd, evl, addr, env->vstart, ra,
425 esz, is_load);
426
427 env->vstart = 0;
428 vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems);
429 return;
430 }
431 #endif
432
433 /* Calculate the page range of first page */
434 addr = base + ((env->vstart * nf) << log2_esz);
435 page_split = -(addr | TARGET_PAGE_MASK);
436 /* Get number of elements */
437 elems = page_split / msize;
438 if (unlikely(env->vstart + elems >= evl)) {
439 elems = evl - env->vstart;
440 }
441
442 /* Load/store elements in the first page */
443 if (likely(elems)) {
444 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, log2_esz,
445 is_load, mmu_index, ldst_tlb, ldst_host, ra);
446 }
447
448 /* Load/store elements in the second page */
449 if (unlikely(env->vstart < evl)) {
450 /* Cross page element */
451 if (unlikely(page_split % msize)) {
452 for (k = 0; k < nf; k++) {
453 addr = base + ((env->vstart * nf + k) << log2_esz);
454 ldst_tlb(env, adjust_addr(env, addr),
455 env->vstart + k * max_elems, vd, ra);
456 }
457 env->vstart++;
458 }
459
460 addr = base + ((env->vstart * nf) << log2_esz);
461 /* Get number of elements of second page */
462 elems = evl - env->vstart;
463
464 /* Load/store elements in the second page */
465 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, log2_esz,
466 is_load, mmu_index, ldst_tlb, ldst_host, ra);
467 }
468
469 env->vstart = 0;
470 vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems);
471 }
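/*
 * Page-split handling, worked example (illustrative only, 4 KiB pages):
 * with nf = 1, esz = 4 and an element starting 6 bytes before the end of
 * a page, page_split = 6 and elems = 6 / 4 = 1, so one element is handled
 * by the first vext_page_ldst_us() call.  Because page_split % msize != 0,
 * the element that straddles the boundary is then accessed field by field
 * through ldst_tlb(), and the remaining elements are handled by the second
 * vext_page_ldst_us() call on the next page.
 */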
472
473 /*
474 * A masked unit-stride load or store operation is a special case of a
475 * strided access with stride = NF * sizeof(ETYPE).
476 */
477
478 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST) \
479 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
480 CPURISCVState *env, uint32_t desc) \
481 { \
482 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
483 vext_ldst_stride(vd, v0, base, stride, env, desc, false, \
484 LOAD_FN_TLB, ctzl(sizeof(ETYPE)), GETPC()); \
485 } \
486 \
487 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
488 CPURISCVState *env, uint32_t desc) \
489 { \
490 vext_ldst_us(vd, base, env, desc, LOAD_FN_TLB, LOAD_FN_HOST, \
491 ctzl(sizeof(ETYPE)), env->vl, GETPC(), true); \
492 }
493
494 GEN_VEXT_LD_US(vle8_v, int8_t, lde_b_tlb, lde_b_host)
495 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h_tlb, lde_h_host)
496 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w_tlb, lde_w_host)
497 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d_tlb, lde_d_host)
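/*
 * For reference, the masked variant generated above for vle32_v expands
 * (roughly) to:
 *
 *   void helper_vle32_v_mask(void *vd, void *v0, target_ulong base,
 *                            CPURISCVState *env, uint32_t desc)
 *   {
 *       uint32_t stride = vext_nf(desc) << 2;    /- NF * sizeof(int32_t) -/
 *       vext_ldst_stride(vd, v0, base, stride, env, desc, false,
 *                        lde_w_tlb, 2, GETPC());
 *   }
 */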
498
499 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN_TLB, STORE_FN_HOST) \
500 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
501 CPURISCVState *env, uint32_t desc) \
502 { \
503 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
504 vext_ldst_stride(vd, v0, base, stride, env, desc, false, \
505 STORE_FN_TLB, ctzl(sizeof(ETYPE)), GETPC()); \
506 } \
507 \
508 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
509 CPURISCVState *env, uint32_t desc) \
510 { \
511 vext_ldst_us(vd, base, env, desc, STORE_FN_TLB, STORE_FN_HOST, \
512 ctzl(sizeof(ETYPE)), env->vl, GETPC(), false); \
513 }
514
515 GEN_VEXT_ST_US(vse8_v, int8_t, ste_b_tlb, ste_b_host)
516 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h_tlb, ste_h_host)
517 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w_tlb, ste_w_host)
518 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d_tlb, ste_d_host)
519
520 /*
521 * unit stride mask load and store, EEW = 1
522 */
523 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
524 CPURISCVState *env, uint32_t desc)
525 {
526 /* evl = ceil(vl/8) */
527 uint8_t evl = (env->vl + 7) >> 3;
528 vext_ldst_us(vd, base, env, desc, lde_b_tlb, lde_b_host,
529 0, evl, GETPC(), true);
530 }
531
532 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
533 CPURISCVState *env, uint32_t desc)
534 {
535 /* evl = ceil(vl/8) */
536 uint8_t evl = (env->vl + 7) >> 3;
537 vext_ldst_us(vd, base, env, desc, ste_b_tlb, ste_b_host,
538 0, evl, GETPC(), false);
539 }
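/*
 * Worked example (illustrative only): for vl = 17, evl = (17 + 7) >> 3 = 3,
 * i.e. vlm.v/vsm.v transfer 3 bytes of the mask register, covering mask
 * bits 0..23, of which only bits 0..16 are architecturally significant.
 */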
540
541 /*
542 * index: access vector element from indexed memory
543 */
544 typedef target_ulong vext_get_index_addr(target_ulong base,
545 uint32_t idx, void *vs2);
546
547 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \
548 static target_ulong NAME(target_ulong base, \
549 uint32_t idx, void *vs2) \
550 { \
551 return (base + *((ETYPE *)vs2 + H(idx))); \
552 }
553
554 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1)
555 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
556 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
557 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
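/*
 * Worked example (illustrative only): for vlxei16_32_v, element i is loaded
 * from
 *
 *   idx_h(base, i, vs2) = base + (target_ulong)((uint16_t *)vs2)[H2(i)]
 *
 * i.e. the 16-bit index is zero-extended before being added to base, while
 * the data element itself is 32 bits wide.
 */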
558
559 static inline void
560 vext_ldst_index(void *vd, void *v0, target_ulong base,
561 void *vs2, CPURISCVState *env, uint32_t desc,
562 vext_get_index_addr get_index_addr,
563 vext_ldst_elem_fn_tlb *ldst_elem,
564 uint32_t log2_esz, uintptr_t ra)
565 {
566 uint32_t i, k;
567 uint32_t nf = vext_nf(desc);
568 uint32_t vm = vext_vm(desc);
569 uint32_t max_elems = vext_max_elems(desc, log2_esz);
570 uint32_t esz = 1 << log2_esz;
571 uint32_t vma = vext_vma(desc);
572
573 VSTART_CHECK_EARLY_EXIT(env, env->vl);
574
575 /* load bytes from guest memory */
576 for (i = env->vstart; i < env->vl; env->vstart = ++i) {
577 k = 0;
578 while (k < nf) {
579 if (!vm && !vext_elem_mask(v0, i)) {
580 /* set masked-off elements to 1s */
581 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
582 (i + k * max_elems + 1) * esz);
583 k++;
584 continue;
585 }
586 abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
587 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
588 k++;
589 }
590 }
591 env->vstart = 0;
592
593 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
594 }
595
596 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \
597 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
598 void *vs2, CPURISCVState *env, uint32_t desc) \
599 { \
600 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
601 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \
602 }
603
604 GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b_tlb)
605 GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h_tlb)
606 GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w_tlb)
607 GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d_tlb)
608 GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b_tlb)
609 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h_tlb)
610 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w_tlb)
611 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d_tlb)
612 GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b_tlb)
613 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h_tlb)
614 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w_tlb)
615 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d_tlb)
616 GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b_tlb)
617 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h_tlb)
618 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w_tlb)
619 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d_tlb)
620
621 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \
622 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
623 void *vs2, CPURISCVState *env, uint32_t desc) \
624 { \
625 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
626 STORE_FN, ctzl(sizeof(ETYPE)), \
627 GETPC()); \
628 }
629
630 GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b_tlb)
631 GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h_tlb)
632 GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w_tlb)
633 GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d_tlb)
634 GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b_tlb)
635 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h_tlb)
636 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w_tlb)
637 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d_tlb)
638 GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b_tlb)
639 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h_tlb)
640 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w_tlb)
641 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d_tlb)
642 GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b_tlb)
643 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h_tlb)
644 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w_tlb)
645 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d_tlb)
646
647 /*
648 * unit-stride fault-only-first load instructions
649 */
650 static inline void
651 vext_ldff(void *vd, void *v0, target_ulong base, CPURISCVState *env,
652 uint32_t desc, vext_ldst_elem_fn_tlb *ldst_tlb,
653 vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz, uintptr_t ra)
654 {
655 uint32_t i, k, vl = 0;
656 uint32_t nf = vext_nf(desc);
657 uint32_t vm = vext_vm(desc);
658 uint32_t max_elems = vext_max_elems(desc, log2_esz);
659 uint32_t esz = 1 << log2_esz;
660 uint32_t msize = nf * esz;
661 uint32_t vma = vext_vma(desc);
662 target_ulong addr, addr_probe, addr_i, offset, remain, page_split, elems;
663 int mmu_index = riscv_env_mmu_index(env, false);
664 int flags, probe_flags;
665 void *host;
666
667 VSTART_CHECK_EARLY_EXIT(env, env->vl);
668
669 addr = base + ((env->vstart * nf) << log2_esz);
670 page_split = -(addr | TARGET_PAGE_MASK);
671 /* Get number of elements */
672 elems = page_split / msize;
673 if (unlikely(env->vstart + elems >= env->vl)) {
674 elems = env->vl - env->vstart;
675 }
676
677 /* Check page permission/pmp/watchpoint/etc. */
678 probe_pages(env, addr, elems * msize, ra, MMU_DATA_LOAD, mmu_index, &host,
679 &flags, true);
680
681 /* If we are crossing a page check also the second page. */
682 if (env->vl > elems) {
683 addr_probe = addr + (elems << log2_esz);
684 probe_pages(env, addr_probe, elems * msize, ra, MMU_DATA_LOAD,
685 mmu_index, &host, &probe_flags, true);
686 flags |= probe_flags;
687 }
688
689 if (flags & ~TLB_WATCHPOINT) {
690 /* probe every access */
691 for (i = env->vstart; i < env->vl; i++) {
692 if (!vm && !vext_elem_mask(v0, i)) {
693 continue;
694 }
695 addr_i = adjust_addr(env, base + i * (nf << log2_esz));
696 if (i == 0) {
697 /* Allow fault on first element. */
698 probe_pages(env, addr_i, nf << log2_esz, ra, MMU_DATA_LOAD,
699 mmu_index, &host, NULL, false);
700 } else {
701 remain = nf << log2_esz;
702 while (remain > 0) {
703 offset = -(addr_i | TARGET_PAGE_MASK);
704
705 /* Probe nonfault on subsequent elements. */
706 probe_pages(env, addr_i, offset, 0, MMU_DATA_LOAD,
707 mmu_index, &host, &flags, true);
708
709 /*
710 * Stop if invalid (unmapped) or mmio (transaction may
711 * fail). Do not stop on a watchpoint, as the spec says that
712 * first-fault should continue to access the same
713 * elements regardless of any watchpoint.
714 */
715 if (flags & ~TLB_WATCHPOINT) {
716 vl = i;
717 goto ProbeSuccess;
718 }
719 if (remain <= offset) {
720 break;
721 }
722 remain -= offset;
723 addr_i = adjust_addr(env, addr_i + offset);
724 }
725 }
726 }
727 }
728 ProbeSuccess:
729 /* load bytes from guest memory */
730 if (vl != 0) {
731 env->vl = vl;
732 }
733
734 if (env->vstart < env->vl) {
735 if (vm) {
736 /* Load/store elements in the first page */
737 if (likely(elems)) {
738 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems,
739 log2_esz, true, mmu_index, ldst_tlb,
740 ldst_host, ra);
741 }
742
743 /* Load/store elements in the second page */
744 if (unlikely(env->vstart < env->vl)) {
745 /* Cross page element */
746 if (unlikely(page_split % msize)) {
747 for (k = 0; k < nf; k++) {
748 addr = base + ((env->vstart * nf + k) << log2_esz);
749 ldst_tlb(env, adjust_addr(env, addr),
750 env->vstart + k * max_elems, vd, ra);
751 }
752 env->vstart++;
753 }
754
755 addr = base + ((env->vstart * nf) << log2_esz);
756 /* Get number of elements of second page */
757 elems = env->vl - env->vstart;
758
759 /* Load/store elements in the second page */
760 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems,
761 log2_esz, true, mmu_index, ldst_tlb,
762 ldst_host, ra);
763 }
764 } else {
765 for (i = env->vstart; i < env->vl; i++) {
766 k = 0;
767 while (k < nf) {
768 if (!vext_elem_mask(v0, i)) {
769 /* set masked-off elements to 1s */
770 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
771 (i + k * max_elems + 1) * esz);
772 k++;
773 continue;
774 }
775 addr = base + ((i * nf + k) << log2_esz);
776 ldst_tlb(env, adjust_addr(env, addr), i + k * max_elems,
777 vd, ra);
778 k++;
779 }
780 }
781 }
782 }
783 env->vstart = 0;
784
785 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
786 }
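/*
 * Fault-only-first semantics, for reference: if the very first active
 * element faults, the exception is delivered as usual (the probe above is
 * done with nonfault = false for i == 0).  If a later element i > 0 hits an
 * unmapped or MMIO page during the nonfault probing, no exception is raised;
 * instead vl is truncated to i and only elements 0 .. i-1 are loaded.
 */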
787
788 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST) \
789 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
790 CPURISCVState *env, uint32_t desc) \
791 { \
792 vext_ldff(vd, v0, base, env, desc, LOAD_FN_TLB, \
793 LOAD_FN_HOST, ctzl(sizeof(ETYPE)), GETPC()); \
794 }
795
796 GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b_tlb, lde_b_host)
797 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h_tlb, lde_h_host)
798 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w_tlb, lde_w_host)
799 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d_tlb, lde_d_host)
800
801 #define DO_SWAP(N, M) (M)
802 #define DO_AND(N, M) (N & M)
803 #define DO_XOR(N, M) (N ^ M)
804 #define DO_OR(N, M) (N | M)
805 #define DO_ADD(N, M) (N + M)
806
807 /* Signed min/max */
808 #define DO_MAX(N, M) ((N) >= (M) ? (N) : (M))
809 #define DO_MIN(N, M) ((N) >= (M) ? (M) : (N))
810
811 /*
812 * load and store whole register instructions
813 */
814 static inline QEMU_ALWAYS_INLINE void
815 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
816 vext_ldst_elem_fn_tlb *ldst_tlb,
817 vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz,
818 uintptr_t ra, bool is_load)
819 {
820 target_ulong page_split, elems, addr;
821 uint32_t nf = vext_nf(desc);
822 uint32_t vlenb = riscv_cpu_cfg(env)->vlenb;
823 uint32_t max_elems = vlenb >> log2_esz;
824 uint32_t evl = nf * max_elems;
825 uint32_t esz = 1 << log2_esz;
826 int mmu_index = riscv_env_mmu_index(env, false);
827
828 /* Calculate the page range of first page */
829 addr = base + (env->vstart << log2_esz);
830 page_split = -(addr | TARGET_PAGE_MASK);
831 /* Get number of elements */
832 elems = page_split / esz;
833 if (unlikely(env->vstart + elems >= evl)) {
834 elems = evl - env->vstart;
835 }
836
837 /* Load/store elements in the first page */
838 if (likely(elems)) {
839 vext_page_ldst_us(env, vd, addr, elems, 1, max_elems, log2_esz,
840 is_load, mmu_index, ldst_tlb, ldst_host, ra);
841 }
842
843 /* Load/store elements in the second page */
844 if (unlikely(env->vstart < evl)) {
845 /* Cross page element */
846 if (unlikely(page_split % esz)) {
847 addr = base + (env->vstart << log2_esz);
848 ldst_tlb(env, adjust_addr(env, addr), env->vstart, vd, ra);
849 env->vstart++;
850 }
851
852 addr = base + (env->vstart << log2_esz);
853 /* Get number of elements of second page */
854 elems = evl - env->vstart;
855
856 /* Load/store elements in the second page */
857 vext_page_ldst_us(env, vd, addr, elems, 1, max_elems, log2_esz,
858 is_load, mmu_index, ldst_tlb, ldst_host, ra);
859 }
860
861 env->vstart = 0;
862 }
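/*
 * Worked example (illustrative only): with vlenb = 16, vl2re32_v has
 * nf = 2, max_elems = 16 >> 2 = 4 and evl = 2 * 4 = 8, i.e. two whole
 * vector registers (32 bytes) are transferred regardless of the current
 * vl/vtype; only a non-zero vstart reduces the amount copied.
 */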
863
864 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST) \
865 void HELPER(NAME)(void *vd, target_ulong base, CPURISCVState *env, \
866 uint32_t desc) \
867 { \
868 vext_ldst_whole(vd, base, env, desc, LOAD_FN_TLB, LOAD_FN_HOST, \
869 ctzl(sizeof(ETYPE)), GETPC(), true); \
870 }
871
872 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b_tlb, lde_b_host)
873 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h_tlb, lde_h_host)
874 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w_tlb, lde_w_host)
875 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d_tlb, lde_d_host)
876 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b_tlb, lde_b_host)
877 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h_tlb, lde_h_host)
878 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w_tlb, lde_w_host)
879 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d_tlb, lde_d_host)
880 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b_tlb, lde_b_host)
881 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h_tlb, lde_h_host)
882 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w_tlb, lde_w_host)
883 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d_tlb, lde_d_host)
884 GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b_tlb, lde_b_host)
885 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h_tlb, lde_h_host)
886 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w_tlb, lde_w_host)
887 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d_tlb, lde_d_host)
888
889 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN_TLB, STORE_FN_HOST) \
890 void HELPER(NAME)(void *vd, target_ulong base, CPURISCVState *env, \
891 uint32_t desc) \
892 { \
893 vext_ldst_whole(vd, base, env, desc, STORE_FN_TLB, STORE_FN_HOST, \
894 ctzl(sizeof(ETYPE)), GETPC(), false); \
895 }
896
897 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b_tlb, ste_b_host)
898 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b_tlb, ste_b_host)
899 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b_tlb, ste_b_host)
900 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b_tlb, ste_b_host)
901
902 /*
903 * Vector Integer Arithmetic Instructions
904 */
905
906 /* (TD, T1, T2, TX1, TX2) */
907 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
908 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
909 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
910 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
911 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
912 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
913 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
914 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
915 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
916 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
917 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
918 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
919 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
920 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
921 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
922 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
923 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
924 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
925 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
926 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
927 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
928 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
929 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
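/*
 * Naming of the 5-tuples above, as consumed by the RVVCALL()/OPIVV2
 * expansions in vector_internals.h (descriptive note): TD is the
 * destination element type, T1/T2 are the types the two source elements
 * are read as, and TX1/TX2 are the types they are converted to before OP
 * is applied.  For example WOP_SUS_B widens an unsigned 8-bit and a signed
 * 8-bit source to 16 bits each and produces a signed 16-bit result.
 */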
930
931 #define DO_SUB(N, M) (N - M)
932 #define DO_RSUB(N, M) (M - N)
933
934 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
935 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
936 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
937 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
938 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
939 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
940 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
941 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
942
943 GEN_VEXT_VV(vadd_vv_b, 1)
944 GEN_VEXT_VV(vadd_vv_h, 2)
945 GEN_VEXT_VV(vadd_vv_w, 4)
946 GEN_VEXT_VV(vadd_vv_d, 8)
947 GEN_VEXT_VV(vsub_vv_b, 1)
948 GEN_VEXT_VV(vsub_vv_h, 2)
949 GEN_VEXT_VV(vsub_vv_w, 4)
950 GEN_VEXT_VV(vsub_vv_d, 8)
951
952
953 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
954 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
955 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
956 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
957 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
958 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
959 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
960 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
961 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
962 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
963 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
964 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
965
966 GEN_VEXT_VX(vadd_vx_b, 1)
967 GEN_VEXT_VX(vadd_vx_h, 2)
968 GEN_VEXT_VX(vadd_vx_w, 4)
969 GEN_VEXT_VX(vadd_vx_d, 8)
970 GEN_VEXT_VX(vsub_vx_b, 1)
971 GEN_VEXT_VX(vsub_vx_h, 2)
972 GEN_VEXT_VX(vsub_vx_w, 4)
973 GEN_VEXT_VX(vsub_vx_d, 8)
974 GEN_VEXT_VX(vrsub_vx_b, 1)
975 GEN_VEXT_VX(vrsub_vx_h, 2)
976 GEN_VEXT_VX(vrsub_vx_w, 4)
977 GEN_VEXT_VX(vrsub_vx_d, 8)
978
979 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
980 {
981 intptr_t oprsz = simd_oprsz(desc);
982 intptr_t i;
983
984 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
985 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
986 }
987 }
988
989 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
990 {
991 intptr_t oprsz = simd_oprsz(desc);
992 intptr_t i;
993
994 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
995 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
996 }
997 }
998
999 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
1000 {
1001 intptr_t oprsz = simd_oprsz(desc);
1002 intptr_t i;
1003
1004 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1005 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
1006 }
1007 }
1008
1009 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
1010 {
1011 intptr_t oprsz = simd_oprsz(desc);
1012 intptr_t i;
1013
1014 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1015 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
1016 }
1017 }
1018
1019 /* Vector Widening Integer Add/Subtract */
1020 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
1021 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
1022 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
1023 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
1024 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
1025 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
1026 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
1027 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
1028 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
1029 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
1030 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
1031 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
1032 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
1033 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
1034 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
1035 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
1036 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
1037 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
1038 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
1039 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
1040 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
1041 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
1042 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
1043 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
1044 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
1045 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
1046 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
1047 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
1048 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
1049 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
1050 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
1051 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
1052 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
1053 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
1054 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
1055 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
1056 GEN_VEXT_VV(vwaddu_vv_b, 2)
1057 GEN_VEXT_VV(vwaddu_vv_h, 4)
1058 GEN_VEXT_VV(vwaddu_vv_w, 8)
1059 GEN_VEXT_VV(vwsubu_vv_b, 2)
1060 GEN_VEXT_VV(vwsubu_vv_h, 4)
1061 GEN_VEXT_VV(vwsubu_vv_w, 8)
1062 GEN_VEXT_VV(vwadd_vv_b, 2)
1063 GEN_VEXT_VV(vwadd_vv_h, 4)
1064 GEN_VEXT_VV(vwadd_vv_w, 8)
1065 GEN_VEXT_VV(vwsub_vv_b, 2)
1066 GEN_VEXT_VV(vwsub_vv_h, 4)
1067 GEN_VEXT_VV(vwsub_vv_w, 8)
1068 GEN_VEXT_VV(vwaddu_wv_b, 2)
1069 GEN_VEXT_VV(vwaddu_wv_h, 4)
1070 GEN_VEXT_VV(vwaddu_wv_w, 8)
1071 GEN_VEXT_VV(vwsubu_wv_b, 2)
1072 GEN_VEXT_VV(vwsubu_wv_h, 4)
1073 GEN_VEXT_VV(vwsubu_wv_w, 8)
1074 GEN_VEXT_VV(vwadd_wv_b, 2)
1075 GEN_VEXT_VV(vwadd_wv_h, 4)
1076 GEN_VEXT_VV(vwadd_wv_w, 8)
1077 GEN_VEXT_VV(vwsub_wv_b, 2)
1078 GEN_VEXT_VV(vwsub_wv_h, 4)
1079 GEN_VEXT_VV(vwsub_wv_w, 8)
1080
1081 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
1082 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
1083 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
1084 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
1085 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
1086 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
1087 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
1088 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
1089 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
1090 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
1091 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
1092 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
1093 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
1094 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
1095 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
1096 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
1097 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
1098 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
1099 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
1100 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
1101 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
1102 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
1103 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
1104 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
1105 GEN_VEXT_VX(vwaddu_vx_b, 2)
1106 GEN_VEXT_VX(vwaddu_vx_h, 4)
1107 GEN_VEXT_VX(vwaddu_vx_w, 8)
1108 GEN_VEXT_VX(vwsubu_vx_b, 2)
1109 GEN_VEXT_VX(vwsubu_vx_h, 4)
1110 GEN_VEXT_VX(vwsubu_vx_w, 8)
1111 GEN_VEXT_VX(vwadd_vx_b, 2)
1112 GEN_VEXT_VX(vwadd_vx_h, 4)
1113 GEN_VEXT_VX(vwadd_vx_w, 8)
1114 GEN_VEXT_VX(vwsub_vx_b, 2)
1115 GEN_VEXT_VX(vwsub_vx_h, 4)
1116 GEN_VEXT_VX(vwsub_vx_w, 8)
1117 GEN_VEXT_VX(vwaddu_wx_b, 2)
1118 GEN_VEXT_VX(vwaddu_wx_h, 4)
1119 GEN_VEXT_VX(vwaddu_wx_w, 8)
1120 GEN_VEXT_VX(vwsubu_wx_b, 2)
1121 GEN_VEXT_VX(vwsubu_wx_h, 4)
1122 GEN_VEXT_VX(vwsubu_wx_w, 8)
1123 GEN_VEXT_VX(vwadd_wx_b, 2)
1124 GEN_VEXT_VX(vwadd_wx_h, 4)
1125 GEN_VEXT_VX(vwadd_wx_w, 8)
1126 GEN_VEXT_VX(vwsub_wx_b, 2)
1127 GEN_VEXT_VX(vwsub_wx_h, 4)
1128 GEN_VEXT_VX(vwsub_wx_w, 8)
1129
1130 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
1131 #define DO_VADC(N, M, C) (N + M + C)
1132 #define DO_VSBC(N, M, C) (N - M - C)
1133
1134 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \
1135 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1136 CPURISCVState *env, uint32_t desc) \
1137 { \
1138 uint32_t vl = env->vl; \
1139 uint32_t esz = sizeof(ETYPE); \
1140 uint32_t total_elems = \
1141 vext_get_total_elems(env, desc, esz); \
1142 uint32_t vta = vext_vta(desc); \
1143 uint32_t i; \
1144 \
1145 VSTART_CHECK_EARLY_EXIT(env, vl); \
1146 \
1147 for (i = env->vstart; i < vl; i++) { \
1148 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1149 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1150 ETYPE carry = vext_elem_mask(v0, i); \
1151 \
1152 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \
1153 } \
1154 env->vstart = 0; \
1155 /* set tail elements to 1s */ \
1156 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1157 }
1158
1159 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC)
1160 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
1161 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
1162 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)
1163
1164 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC)
1165 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
1166 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
1167 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
1168
1169 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \
1170 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
1171 CPURISCVState *env, uint32_t desc) \
1172 { \
1173 uint32_t vl = env->vl; \
1174 uint32_t esz = sizeof(ETYPE); \
1175 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1176 uint32_t vta = vext_vta(desc); \
1177 uint32_t i; \
1178 \
1179 VSTART_CHECK_EARLY_EXIT(env, vl); \
1180 \
1181 for (i = env->vstart; i < vl; i++) { \
1182 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1183 ETYPE carry = vext_elem_mask(v0, i); \
1184 \
1185 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
1186 } \
1187 env->vstart = 0; \
1188 /* set tail elements to 1s */ \
1189 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1190 }
1191
1192 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC)
1193 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
1194 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
1195 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)
1196
1197 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC)
1198 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
1199 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
1200 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
1201
1202 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \
1203 (__typeof(N))(N + M) < N)
1204 #define DO_MSBC(N, M, C) (C ? N <= M : N < M)
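/*
 * DO_MADC computes the carry-out of an unsigned add: N + M overflows the
 * element type exactly when the truncated sum is smaller than either
 * operand, hence the "< N" / "<= N" tests.  Worked example (illustrative
 * only) with uint8_t and no carry-in: N = 200, M = 100 gives
 * (uint8_t)(200 + 100) = 44 < 200, so the carry-out mask bit is 1.
 * DO_MSBC likewise yields the borrow-out of N - M - C.
 */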
1205
1206 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \
1207 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1208 CPURISCVState *env, uint32_t desc) \
1209 { \
1210 uint32_t vl = env->vl; \
1211 uint32_t vm = vext_vm(desc); \
1212 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
1213 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1214 uint32_t i; \
1215 \
1216 VSTART_CHECK_EARLY_EXIT(env, vl); \
1217 \
1218 for (i = env->vstart; i < vl; i++) { \
1219 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1220 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1221 ETYPE carry = !vm && vext_elem_mask(v0, i); \
1222 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \
1223 } \
1224 env->vstart = 0; \
1225 /*
1226 * mask destination registers are always tail-agnostic
1227 * set tail elements to 1s
1228 */ \
1229 if (vta_all_1s) { \
1230 for (; i < total_elems; i++) { \
1231 vext_set_elem_mask(vd, i, 1); \
1232 } \
1233 } \
1234 }
1235
1236 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC)
1237 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
1238 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
1239 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)
1240
1241 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC)
1242 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
1243 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
1244 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
1245
1246 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \
1247 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1248 void *vs2, CPURISCVState *env, uint32_t desc) \
1249 { \
1250 uint32_t vl = env->vl; \
1251 uint32_t vm = vext_vm(desc); \
1252 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
1253 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1254 uint32_t i; \
1255 \
1256 VSTART_CHECK_EARLY_EXIT(env, vl); \
1257 \
1258 for (i = env->vstart; i < vl; i++) { \
1259 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1260 ETYPE carry = !vm && vext_elem_mask(v0, i); \
1261 vext_set_elem_mask(vd, i, \
1262 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \
1263 } \
1264 env->vstart = 0; \
1265 /*
1266 * mask destination registers are always tail-agnostic
1267 * set tail elements to 1s
1268 */ \
1269 if (vta_all_1s) { \
1270 for (; i < total_elems; i++) { \
1271 vext_set_elem_mask(vd, i, 1); \
1272 } \
1273 } \
1274 }
1275
1276 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC)
1277 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
1278 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
1279 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)
1280
1281 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC)
1282 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
1283 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
1284 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
1285
1286 /* Vector Bitwise Logical Instructions */
1287 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
1288 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
1289 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
1290 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
1291 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
1292 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
1293 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
1294 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
1295 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
1296 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
1297 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
1298 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
1299 GEN_VEXT_VV(vand_vv_b, 1)
1300 GEN_VEXT_VV(vand_vv_h, 2)
1301 GEN_VEXT_VV(vand_vv_w, 4)
1302 GEN_VEXT_VV(vand_vv_d, 8)
1303 GEN_VEXT_VV(vor_vv_b, 1)
1304 GEN_VEXT_VV(vor_vv_h, 2)
1305 GEN_VEXT_VV(vor_vv_w, 4)
1306 GEN_VEXT_VV(vor_vv_d, 8)
1307 GEN_VEXT_VV(vxor_vv_b, 1)
1308 GEN_VEXT_VV(vxor_vv_h, 2)
1309 GEN_VEXT_VV(vxor_vv_w, 4)
1310 GEN_VEXT_VV(vxor_vv_d, 8)
1311
1312 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
1313 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
1314 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
1315 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
1316 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
1317 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
1318 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
1319 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
1320 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
1321 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
1322 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
1323 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
1324 GEN_VEXT_VX(vand_vx_b, 1)
1325 GEN_VEXT_VX(vand_vx_h, 2)
1326 GEN_VEXT_VX(vand_vx_w, 4)
1327 GEN_VEXT_VX(vand_vx_d, 8)
1328 GEN_VEXT_VX(vor_vx_b, 1)
1329 GEN_VEXT_VX(vor_vx_h, 2)
1330 GEN_VEXT_VX(vor_vx_w, 4)
1331 GEN_VEXT_VX(vor_vx_d, 8)
1332 GEN_VEXT_VX(vxor_vx_b, 1)
1333 GEN_VEXT_VX(vxor_vx_h, 2)
1334 GEN_VEXT_VX(vxor_vx_w, 4)
1335 GEN_VEXT_VX(vxor_vx_d, 8)
1336
1337 /* Vector Single-Width Bit Shift Instructions */
1338 #define DO_SLL(N, M) (N << (M))
1339 #define DO_SRL(N, M) (N >> (M))
1340
1341 /* generate the helpers for shift instructions with two vector operands */
1342 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \
1343 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
1344 void *vs2, CPURISCVState *env, uint32_t desc) \
1345 { \
1346 uint32_t vm = vext_vm(desc); \
1347 uint32_t vl = env->vl; \
1348 uint32_t esz = sizeof(TS1); \
1349 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1350 uint32_t vta = vext_vta(desc); \
1351 uint32_t vma = vext_vma(desc); \
1352 uint32_t i; \
1353 \
1354 VSTART_CHECK_EARLY_EXIT(env, vl); \
1355 \
1356 for (i = env->vstart; i < vl; i++) { \
1357 if (!vm && !vext_elem_mask(v0, i)) { \
1358 /* set masked-off elements to 1s */ \
1359 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
1360 continue; \
1361 } \
1362 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \
1363 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
1364 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \
1365 } \
1366 env->vstart = 0; \
1367 /* set tail elements to 1s */ \
1368 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1369 }
1370
1371 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7)
1372 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
1373 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
1374 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)
1375
1376 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1377 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1378 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1379 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1380
1381 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7)
1382 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
1383 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1384 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1385
1386 /*
1387 * generate the helpers for shift instructions with one vector and one scalar
1388 */
1389 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
1390 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1391 void *vs2, CPURISCVState *env, \
1392 uint32_t desc) \
1393 { \
1394 uint32_t vm = vext_vm(desc); \
1395 uint32_t vl = env->vl; \
1396 uint32_t esz = sizeof(TD); \
1397 uint32_t total_elems = \
1398 vext_get_total_elems(env, desc, esz); \
1399 uint32_t vta = vext_vta(desc); \
1400 uint32_t vma = vext_vma(desc); \
1401 uint32_t i; \
1402 \
1403 VSTART_CHECK_EARLY_EXIT(env, vl); \
1404 \
1405 for (i = env->vstart; i < vl; i++) { \
1406 if (!vm && !vext_elem_mask(v0, i)) { \
1407 /* set masked-off elements to 1s */ \
1408 vext_set_elems_1s(vd, vma, i * esz, \
1409 (i + 1) * esz); \
1410 continue; \
1411 } \
1412 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
1413 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \
1414 } \
1415 env->vstart = 0; \
1416 /* set tail elements to 1s */ \
1417 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\
1418 }
1419
1420 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
1421 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
1422 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
1423 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)
1424
1425 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1426 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1427 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1428 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1429
1430 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
1431 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
1432 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1433 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1434
1435 /* Vector Narrowing Integer Right Shift Instructions */
1436 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1437 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1438 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1439 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf)
1440 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1441 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1442 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1443 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1444 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1445 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
1446 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1447 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
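/*
 * Note on the narrowing variants above: the source elements (TS2) are twice
 * as wide as the destination (TS1/TD), and the shift amount is masked to
 * 2 * SEW - 1 of the destination, e.g. 0xf for vnsrl.wv with 8-bit results,
 * matching the double-width source.
 */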
1448
1449 /* Vector Integer Comparison Instructions */
1450 #define DO_MSEQ(N, M) (N == M)
1451 #define DO_MSNE(N, M) (N != M)
1452 #define DO_MSLT(N, M) (N < M)
1453 #define DO_MSLE(N, M) (N <= M)
1454 #define DO_MSGT(N, M) (N > M)
1455
1456 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \
1457 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1458 CPURISCVState *env, uint32_t desc) \
1459 { \
1460 uint32_t vm = vext_vm(desc); \
1461 uint32_t vl = env->vl; \
1462 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
1463 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1464 uint32_t vma = vext_vma(desc); \
1465 uint32_t i; \
1466 \
1467 VSTART_CHECK_EARLY_EXIT(env, vl); \
1468 \
1469 for (i = env->vstart; i < vl; i++) { \
1470 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1471 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1472 if (!vm && !vext_elem_mask(v0, i)) { \
1473 /* set masked-off elements to 1s */ \
1474 if (vma) { \
1475 vext_set_elem_mask(vd, i, 1); \
1476 } \
1477 continue; \
1478 } \
1479 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \
1480 } \
1481 env->vstart = 0; \
1482 /*
1483 * mask destination registers are always tail-agnostic
1484 * set tail elements to 1s
1485 */ \
1486 if (vta_all_1s) { \
1487 for (; i < total_elems; i++) { \
1488 vext_set_elem_mask(vd, i, 1); \
1489 } \
1490 } \
1491 }
1492
1493 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ)
1494 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
1495 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
1496 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
1497
1498 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE)
1499 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
1500 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
1501 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
1502
1503 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT)
1504 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
1505 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
1506 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
1507
1508 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT)
1509 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
1510 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
1511 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
1512
1513 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE)
1514 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
1515 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
1516 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
1517
1518 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE)
1519 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
1520 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
1521 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1522
1523 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \
1524 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
1525 CPURISCVState *env, uint32_t desc) \
1526 { \
1527 uint32_t vm = vext_vm(desc); \
1528 uint32_t vl = env->vl; \
1529 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
1530 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1531 uint32_t vma = vext_vma(desc); \
1532 uint32_t i; \
1533 \
1534 VSTART_CHECK_EARLY_EXIT(env, vl); \
1535 \
1536 for (i = env->vstart; i < vl; i++) { \
1537 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1538 if (!vm && !vext_elem_mask(v0, i)) { \
1539 /* set masked-off elements to 1s */ \
1540 if (vma) { \
1541 vext_set_elem_mask(vd, i, 1); \
1542 } \
1543 continue; \
1544 } \
1545 vext_set_elem_mask(vd, i, \
1546 DO_OP(s2, (ETYPE)(target_long)s1)); \
1547 } \
1548 env->vstart = 0; \
1549 /*
1550 * mask destination registers are always tail-agnostic
1551 * set tail elements to 1s
1552 */ \
1553 if (vta_all_1s) { \
1554 for (; i < total_elems; i++) { \
1555 vext_set_elem_mask(vd, i, 1); \
1556 } \
1557 } \
1558 }
1559
1560 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ)
1561 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
1562 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
1563 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
1564
1565 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE)
1566 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
1567 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
1568 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
1569
1570 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT)
1571 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
1572 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
1573 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
1574
1575 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT)
1576 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
1577 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
1578 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
1579
1580 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE)
1581 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
1582 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
1583 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
1584
1585 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE)
1586 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
1587 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
1588 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
1589
1590 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT)
1591 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
1592 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
1593 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
1594
1595 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT)
1596 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
1597 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
1598 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
1599
1600 /* Vector Integer Min/Max Instructions */
1601 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1602 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1603 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1604 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1605 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1606 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1607 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1608 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1609 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1610 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1611 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1612 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1613 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1614 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1615 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1616 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
1617 GEN_VEXT_VV(vminu_vv_b, 1)
1618 GEN_VEXT_VV(vminu_vv_h, 2)
1619 GEN_VEXT_VV(vminu_vv_w, 4)
1620 GEN_VEXT_VV(vminu_vv_d, 8)
1621 GEN_VEXT_VV(vmin_vv_b, 1)
1622 GEN_VEXT_VV(vmin_vv_h, 2)
1623 GEN_VEXT_VV(vmin_vv_w, 4)
1624 GEN_VEXT_VV(vmin_vv_d, 8)
1625 GEN_VEXT_VV(vmaxu_vv_b, 1)
1626 GEN_VEXT_VV(vmaxu_vv_h, 2)
1627 GEN_VEXT_VV(vmaxu_vv_w, 4)
1628 GEN_VEXT_VV(vmaxu_vv_d, 8)
1629 GEN_VEXT_VV(vmax_vv_b, 1)
1630 GEN_VEXT_VV(vmax_vv_h, 2)
1631 GEN_VEXT_VV(vmax_vv_w, 4)
1632 GEN_VEXT_VV(vmax_vv_d, 8)
1633
1634 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1635 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1636 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1637 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1638 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1639 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1640 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1641 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1642 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1643 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1644 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1645 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1646 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1647 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1648 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1649 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
1650 GEN_VEXT_VX(vminu_vx_b, 1)
1651 GEN_VEXT_VX(vminu_vx_h, 2)
1652 GEN_VEXT_VX(vminu_vx_w, 4)
1653 GEN_VEXT_VX(vminu_vx_d, 8)
1654 GEN_VEXT_VX(vmin_vx_b, 1)
1655 GEN_VEXT_VX(vmin_vx_h, 2)
1656 GEN_VEXT_VX(vmin_vx_w, 4)
1657 GEN_VEXT_VX(vmin_vx_d, 8)
1658 GEN_VEXT_VX(vmaxu_vx_b, 1)
1659 GEN_VEXT_VX(vmaxu_vx_h, 2)
1660 GEN_VEXT_VX(vmaxu_vx_w, 4)
1661 GEN_VEXT_VX(vmaxu_vx_d, 8)
1662 GEN_VEXT_VX(vmax_vx_b, 1)
1663 GEN_VEXT_VX(vmax_vx_h, 2)
1664 GEN_VEXT_VX(vmax_vx_w, 4)
1665 GEN_VEXT_VX(vmax_vx_d, 8)
1666
1667 /* Vector Single-Width Integer Multiply Instructions */
1668 #define DO_MUL(N, M) (N * M)
1669 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1670 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1671 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1672 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
1673 GEN_VEXT_VV(vmul_vv_b, 1)
1674 GEN_VEXT_VV(vmul_vv_h, 2)
1675 GEN_VEXT_VV(vmul_vv_w, 4)
1676 GEN_VEXT_VV(vmul_vv_d, 8)
1677
1678 static int8_t do_mulh_b(int8_t s2, int8_t s1)
1679 {
1680 return (int16_t)s2 * (int16_t)s1 >> 8;
1681 }
1682
1683 static int16_t do_mulh_h(int16_t s2, int16_t s1)
1684 {
1685 return (int32_t)s2 * (int32_t)s1 >> 16;
1686 }
1687
1688 static int32_t do_mulh_w(int32_t s2, int32_t s1)
1689 {
1690 return (int64_t)s2 * (int64_t)s1 >> 32;
1691 }
1692
1693 static int64_t do_mulh_d(int64_t s2, int64_t s1)
1694 {
1695 uint64_t hi_64, lo_64;
1696
1697 muls64(&lo_64, &hi_64, s1, s2);
1698 return hi_64;
1699 }
1700
1701 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1702 {
1703 return (uint16_t)s2 * (uint16_t)s1 >> 8;
1704 }
1705
1706 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1707 {
1708 return (uint32_t)s2 * (uint32_t)s1 >> 16;
1709 }
1710
1711 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1712 {
1713 return (uint64_t)s2 * (uint64_t)s1 >> 32;
1714 }
1715
1716 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1717 {
1718 uint64_t hi_64, lo_64;
1719
1720 mulu64(&lo_64, &hi_64, s2, s1);
1721 return hi_64;
1722 }
1723
1724 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1725 {
1726 return (int16_t)s2 * (uint16_t)s1 >> 8;
1727 }
1728
1729 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1730 {
1731 return (int32_t)s2 * (uint32_t)s1 >> 16;
1732 }
1733
1734 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1735 {
1736 return (int64_t)s2 * (uint64_t)s1 >> 32;
1737 }
1738
1739 /*
1740 * Let A = signed operand,
1741 * B = unsigned operand
1742 * P = mulu64(A, B), unsigned product
1743 *
1744 * LET X = 2 ** 64 - A, 2's complement of A
1745 * SP = signed product
1746 * THEN
1747 * IF A < 0
1748 * SP = -X * B
1749 * = -(2 ** 64 - A) * B
1750 * = A * B - 2 ** 64 * B
1751 * = P - 2 ** 64 * B
1752 * ELSE
1753 * SP = P
1754 * THEN
1755 * HI_P -= (A < 0 ? B : 0)
1756 */
1757
1758 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1759 {
1760 uint64_t hi_64, lo_64;
1761
1762 mulu64(&lo_64, &hi_64, s2, s1);
1763
1764 hi_64 -= s2 < 0 ? s1 : 0;
1765 return hi_64;
1766 }
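/*
 * Worked example of the correction above, scaled down to 8 x 8 bits:
 * A = -1 (0xff as unsigned), B = 2.  The unsigned product is
 * 0xff * 2 = 0x01fe, so HI_P = 0x01.  Since A < 0, subtract B:
 * 0x01 - 0x02 = 0xff = -1, which is indeed the high byte of the true
 * signed product -2 (0xfffe).
 */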
1767
1768 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1769 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1770 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1771 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1772 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1773 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1774 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1775 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1776 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1777 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1778 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1779 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
1780 GEN_VEXT_VV(vmulh_vv_b, 1)
1781 GEN_VEXT_VV(vmulh_vv_h, 2)
1782 GEN_VEXT_VV(vmulh_vv_w, 4)
1783 GEN_VEXT_VV(vmulh_vv_d, 8)
1784 GEN_VEXT_VV(vmulhu_vv_b, 1)
1785 GEN_VEXT_VV(vmulhu_vv_h, 2)
1786 GEN_VEXT_VV(vmulhu_vv_w, 4)
1787 GEN_VEXT_VV(vmulhu_vv_d, 8)
1788 GEN_VEXT_VV(vmulhsu_vv_b, 1)
1789 GEN_VEXT_VV(vmulhsu_vv_h, 2)
1790 GEN_VEXT_VV(vmulhsu_vv_w, 4)
1791 GEN_VEXT_VV(vmulhsu_vv_d, 8)
1792
1793 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1794 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1795 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1796 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1797 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1798 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1799 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1800 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1801 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1802 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1803 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1804 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1805 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1806 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1807 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1808 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
1809 GEN_VEXT_VX(vmul_vx_b, 1)
1810 GEN_VEXT_VX(vmul_vx_h, 2)
1811 GEN_VEXT_VX(vmul_vx_w, 4)
1812 GEN_VEXT_VX(vmul_vx_d, 8)
1813 GEN_VEXT_VX(vmulh_vx_b, 1)
1814 GEN_VEXT_VX(vmulh_vx_h, 2)
1815 GEN_VEXT_VX(vmulh_vx_w, 4)
1816 GEN_VEXT_VX(vmulh_vx_d, 8)
1817 GEN_VEXT_VX(vmulhu_vx_b, 1)
1818 GEN_VEXT_VX(vmulhu_vx_h, 2)
1819 GEN_VEXT_VX(vmulhu_vx_w, 4)
1820 GEN_VEXT_VX(vmulhu_vx_d, 8)
1821 GEN_VEXT_VX(vmulhsu_vx_b, 1)
1822 GEN_VEXT_VX(vmulhsu_vx_h, 2)
1823 GEN_VEXT_VX(vmulhsu_vx_w, 4)
1824 GEN_VEXT_VX(vmulhsu_vx_d, 8)
1825
1826 /* Vector Integer Divide Instructions */
1827 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1828 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
1829 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : \
1830 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
1831 #define DO_REM(N, M) (unlikely(M == 0) ? N : \
1832 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
1833
1834 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1835 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1836 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1837 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1838 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1839 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1840 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1841 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1842 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1843 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1844 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1845 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1846 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1847 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1848 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1849 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
1850 GEN_VEXT_VV(vdivu_vv_b, 1)
1851 GEN_VEXT_VV(vdivu_vv_h, 2)
1852 GEN_VEXT_VV(vdivu_vv_w, 4)
1853 GEN_VEXT_VV(vdivu_vv_d, 8)
1854 GEN_VEXT_VV(vdiv_vv_b, 1)
1855 GEN_VEXT_VV(vdiv_vv_h, 2)
1856 GEN_VEXT_VV(vdiv_vv_w, 4)
1857 GEN_VEXT_VV(vdiv_vv_d, 8)
1858 GEN_VEXT_VV(vremu_vv_b, 1)
1859 GEN_VEXT_VV(vremu_vv_h, 2)
1860 GEN_VEXT_VV(vremu_vv_w, 4)
1861 GEN_VEXT_VV(vremu_vv_d, 8)
1862 GEN_VEXT_VV(vrem_vv_b, 1)
1863 GEN_VEXT_VV(vrem_vv_h, 2)
1864 GEN_VEXT_VV(vrem_vv_w, 4)
1865 GEN_VEXT_VV(vrem_vv_d, 8)
1866
1867 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1868 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1869 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1870 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1871 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1872 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1873 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1874 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1875 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1876 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1877 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1878 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1879 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1880 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1881 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1882 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
1883 GEN_VEXT_VX(vdivu_vx_b, 1)
1884 GEN_VEXT_VX(vdivu_vx_h, 2)
1885 GEN_VEXT_VX(vdivu_vx_w, 4)
1886 GEN_VEXT_VX(vdivu_vx_d, 8)
1887 GEN_VEXT_VX(vdiv_vx_b, 1)
1888 GEN_VEXT_VX(vdiv_vx_h, 2)
1889 GEN_VEXT_VX(vdiv_vx_w, 4)
1890 GEN_VEXT_VX(vdiv_vx_d, 8)
1891 GEN_VEXT_VX(vremu_vx_b, 1)
1892 GEN_VEXT_VX(vremu_vx_h, 2)
1893 GEN_VEXT_VX(vremu_vx_w, 4)
1894 GEN_VEXT_VX(vremu_vx_d, 8)
1895 GEN_VEXT_VX(vrem_vx_b, 1)
1896 GEN_VEXT_VX(vrem_vx_h, 2)
1897 GEN_VEXT_VX(vrem_vx_w, 4)
1898 GEN_VEXT_VX(vrem_vx_d, 8)
1899
1900 /* Vector Widening Integer Multiply Instructions */
1901 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1902 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1903 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1904 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1905 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1906 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1907 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1908 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1909 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
1910 GEN_VEXT_VV(vwmul_vv_b, 2)
1911 GEN_VEXT_VV(vwmul_vv_h, 4)
1912 GEN_VEXT_VV(vwmul_vv_w, 8)
1913 GEN_VEXT_VV(vwmulu_vv_b, 2)
1914 GEN_VEXT_VV(vwmulu_vv_h, 4)
1915 GEN_VEXT_VV(vwmulu_vv_w, 8)
1916 GEN_VEXT_VV(vwmulsu_vv_b, 2)
1917 GEN_VEXT_VV(vwmulsu_vv_h, 4)
1918 GEN_VEXT_VV(vwmulsu_vv_w, 8)
1919
1920 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1921 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1922 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1923 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1924 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1925 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1926 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1927 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1928 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
1929 GEN_VEXT_VX(vwmul_vx_b, 2)
1930 GEN_VEXT_VX(vwmul_vx_h, 4)
1931 GEN_VEXT_VX(vwmul_vx_w, 8)
1932 GEN_VEXT_VX(vwmulu_vx_b, 2)
1933 GEN_VEXT_VX(vwmulu_vx_h, 4)
1934 GEN_VEXT_VX(vwmulu_vx_w, 8)
1935 GEN_VEXT_VX(vwmulsu_vx_b, 2)
1936 GEN_VEXT_VX(vwmulsu_vx_h, 4)
1937 GEN_VEXT_VX(vwmulsu_vx_w, 8)
1938
1939 /* Vector Single-Width Integer Multiply-Add Instructions */
1940 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
1941 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
1942 { \
1943 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
1944 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1945 TD d = *((TD *)vd + HD(i)); \
1946 *((TD *)vd + HD(i)) = OP(s2, s1, d); \
1947 }
1948
1949 #define DO_MACC(N, M, D) (M * N + D)
1950 #define DO_NMSAC(N, M, D) (-(M * N) + D)
1951 #define DO_MADD(N, M, D) (M * D + N)
1952 #define DO_NMSUB(N, M, D) (-(M * D) + N)
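/*
 * The ternary OP is invoked as OP(s2, s1, d), so with the definitions
 * above: vmacc computes vd = (vs1 * vs2) + vd, vnmsac computes
 * vd = -(vs1 * vs2) + vd, vmadd computes vd = (vs1 * vd) + vs2, and
 * vnmsub computes vd = -(vs1 * vd) + vs2.
 */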
1953 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1954 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1955 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1956 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1957 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1958 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1959 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1960 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1961 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1962 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1963 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1964 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1965 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1966 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1967 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1968 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
1969 GEN_VEXT_VV(vmacc_vv_b, 1)
1970 GEN_VEXT_VV(vmacc_vv_h, 2)
1971 GEN_VEXT_VV(vmacc_vv_w, 4)
1972 GEN_VEXT_VV(vmacc_vv_d, 8)
1973 GEN_VEXT_VV(vnmsac_vv_b, 1)
1974 GEN_VEXT_VV(vnmsac_vv_h, 2)
1975 GEN_VEXT_VV(vnmsac_vv_w, 4)
1976 GEN_VEXT_VV(vnmsac_vv_d, 8)
1977 GEN_VEXT_VV(vmadd_vv_b, 1)
1978 GEN_VEXT_VV(vmadd_vv_h, 2)
1979 GEN_VEXT_VV(vmadd_vv_w, 4)
1980 GEN_VEXT_VV(vmadd_vv_d, 8)
1981 GEN_VEXT_VV(vnmsub_vv_b, 1)
1982 GEN_VEXT_VV(vnmsub_vv_h, 2)
1983 GEN_VEXT_VV(vnmsub_vv_w, 4)
1984 GEN_VEXT_VV(vnmsub_vv_d, 8)
1985
1986 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
1987 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
1988 { \
1989 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1990 TD d = *((TD *)vd + HD(i)); \
1991 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \
1992 }
1993
1994 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1995 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1996 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1997 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1998 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1999 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
2000 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
2001 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
2002 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
2003 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
2004 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
2005 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
2006 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
2007 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
2008 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
2009 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
2010 GEN_VEXT_VX(vmacc_vx_b, 1)
2011 GEN_VEXT_VX(vmacc_vx_h, 2)
2012 GEN_VEXT_VX(vmacc_vx_w, 4)
2013 GEN_VEXT_VX(vmacc_vx_d, 8)
2014 GEN_VEXT_VX(vnmsac_vx_b, 1)
2015 GEN_VEXT_VX(vnmsac_vx_h, 2)
2016 GEN_VEXT_VX(vnmsac_vx_w, 4)
2017 GEN_VEXT_VX(vnmsac_vx_d, 8)
2018 GEN_VEXT_VX(vmadd_vx_b, 1)
2019 GEN_VEXT_VX(vmadd_vx_h, 2)
2020 GEN_VEXT_VX(vmadd_vx_w, 4)
2021 GEN_VEXT_VX(vmadd_vx_d, 8)
2022 GEN_VEXT_VX(vnmsub_vx_b, 1)
2023 GEN_VEXT_VX(vnmsub_vx_h, 2)
2024 GEN_VEXT_VX(vnmsub_vx_w, 4)
2025 GEN_VEXT_VX(vnmsub_vx_d, 8)
2026
2027 /* Vector Widening Integer Multiply-Add Instructions */
2028 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
2029 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
2030 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
2031 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
2032 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
2033 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
2034 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
2035 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
2036 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
2037 GEN_VEXT_VV(vwmaccu_vv_b, 2)
2038 GEN_VEXT_VV(vwmaccu_vv_h, 4)
2039 GEN_VEXT_VV(vwmaccu_vv_w, 8)
2040 GEN_VEXT_VV(vwmacc_vv_b, 2)
2041 GEN_VEXT_VV(vwmacc_vv_h, 4)
2042 GEN_VEXT_VV(vwmacc_vv_w, 8)
2043 GEN_VEXT_VV(vwmaccsu_vv_b, 2)
2044 GEN_VEXT_VV(vwmaccsu_vv_h, 4)
2045 GEN_VEXT_VV(vwmaccsu_vv_w, 8)
2046
2047 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
2048 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
2049 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
2050 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
2051 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
2052 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
2053 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
2054 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
2055 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
2056 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
2057 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
2058 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
2059 GEN_VEXT_VX(vwmaccu_vx_b, 2)
2060 GEN_VEXT_VX(vwmaccu_vx_h, 4)
2061 GEN_VEXT_VX(vwmaccu_vx_w, 8)
2062 GEN_VEXT_VX(vwmacc_vx_b, 2)
2063 GEN_VEXT_VX(vwmacc_vx_h, 4)
2064 GEN_VEXT_VX(vwmacc_vx_w, 8)
2065 GEN_VEXT_VX(vwmaccsu_vx_b, 2)
2066 GEN_VEXT_VX(vwmaccsu_vx_h, 4)
2067 GEN_VEXT_VX(vwmaccsu_vx_w, 8)
2068 GEN_VEXT_VX(vwmaccus_vx_b, 2)
2069 GEN_VEXT_VX(vwmaccus_vx_h, 4)
2070 GEN_VEXT_VX(vwmaccus_vx_w, 8)
2071
2072 /* Vector Integer Merge and Move Instructions */
2073 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \
2074 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \
2075 uint32_t desc) \
2076 { \
2077 uint32_t vl = env->vl; \
2078 uint32_t esz = sizeof(ETYPE); \
2079 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2080 uint32_t vta = vext_vta(desc); \
2081 uint32_t i; \
2082 \
2083 VSTART_CHECK_EARLY_EXIT(env, vl); \
2084 \
2085 for (i = env->vstart; i < vl; i++) { \
2086 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
2087 *((ETYPE *)vd + H(i)) = s1; \
2088 } \
2089 env->vstart = 0; \
2090 /* set tail elements to 1s */ \
2091 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
2092 }
2093
2094 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1)
2095 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
2096 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
2097 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
2098
2099 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \
2100 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \
2101 uint32_t desc) \
2102 { \
2103 uint32_t vl = env->vl; \
2104 uint32_t esz = sizeof(ETYPE); \
2105 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2106 uint32_t vta = vext_vta(desc); \
2107 uint32_t i; \
2108 \
2109 VSTART_CHECK_EARLY_EXIT(env, vl); \
2110 \
2111 for (i = env->vstart; i < vl; i++) { \
2112 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \
2113 } \
2114 env->vstart = 0; \
2115 /* set tail elements to 1s */ \
2116 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
2117 }
2118
2119 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1)
2120 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
2121 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
2122 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
2123
2124 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \
2125 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
2126 CPURISCVState *env, uint32_t desc) \
2127 { \
2128 uint32_t vl = env->vl; \
2129 uint32_t esz = sizeof(ETYPE); \
2130 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2131 uint32_t vta = vext_vta(desc); \
2132 uint32_t i; \
2133 \
2134 VSTART_CHECK_EARLY_EXIT(env, vl); \
2135 \
2136 for (i = env->vstart; i < vl; i++) { \
2137 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \
2138 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \
2139 } \
2140 env->vstart = 0; \
2141 /* set tail elements to 1s */ \
2142 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
2143 }
2144
2145 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1)
2146 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
2147 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
2148 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
2149
2150 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \
2151 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
2152 void *vs2, CPURISCVState *env, uint32_t desc) \
2153 { \
2154 uint32_t vl = env->vl; \
2155 uint32_t esz = sizeof(ETYPE); \
2156 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2157 uint32_t vta = vext_vta(desc); \
2158 uint32_t i; \
2159 \
2160 VSTART_CHECK_EARLY_EXIT(env, vl); \
2161 \
2162 for (i = env->vstart; i < vl; i++) { \
2163 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
2164 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \
2165 (ETYPE)(target_long)s1); \
2166 *((ETYPE *)vd + H(i)) = d; \
2167 } \
2168 env->vstart = 0; \
2169 /* set tail elements to 1s */ \
2170 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
2171 }
2172
2173 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1)
2174 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
2175 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
2176 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
2177
2178 /*
2179 * Vector Fixed-Point Arithmetic Instructions
2180 */
2181
2182 /* Vector Single-Width Saturating Add and Subtract */
2183
2184 /*
2185 * Fixed-point instructions generally involve a rounding mode and saturation,
2186 * so define common macros for fixed-point operations here.
2187 */
2188 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
2189 CPURISCVState *env, int vxrm);
2190
2191 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
2192 static inline void \
2193 do_##NAME(void *vd, void *vs1, void *vs2, int i, \
2194 CPURISCVState *env, int vxrm) \
2195 { \
2196 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
2197 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2198 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \
2199 }
2200
2201 static inline void
2202 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
2203 CPURISCVState *env,
2204 uint32_t vl, uint32_t vm, int vxrm,
2205 opivv2_rm_fn *fn, uint32_t vma, uint32_t esz)
2206 {
2207 for (uint32_t i = env->vstart; i < vl; i++) {
2208 if (!vm && !vext_elem_mask(v0, i)) {
2209 /* set masked-off elements to 1s */
2210 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
2211 continue;
2212 }
2213 fn(vd, vs1, vs2, i, env, vxrm);
2214 }
2215 env->vstart = 0;
2216 }
2217
2218 static inline void
2219 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
2220 CPURISCVState *env,
2221 uint32_t desc,
2222 opivv2_rm_fn *fn, uint32_t esz)
2223 {
2224 uint32_t vm = vext_vm(desc);
2225 uint32_t vl = env->vl;
2226 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
2227 uint32_t vta = vext_vta(desc);
2228 uint32_t vma = vext_vma(desc);
2229
2230 VSTART_CHECK_EARLY_EXIT(env, vl);
2231
2232 switch (env->vxrm) {
2233 case 0: /* rnu */
2234 vext_vv_rm_1(vd, v0, vs1, vs2,
2235 env, vl, vm, 0, fn, vma, esz);
2236 break;
2237 case 1: /* rne */
2238 vext_vv_rm_1(vd, v0, vs1, vs2,
2239 env, vl, vm, 1, fn, vma, esz);
2240 break;
2241 case 2: /* rdn */
2242 vext_vv_rm_1(vd, v0, vs1, vs2,
2243 env, vl, vm, 2, fn, vma, esz);
2244 break;
2245 default: /* rod */
2246 vext_vv_rm_1(vd, v0, vs1, vs2,
2247 env, vl, vm, 3, fn, vma, esz);
2248 break;
2249 }
2250 /* set tail elements to 1s */
2251 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
2252 }
2253
2254 /* generate helpers for fixed point instructions with OPIVV format */
2255 #define GEN_VEXT_VV_RM(NAME, ESZ) \
2256 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
2257 CPURISCVState *env, uint32_t desc) \
2258 { \
2259 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \
2260 do_##NAME, ESZ); \
2261 }
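/*
 * A fixed-point helper is therefore assembled in two steps, e.g. for
 * vsaddu.vv below: RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1,
 * saddu8) expands OPIVV2_RM into the per-element do_vsaddu_vv_b(), and
 * GEN_VEXT_VV_RM(vsaddu_vv_b, 1) wraps it in the HELPER() entry point
 * that handles masking, the rounding-mode dispatch and the tail policy.
 */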
2262
2263 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a,
2264 uint8_t b)
2265 {
2266 uint8_t res = a + b;
2267 if (res < a) {
2268 res = UINT8_MAX;
2269 env->vxsat = 0x1;
2270 }
2271 return res;
2272 }
2273
2274 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
2275 uint16_t b)
2276 {
2277 uint16_t res = a + b;
2278 if (res < a) {
2279 res = UINT16_MAX;
2280 env->vxsat = 0x1;
2281 }
2282 return res;
2283 }
2284
2285 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
2286 uint32_t b)
2287 {
2288 uint32_t res = a + b;
2289 if (res < a) {
2290 res = UINT32_MAX;
2291 env->vxsat = 0x1;
2292 }
2293 return res;
2294 }
2295
2296 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
2297 uint64_t b)
2298 {
2299 uint64_t res = a + b;
2300 if (res < a) {
2301 res = UINT64_MAX;
2302 env->vxsat = 0x1;
2303 }
2304 return res;
2305 }
2306
2307 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
2308 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
2309 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
2310 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
2311 GEN_VEXT_VV_RM(vsaddu_vv_b, 1)
2312 GEN_VEXT_VV_RM(vsaddu_vv_h, 2)
2313 GEN_VEXT_VV_RM(vsaddu_vv_w, 4)
2314 GEN_VEXT_VV_RM(vsaddu_vv_d, 8)
2315
2316 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
2317 CPURISCVState *env, int vxrm);
2318
2319 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
2320 static inline void \
2321 do_##NAME(void *vd, target_long s1, void *vs2, int i, \
2322 CPURISCVState *env, int vxrm) \
2323 { \
2324 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2325 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \
2326 }
2327
2328 static inline void
2329 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
2330 CPURISCVState *env,
2331 uint32_t vl, uint32_t vm, int vxrm,
2332 opivx2_rm_fn *fn, uint32_t vma, uint32_t esz)
2333 {
2334 for (uint32_t i = env->vstart; i < vl; i++) {
2335 if (!vm && !vext_elem_mask(v0, i)) {
2336 /* set masked-off elements to 1s */
2337 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
2338 continue;
2339 }
2340 fn(vd, s1, vs2, i, env, vxrm);
2341 }
2342 env->vstart = 0;
2343 }
2344
2345 static inline void
2346 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2347 CPURISCVState *env,
2348 uint32_t desc,
2349 opivx2_rm_fn *fn, uint32_t esz)
2350 {
2351 uint32_t vm = vext_vm(desc);
2352 uint32_t vl = env->vl;
2353 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
2354 uint32_t vta = vext_vta(desc);
2355 uint32_t vma = vext_vma(desc);
2356
2357 VSTART_CHECK_EARLY_EXIT(env, vl);
2358
2359 switch (env->vxrm) {
2360 case 0: /* rnu */
2361 vext_vx_rm_1(vd, v0, s1, vs2,
2362 env, vl, vm, 0, fn, vma, esz);
2363 break;
2364 case 1: /* rne */
2365 vext_vx_rm_1(vd, v0, s1, vs2,
2366 env, vl, vm, 1, fn, vma, esz);
2367 break;
2368 case 2: /* rdn */
2369 vext_vx_rm_1(vd, v0, s1, vs2,
2370 env, vl, vm, 2, fn, vma, esz);
2371 break;
2372 default: /* rod */
2373 vext_vx_rm_1(vd, v0, s1, vs2,
2374 env, vl, vm, 3, fn, vma, esz);
2375 break;
2376 }
2377 /* set tail elements to 1s */
2378 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
2379 }
2380
2381 /* generate helpers for fixed point instructions with OPIVX format */
2382 #define GEN_VEXT_VX_RM(NAME, ESZ) \
2383 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
2384 void *vs2, CPURISCVState *env, \
2385 uint32_t desc) \
2386 { \
2387 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \
2388 do_##NAME, ESZ); \
2389 }
2390
2391 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2392 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2393 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2394 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
2395 GEN_VEXT_VX_RM(vsaddu_vx_b, 1)
2396 GEN_VEXT_VX_RM(vsaddu_vx_h, 2)
2397 GEN_VEXT_VX_RM(vsaddu_vx_w, 4)
2398 GEN_VEXT_VX_RM(vsaddu_vx_d, 8)
2399
2400 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2401 {
2402 int8_t res = a + b;
2403 if ((res ^ a) & (res ^ b) & INT8_MIN) {
2404 res = a > 0 ? INT8_MAX : INT8_MIN;
2405 env->vxsat = 0x1;
2406 }
2407 return res;
2408 }
2409
2410 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a,
2411 int16_t b)
2412 {
2413 int16_t res = a + b;
2414 if ((res ^ a) & (res ^ b) & INT16_MIN) {
2415 res = a > 0 ? INT16_MAX : INT16_MIN;
2416 env->vxsat = 0x1;
2417 }
2418 return res;
2419 }
2420
2421 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a,
2422 int32_t b)
2423 {
2424 int32_t res = a + b;
2425 if ((res ^ a) & (res ^ b) & INT32_MIN) {
2426 res = a > 0 ? INT32_MAX : INT32_MIN;
2427 env->vxsat = 0x1;
2428 }
2429 return res;
2430 }
2431
2432 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a,
2433 int64_t b)
2434 {
2435 int64_t res = a + b;
2436 if ((res ^ a) & (res ^ b) & INT64_MIN) {
2437 res = a > 0 ? INT64_MAX : INT64_MIN;
2438 env->vxsat = 0x1;
2439 }
2440 return res;
2441 }
2442
2443 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2444 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2445 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2446 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
2447 GEN_VEXT_VV_RM(vsadd_vv_b, 1)
2448 GEN_VEXT_VV_RM(vsadd_vv_h, 2)
2449 GEN_VEXT_VV_RM(vsadd_vv_w, 4)
2450 GEN_VEXT_VV_RM(vsadd_vv_d, 8)
2451
2452 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2453 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2454 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2455 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
2456 GEN_VEXT_VX_RM(vsadd_vx_b, 1)
2457 GEN_VEXT_VX_RM(vsadd_vx_h, 2)
2458 GEN_VEXT_VX_RM(vsadd_vx_w, 4)
2459 GEN_VEXT_VX_RM(vsadd_vx_d, 8)
2460
2461 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a,
2462 uint8_t b)
2463 {
2464 uint8_t res = a - b;
2465 if (res > a) {
2466 res = 0;
2467 env->vxsat = 0x1;
2468 }
2469 return res;
2470 }
2471
2472 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2473 uint16_t b)
2474 {
2475 uint16_t res = a - b;
2476 if (res > a) {
2477 res = 0;
2478 env->vxsat = 0x1;
2479 }
2480 return res;
2481 }
2482
2483 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2484 uint32_t b)
2485 {
2486 uint32_t res = a - b;
2487 if (res > a) {
2488 res = 0;
2489 env->vxsat = 0x1;
2490 }
2491 return res;
2492 }
2493
2494 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2495 uint64_t b)
2496 {
2497 uint64_t res = a - b;
2498 if (res > a) {
2499 res = 0;
2500 env->vxsat = 0x1;
2501 }
2502 return res;
2503 }
2504
2505 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2506 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2507 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2508 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
2509 GEN_VEXT_VV_RM(vssubu_vv_b, 1)
2510 GEN_VEXT_VV_RM(vssubu_vv_h, 2)
2511 GEN_VEXT_VV_RM(vssubu_vv_w, 4)
2512 GEN_VEXT_VV_RM(vssubu_vv_d, 8)
2513
2514 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2515 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2516 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2517 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
2518 GEN_VEXT_VX_RM(vssubu_vx_b, 1)
2519 GEN_VEXT_VX_RM(vssubu_vx_h, 2)
2520 GEN_VEXT_VX_RM(vssubu_vx_w, 4)
2521 GEN_VEXT_VX_RM(vssubu_vx_d, 8)
2522
2523 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2524 {
2525 int8_t res = a - b;
2526 if ((res ^ a) & (a ^ b) & INT8_MIN) {
2527 res = a >= 0 ? INT8_MAX : INT8_MIN;
2528 env->vxsat = 0x1;
2529 }
2530 return res;
2531 }
2532
2533 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a,
2534 int16_t b)
2535 {
2536 int16_t res = a - b;
2537 if ((res ^ a) & (a ^ b) & INT16_MIN) {
2538 res = a >= 0 ? INT16_MAX : INT16_MIN;
2539 env->vxsat = 0x1;
2540 }
2541 return res;
2542 }
2543
2544 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a,
2545 int32_t b)
2546 {
2547 int32_t res = a - b;
2548 if ((res ^ a) & (a ^ b) & INT32_MIN) {
2549 res = a >= 0 ? INT32_MAX : INT32_MIN;
2550 env->vxsat = 0x1;
2551 }
2552 return res;
2553 }
2554
2555 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a,
2556 int64_t b)
2557 {
2558 int64_t res = a - b;
2559 if ((res ^ a) & (a ^ b) & INT64_MIN) {
2560 res = a >= 0 ? INT64_MAX : INT64_MIN;
2561 env->vxsat = 0x1;
2562 }
2563 return res;
2564 }
2565
2566 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2567 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2568 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2569 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
2570 GEN_VEXT_VV_RM(vssub_vv_b, 1)
2571 GEN_VEXT_VV_RM(vssub_vv_h, 2)
2572 GEN_VEXT_VV_RM(vssub_vv_w, 4)
2573 GEN_VEXT_VV_RM(vssub_vv_d, 8)
2574
2575 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2576 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2577 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2578 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
2579 GEN_VEXT_VX_RM(vssub_vx_b, 1)
2580 GEN_VEXT_VX_RM(vssub_vx_h, 2)
2581 GEN_VEXT_VX_RM(vssub_vx_w, 4)
2582 GEN_VEXT_VX_RM(vssub_vx_d, 8)
2583
2584 /* Vector Single-Width Averaging Add and Subtract */
2585 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2586 {
2587 uint8_t d = extract64(v, shift, 1);
2588 uint8_t d1;
2589 uint64_t D1, D2;
2590
2591 if (shift == 0 || shift > 64) {
2592 return 0;
2593 }
2594
2595 d1 = extract64(v, shift - 1, 1);
2596 D1 = extract64(v, 0, shift);
2597 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2598 return d1;
2599 } else if (vxrm == 1) { /* round-to-nearest-even */
2600 if (shift > 1) {
2601 D2 = extract64(v, 0, shift - 1);
2602 return d1 & ((D2 != 0) | d);
2603 } else {
2604 return d1 & d;
2605 }
2606 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2607 return !d & (D1 != 0);
2608 }
2609 return 0; /* round-down (truncate) */
2610 }
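/*
 * Worked example: v = 10 (0b1010), shift = 2, i.e. computing 10/4 = 2.5
 * with discarded bits 0b10.  Here d = bit 2 = 0, d1 = bit 1 = 1,
 * D1 = 0b10 and D2 = 0b0, so the rounding increment is:
 *   rnu (0): d1                     = 1  ->  (10 >> 2) + 1 = 3
 *   rne (1): d1 & ((D2 != 0) | d)   = 0  ->  2   (tie goes to even)
 *   rdn (2): 0                      = 0  ->  2   (truncate)
 *   rod (3): !d & (D1 != 0)         = 1  ->  3   (jam towards odd)
 */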
2611
2612 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a,
2613 int32_t b)
2614 {
2615 int64_t res = (int64_t)a + b;
2616 uint8_t round = get_round(vxrm, res, 1);
2617
2618 return (res >> 1) + round;
2619 }
2620
2621 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a,
2622 int64_t b)
2623 {
2624 int64_t res = a + b;
2625 uint8_t round = get_round(vxrm, res, 1);
2626 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2627
2628 /* With signed overflow, bit 64 is inverse of bit 63. */
2629 return ((res >> 1) ^ over) + round;
2630 }
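/*
 * Example of the overflow fix-up, scaled down to 8 bits: a = b = 100,
 * so the wrapped sum res = 200 - 256 = -56 (0xc8) and the arithmetic
 * shift gives -28 (0xe4).  'over' is the sign-bit mask 0x80, and
 * (-28 ^ 0x80) = 100 (0x64), the correct average, before rounding.
 */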
2631
2632 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2633 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2634 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2635 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
2636 GEN_VEXT_VV_RM(vaadd_vv_b, 1)
2637 GEN_VEXT_VV_RM(vaadd_vv_h, 2)
2638 GEN_VEXT_VV_RM(vaadd_vv_w, 4)
2639 GEN_VEXT_VV_RM(vaadd_vv_d, 8)
2640
2641 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2642 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2643 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2644 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
2645 GEN_VEXT_VX_RM(vaadd_vx_b, 1)
2646 GEN_VEXT_VX_RM(vaadd_vx_h, 2)
2647 GEN_VEXT_VX_RM(vaadd_vx_w, 4)
2648 GEN_VEXT_VX_RM(vaadd_vx_d, 8)
2649
2650 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2651 uint32_t a, uint32_t b)
2652 {
2653 uint64_t res = (uint64_t)a + b;
2654 uint8_t round = get_round(vxrm, res, 1);
2655
2656 return (res >> 1) + round;
2657 }
2658
2659 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
2660 uint64_t a, uint64_t b)
2661 {
2662 uint64_t res = a + b;
2663 uint8_t round = get_round(vxrm, res, 1);
2664 uint64_t over = (uint64_t)(res < a) << 63;
2665
2666 return ((res >> 1) | over) + round;
2667 }
2668
2669 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
2670 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
2671 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
2672 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
2673 GEN_VEXT_VV_RM(vaaddu_vv_b, 1)
2674 GEN_VEXT_VV_RM(vaaddu_vv_h, 2)
2675 GEN_VEXT_VV_RM(vaaddu_vv_w, 4)
2676 GEN_VEXT_VV_RM(vaaddu_vv_d, 8)
2677
2678 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
2679 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
2680 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
2681 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
2682 GEN_VEXT_VX_RM(vaaddu_vx_b, 1)
2683 GEN_VEXT_VX_RM(vaaddu_vx_h, 2)
2684 GEN_VEXT_VX_RM(vaaddu_vx_w, 4)
2685 GEN_VEXT_VX_RM(vaaddu_vx_d, 8)
2686
2687 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a,
2688 int32_t b)
2689 {
2690 int64_t res = (int64_t)a - b;
2691 uint8_t round = get_round(vxrm, res, 1);
2692
2693 return (res >> 1) + round;
2694 }
2695
2696 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a,
2697 int64_t b)
2698 {
2699 int64_t res = (int64_t)a - b;
2700 uint8_t round = get_round(vxrm, res, 1);
2701 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2702
2703 /* With signed overflow, bit 64 is inverse of bit 63. */
2704 return ((res >> 1) ^ over) + round;
2705 }
2706
2707 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2708 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2709 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2710 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
2711 GEN_VEXT_VV_RM(vasub_vv_b, 1)
2712 GEN_VEXT_VV_RM(vasub_vv_h, 2)
2713 GEN_VEXT_VV_RM(vasub_vv_w, 4)
2714 GEN_VEXT_VV_RM(vasub_vv_d, 8)
2715
2716 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2717 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2718 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2719 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
2720 GEN_VEXT_VX_RM(vasub_vx_b, 1)
2721 GEN_VEXT_VX_RM(vasub_vx_h, 2)
2722 GEN_VEXT_VX_RM(vasub_vx_w, 4)
2723 GEN_VEXT_VX_RM(vasub_vx_d, 8)
2724
2725 static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2726 uint32_t a, uint32_t b)
2727 {
2728 int64_t res = (int64_t)a - b;
2729 uint8_t round = get_round(vxrm, res, 1);
2730
2731 return (res >> 1) + round;
2732 }
2733
2734 static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
2735 uint64_t a, uint64_t b)
2736 {
2737 uint64_t res = (uint64_t)a - b;
2738 uint8_t round = get_round(vxrm, res, 1);
2739 uint64_t over = (uint64_t)(res > a) << 63;
2740
2741 return ((res >> 1) | over) + round;
2742 }
2743
2744 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
2745 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
2746 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
2747 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
2748 GEN_VEXT_VV_RM(vasubu_vv_b, 1)
2749 GEN_VEXT_VV_RM(vasubu_vv_h, 2)
2750 GEN_VEXT_VV_RM(vasubu_vv_w, 4)
2751 GEN_VEXT_VV_RM(vasubu_vv_d, 8)
2752
2753 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
2754 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
2755 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
2756 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
2757 GEN_VEXT_VX_RM(vasubu_vx_b, 1)
2758 GEN_VEXT_VX_RM(vasubu_vx_h, 2)
2759 GEN_VEXT_VX_RM(vasubu_vx_w, 4)
2760 GEN_VEXT_VX_RM(vasubu_vx_d, 8)
2761
2762 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2763 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2764 {
2765 uint8_t round;
2766 int16_t res;
2767
2768 res = (int16_t)a * (int16_t)b;
2769 round = get_round(vxrm, res, 7);
2770 res = (res >> 7) + round;
2771
2772 if (res > INT8_MAX) {
2773 env->vxsat = 0x1;
2774 return INT8_MAX;
2775 } else if (res < INT8_MIN) {
2776 env->vxsat = 0x1;
2777 return INT8_MIN;
2778 } else {
2779 return res;
2780 }
2781 }
2782
2783 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2784 {
2785 uint8_t round;
2786 int32_t res;
2787
2788 res = (int32_t)a * (int32_t)b;
2789 round = get_round(vxrm, res, 15);
2790 res = (res >> 15) + round;
2791
2792 if (res > INT16_MAX) {
2793 env->vxsat = 0x1;
2794 return INT16_MAX;
2795 } else if (res < INT16_MIN) {
2796 env->vxsat = 0x1;
2797 return INT16_MIN;
2798 } else {
2799 return res;
2800 }
2801 }
2802
2803 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2804 {
2805 uint8_t round;
2806 int64_t res;
2807
2808 res = (int64_t)a * (int64_t)b;
2809 round = get_round(vxrm, res, 31);
2810 res = (res >> 31) + round;
2811
2812 if (res > INT32_MAX) {
2813 env->vxsat = 0x1;
2814 return INT32_MAX;
2815 } else if (res < INT32_MIN) {
2816 env->vxsat = 0x1;
2817 return INT32_MIN;
2818 } else {
2819 return res;
2820 }
2821 }
2822
2823 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2824 {
2825 uint8_t round;
2826 uint64_t hi_64, lo_64;
2827 int64_t res;
2828
2829 if (a == INT64_MIN && b == INT64_MIN) {
2830 env->vxsat = 1;
2831 return INT64_MAX;
2832 }
2833
2834 muls64(&lo_64, &hi_64, a, b);
2835 round = get_round(vxrm, lo_64, 63);
2836 /*
2837 * Cannot overflow, as there are always
2838 * 2 sign bits after multiply.
2839 */
2840 res = (hi_64 << 1) | (lo_64 >> 63);
2841 if (round) {
2842 if (res == INT64_MAX) {
2843 env->vxsat = 1;
2844 } else {
2845 res += 1;
2846 }
2847 }
2848 return res;
2849 }
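/*
 * vsmul treats the operands as fixed-point fractions in [-1, 1): the
 * double-width product is shifted right by SEW-1 with rounding and then
 * saturated.  E.g. vsmul8(0x40, 0x40) is 0.5 * 0.5: (64 * 64) >> 7 = 32
 * (0x20, i.e. 0.25), while vsmul8(0x80, 0x80) is (-1) * (-1) = +1.0,
 * which does not fit in Q0.7 and saturates to INT8_MAX with vxsat set.
 */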
2850
2851 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2852 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2853 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2854 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
2855 GEN_VEXT_VV_RM(vsmul_vv_b, 1)
2856 GEN_VEXT_VV_RM(vsmul_vv_h, 2)
2857 GEN_VEXT_VV_RM(vsmul_vv_w, 4)
2858 GEN_VEXT_VV_RM(vsmul_vv_d, 8)
2859
2860 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2861 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2862 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2863 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
2864 GEN_VEXT_VX_RM(vsmul_vx_b, 1)
2865 GEN_VEXT_VX_RM(vsmul_vx_h, 2)
2866 GEN_VEXT_VX_RM(vsmul_vx_w, 4)
2867 GEN_VEXT_VX_RM(vsmul_vx_d, 8)
2868
2869 /* Vector Single-Width Scaling Shift Instructions */
2870 static inline uint8_t
2871 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2872 {
2873 uint8_t round, shift = b & 0x7;
2874 uint8_t res;
2875
2876 round = get_round(vxrm, a, shift);
2877 res = (a >> shift) + round;
2878 return res;
2879 }
2880 static inline uint16_t
2881 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2882 {
2883 uint8_t round, shift = b & 0xf;
2884
2885 round = get_round(vxrm, a, shift);
2886 return (a >> shift) + round;
2887 }
2888 static inline uint32_t
2889 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2890 {
2891 uint8_t round, shift = b & 0x1f;
2892
2893 round = get_round(vxrm, a, shift);
2894 return (a >> shift) + round;
2895 }
2896 static inline uint64_t
2897 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2898 {
2899 uint8_t round, shift = b & 0x3f;
2900
2901 round = get_round(vxrm, a, shift);
2902 return (a >> shift) + round;
2903 }
2904 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2905 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2906 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2907 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
2908 GEN_VEXT_VV_RM(vssrl_vv_b, 1)
2909 GEN_VEXT_VV_RM(vssrl_vv_h, 2)
2910 GEN_VEXT_VV_RM(vssrl_vv_w, 4)
2911 GEN_VEXT_VV_RM(vssrl_vv_d, 8)
2912
2913 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2914 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2915 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2916 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
2917 GEN_VEXT_VX_RM(vssrl_vx_b, 1)
2918 GEN_VEXT_VX_RM(vssrl_vx_h, 2)
2919 GEN_VEXT_VX_RM(vssrl_vx_w, 4)
2920 GEN_VEXT_VX_RM(vssrl_vx_d, 8)
2921
2922 static inline int8_t
2923 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2924 {
2925 uint8_t round, shift = b & 0x7;
2926
2927 round = get_round(vxrm, a, shift);
2928 return (a >> shift) + round;
2929 }
2930 static inline int16_t
2931 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2932 {
2933 uint8_t round, shift = b & 0xf;
2934
2935 round = get_round(vxrm, a, shift);
2936 return (a >> shift) + round;
2937 }
2938 static inline int32_t
2939 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2940 {
2941 uint8_t round, shift = b & 0x1f;
2942
2943 round = get_round(vxrm, a, shift);
2944 return (a >> shift) + round;
2945 }
2946 static inline int64_t
2947 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2948 {
2949 uint8_t round, shift = b & 0x3f;
2950
2951 round = get_round(vxrm, a, shift);
2952 return (a >> shift) + round;
2953 }
2954
2955 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2956 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2957 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2958 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
2959 GEN_VEXT_VV_RM(vssra_vv_b, 1)
2960 GEN_VEXT_VV_RM(vssra_vv_h, 2)
2961 GEN_VEXT_VV_RM(vssra_vv_w, 4)
2962 GEN_VEXT_VV_RM(vssra_vv_d, 8)
2963
2964 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2965 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2966 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2967 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
2968 GEN_VEXT_VX_RM(vssra_vx_b, 1)
2969 GEN_VEXT_VX_RM(vssra_vx_h, 2)
2970 GEN_VEXT_VX_RM(vssra_vx_w, 4)
2971 GEN_VEXT_VX_RM(vssra_vx_d, 8)
2972
2973 /* Vector Narrowing Fixed-Point Clip Instructions */
2974 static inline int8_t
2975 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2976 {
2977 uint8_t round, shift = b & 0xf;
2978 int16_t res;
2979
2980 round = get_round(vxrm, a, shift);
2981 res = (a >> shift) + round;
2982 if (res > INT8_MAX) {
2983 env->vxsat = 0x1;
2984 return INT8_MAX;
2985 } else if (res < INT8_MIN) {
2986 env->vxsat = 0x1;
2987 return INT8_MIN;
2988 } else {
2989 return res;
2990 }
2991 }
2992
2993 static inline int16_t
2994 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2995 {
2996 uint8_t round, shift = b & 0x1f;
2997 int32_t res;
2998
2999 round = get_round(vxrm, a, shift);
3000 res = (a >> shift) + round;
3001 if (res > INT16_MAX) {
3002 env->vxsat = 0x1;
3003 return INT16_MAX;
3004 } else if (res < INT16_MIN) {
3005 env->vxsat = 0x1;
3006 return INT16_MIN;
3007 } else {
3008 return res;
3009 }
3010 }
3011
3012 static inline int32_t
3013 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
3014 {
3015 uint8_t round, shift = b & 0x3f;
3016 int64_t res;
3017
3018 round = get_round(vxrm, a, shift);
3019 res = (a >> shift) + round;
3020 if (res > INT32_MAX) {
3021 env->vxsat = 0x1;
3022 return INT32_MAX;
3023 } else if (res < INT32_MIN) {
3024 env->vxsat = 0x1;
3025 return INT32_MIN;
3026 } else {
3027 return res;
3028 }
3029 }
3030
3031 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
3032 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
3033 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
3034 GEN_VEXT_VV_RM(vnclip_wv_b, 1)
3035 GEN_VEXT_VV_RM(vnclip_wv_h, 2)
3036 GEN_VEXT_VV_RM(vnclip_wv_w, 4)
3037
3038 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
3039 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
3040 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
3041 GEN_VEXT_VX_RM(vnclip_wx_b, 1)
3042 GEN_VEXT_VX_RM(vnclip_wx_h, 2)
3043 GEN_VEXT_VX_RM(vnclip_wx_w, 4)
3044
3045 static inline uint8_t
3046 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
3047 {
3048 uint8_t round, shift = b & 0xf;
3049 uint16_t res;
3050
3051 round = get_round(vxrm, a, shift);
3052 res = (a >> shift) + round;
3053 if (res > UINT8_MAX) {
3054 env->vxsat = 0x1;
3055 return UINT8_MAX;
3056 } else {
3057 return res;
3058 }
3059 }
3060
3061 static inline uint16_t
3062 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
3063 {
3064 uint8_t round, shift = b & 0x1f;
3065 uint32_t res;
3066
3067 round = get_round(vxrm, a, shift);
3068 res = (a >> shift) + round;
3069 if (res > UINT16_MAX) {
3070 env->vxsat = 0x1;
3071 return UINT16_MAX;
3072 } else {
3073 return res;
3074 }
3075 }
3076
3077 static inline uint32_t
3078 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
3079 {
3080 uint8_t round, shift = b & 0x3f;
3081 uint64_t res;
3082
3083 round = get_round(vxrm, a, shift);
3084 res = (a >> shift) + round;
3085 if (res > UINT32_MAX) {
3086 env->vxsat = 0x1;
3087 return UINT32_MAX;
3088 } else {
3089 return res;
3090 }
3091 }
3092
3093 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
3094 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
3095 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
3096 GEN_VEXT_VV_RM(vnclipu_wv_b, 1)
3097 GEN_VEXT_VV_RM(vnclipu_wv_h, 2)
3098 GEN_VEXT_VV_RM(vnclipu_wv_w, 4)
3099
3100 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
3101 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
3102 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
3103 GEN_VEXT_VX_RM(vnclipu_wx_b, 1)
3104 GEN_VEXT_VX_RM(vnclipu_wx_h, 2)
3105 GEN_VEXT_VX_RM(vnclipu_wx_w, 4)
3106
3107 /*
3108 * Vector Floating-Point Arithmetic Instructions
3109 */
3110 /* Vector Single-Width Floating-Point Add/Subtract Instructions */
3111 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
3112 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
3113 CPURISCVState *env) \
3114 { \
3115 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
3116 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3117 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \
3118 }
3119
3120 #define GEN_VEXT_VV_ENV(NAME, ESZ) \
3121 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
3122 void *vs2, CPURISCVState *env, \
3123 uint32_t desc) \
3124 { \
3125 uint32_t vm = vext_vm(desc); \
3126 uint32_t vl = env->vl; \
3127 uint32_t total_elems = \
3128 vext_get_total_elems(env, desc, ESZ); \
3129 uint32_t vta = vext_vta(desc); \
3130 uint32_t vma = vext_vma(desc); \
3131 uint32_t i; \
3132 \
3133 VSTART_CHECK_EARLY_EXIT(env, vl); \
3134 \
3135 for (i = env->vstart; i < vl; i++) { \
3136 if (!vm && !vext_elem_mask(v0, i)) { \
3137 /* set masked-off elements to 1s */ \
3138 vext_set_elems_1s(vd, vma, i * ESZ, \
3139 (i + 1) * ESZ); \
3140 continue; \
3141 } \
3142 do_##NAME(vd, vs1, vs2, i, env); \
3143 } \
3144 env->vstart = 0; \
3145 /* set tail elements to 1s */ \
3146 vext_set_elems_1s(vd, vta, vl * ESZ, \
3147 total_elems * ESZ); \
3148 }
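/*
 * Illustrative note (not from the original source): OPFVV2 generates the
 * per-element worker and GEN_VEXT_VV_ENV wraps it in the masked/tail loop
 * above. For example, RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2,
 * float16_add) expands to roughly:
 *
 *     static void do_vfadd_vv_h(void *vd, void *vs1, void *vs2, int i,
 *                               CPURISCVState *env)
 *     {
 *         uint16_t s1 = *((uint16_t *)vs1 + H2(i));
 *         uint16_t s2 = *((uint16_t *)vs2 + H2(i));
 *         *((uint16_t *)vd + H2(i)) = float16_add(s2, s1, &env->fp_status);
 *     }
 *
 * and GEN_VEXT_VV_ENV(vfadd_vv_h, 2) then emits HELPER(vfadd_vv_h), which
 * calls it for every active element.
 */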
3149
3150 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
3151 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
3152 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
3153 GEN_VEXT_VV_ENV(vfadd_vv_h, 2)
3154 GEN_VEXT_VV_ENV(vfadd_vv_w, 4)
3155 GEN_VEXT_VV_ENV(vfadd_vv_d, 8)
3156
3157 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
3158 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
3159 CPURISCVState *env) \
3160 { \
3161 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3162 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
3163 }
3164
3165 #define GEN_VEXT_VF(NAME, ESZ) \
3166 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \
3167 void *vs2, CPURISCVState *env, \
3168 uint32_t desc) \
3169 { \
3170 uint32_t vm = vext_vm(desc); \
3171 uint32_t vl = env->vl; \
3172 uint32_t total_elems = \
3173 vext_get_total_elems(env, desc, ESZ); \
3174 uint32_t vta = vext_vta(desc); \
3175 uint32_t vma = vext_vma(desc); \
3176 uint32_t i; \
3177 \
3178 VSTART_CHECK_EARLY_EXIT(env, vl); \
3179 \
3180 for (i = env->vstart; i < vl; i++) { \
3181 if (!vm && !vext_elem_mask(v0, i)) { \
3182 /* set masked-off elements to 1s */ \
3183 vext_set_elems_1s(vd, vma, i * ESZ, \
3184 (i + 1) * ESZ); \
3185 continue; \
3186 } \
3187 do_##NAME(vd, s1, vs2, i, env); \
3188 } \
3189 env->vstart = 0; \
3190 /* set tail elements to 1s */ \
3191 vext_set_elems_1s(vd, vta, vl * ESZ, \
3192 total_elems * ESZ); \
3193 }
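/*
 * Illustrative note (not from the original source): in the vector-scalar
 * forms the scalar operand arrives as a uint64_t and is narrowed to the
 * element type by the (TX1)(T1)s1 cast in OPFVF2 above, so only the low
 * SEW bits of s1 participate in the operation.
 */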
3194
3195 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
3196 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
3197 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
3198 GEN_VEXT_VF(vfadd_vf_h, 2)
3199 GEN_VEXT_VF(vfadd_vf_w, 4)
3200 GEN_VEXT_VF(vfadd_vf_d, 8)
3201
3202 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
3203 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
3204 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
3205 GEN_VEXT_VV_ENV(vfsub_vv_h, 2)
3206 GEN_VEXT_VV_ENV(vfsub_vv_w, 4)
3207 GEN_VEXT_VV_ENV(vfsub_vv_d, 8)
3208 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
3209 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
3210 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
3211 GEN_VEXT_VF(vfsub_vf_h, 2)
3212 GEN_VEXT_VF(vfsub_vf_w, 4)
3213 GEN_VEXT_VF(vfsub_vf_d, 8)
3214
3215 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
3216 {
3217 return float16_sub(b, a, s);
3218 }
3219
3220 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
3221 {
3222 return float32_sub(b, a, s);
3223 }
3224
3225 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
3226 {
3227 return float64_sub(b, a, s);
3228 }
3229
3230 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
3231 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
3232 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
3233 GEN_VEXT_VF(vfrsub_vf_h, 2)
3234 GEN_VEXT_VF(vfrsub_vf_w, 4)
3235 GEN_VEXT_VF(vfrsub_vf_d, 8)
3236
3237 /* Vector Widening Floating-Point Add/Subtract Instructions */
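/*
 * Illustrative note (not from the original source): the .vv/.vf widening
 * helpers below first convert both SEW-wide inputs up to 2*SEW
 * (float16_to_float32 / float32_to_float64) and then operate at 2*SEW,
 * while the .wv/.wf forms take an already 2*SEW-wide first operand and
 * widen only the second one (see vfwaddw16/vfwaddw32).
 */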
3238 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
3239 {
3240 return float32_add(float16_to_float32(a, true, s),
3241 float16_to_float32(b, true, s), s);
3242 }
3243
3244 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
3245 {
3246 return float64_add(float32_to_float64(a, s),
3247 float32_to_float64(b, s), s);
3248
3249 }
3250
3251 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
3252 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
3253 GEN_VEXT_VV_ENV(vfwadd_vv_h, 4)
3254 GEN_VEXT_VV_ENV(vfwadd_vv_w, 8)
3255 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
3256 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
3257 GEN_VEXT_VF(vfwadd_vf_h, 4)
3258 GEN_VEXT_VF(vfwadd_vf_w, 8)
3259
3260 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
3261 {
3262 return float32_sub(float16_to_float32(a, true, s),
3263 float16_to_float32(b, true, s), s);
3264 }
3265
3266 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
3267 {
3268 return float64_sub(float32_to_float64(a, s),
3269 float32_to_float64(b, s), s);
3270
3271 }
3272
3273 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
3274 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
3275 GEN_VEXT_VV_ENV(vfwsub_vv_h, 4)
3276 GEN_VEXT_VV_ENV(vfwsub_vv_w, 8)
3277 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
3278 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
3279 GEN_VEXT_VF(vfwsub_vf_h, 4)
3280 GEN_VEXT_VF(vfwsub_vf_w, 8)
3281
3282 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
3283 {
3284 return float32_add(a, float16_to_float32(b, true, s), s);
3285 }
3286
3287 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
3288 {
3289 return float64_add(a, float32_to_float64(b, s), s);
3290 }
3291
3292 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
3293 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
3294 GEN_VEXT_VV_ENV(vfwadd_wv_h, 4)
3295 GEN_VEXT_VV_ENV(vfwadd_wv_w, 8)
3296 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
3297 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
3298 GEN_VEXT_VF(vfwadd_wf_h, 4)
3299 GEN_VEXT_VF(vfwadd_wf_w, 8)
3300
3301 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
3302 {
3303 return float32_sub(a, float16_to_float32(b, true, s), s);
3304 }
3305
3306 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
3307 {
3308 return float64_sub(a, float32_to_float64(b, s), s);
3309 }
3310
3311 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
3312 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
3313 GEN_VEXT_VV_ENV(vfwsub_wv_h, 4)
3314 GEN_VEXT_VV_ENV(vfwsub_wv_w, 8)
3315 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
3316 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
3317 GEN_VEXT_VF(vfwsub_wf_h, 4)
3318 GEN_VEXT_VF(vfwsub_wf_w, 8)
3319
3320 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */
3321 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
3322 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
3323 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
3324 GEN_VEXT_VV_ENV(vfmul_vv_h, 2)
3325 GEN_VEXT_VV_ENV(vfmul_vv_w, 4)
3326 GEN_VEXT_VV_ENV(vfmul_vv_d, 8)
3327 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
3328 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
3329 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
3330 GEN_VEXT_VF(vfmul_vf_h, 2)
3331 GEN_VEXT_VF(vfmul_vf_w, 4)
3332 GEN_VEXT_VF(vfmul_vf_d, 8)
3333
3334 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
3335 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
3336 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
3337 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2)
3338 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4)
3339 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8)
3340 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
3341 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
3342 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
3343 GEN_VEXT_VF(vfdiv_vf_h, 2)
3344 GEN_VEXT_VF(vfdiv_vf_w, 4)
3345 GEN_VEXT_VF(vfdiv_vf_d, 8)
3346
3347 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3348 {
3349 return float16_div(b, a, s);
3350 }
3351
3352 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
3353 {
3354 return float32_div(b, a, s);
3355 }
3356
3357 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
3358 {
3359 return float64_div(b, a, s);
3360 }
3361
3362 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
3363 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
3364 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
3365 GEN_VEXT_VF(vfrdiv_vf_h, 2)
3366 GEN_VEXT_VF(vfrdiv_vf_w, 4)
3367 GEN_VEXT_VF(vfrdiv_vf_d, 8)
3368
3369 /* Vector Widening Floating-Point Multiply */
3370 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3371 {
3372 return float32_mul(float16_to_float32(a, true, s),
3373 float16_to_float32(b, true, s), s);
3374 }
3375
3376 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3377 {
3378 return float64_mul(float32_to_float64(a, s),
3379 float32_to_float64(b, s), s);
3380
3381 }
3382 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3383 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
3384 GEN_VEXT_VV_ENV(vfwmul_vv_h, 4)
3385 GEN_VEXT_VV_ENV(vfwmul_vv_w, 8)
3386 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3387 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
3388 GEN_VEXT_VF(vfwmul_vf_h, 4)
3389 GEN_VEXT_VF(vfwmul_vf_w, 8)
3390
3391 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
3392 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
3393 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
3394 CPURISCVState *env) \
3395 { \
3396 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
3397 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3398 TD d = *((TD *)vd + HD(i)); \
3399 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \
3400 }
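/*
 * Illustrative note (not from the original source): OPFVV3 calls
 * OP(s2, s1, d), so in the helpers below 'a' is vs2, 'b' is vs1 and 'd'
 * is the destination/accumulator. Thus fmacc computes vd = vs1*vs2 + vd,
 * while the *madd/*msub variants swap the roles of vd and vs2,
 * e.g. fmadd16 returns muladd(d, b, a) = vd*vs1 + vs2.
 */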
3401
3402 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3403 {
3404 return float16_muladd(a, b, d, 0, s);
3405 }
3406
3407 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3408 {
3409 return float32_muladd(a, b, d, 0, s);
3410 }
3411
3412 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3413 {
3414 return float64_muladd(a, b, d, 0, s);
3415 }
3416
3417 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3418 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3419 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
3420 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2)
3421 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4)
3422 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8)
3423
3424 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
3425 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
3426 CPURISCVState *env) \
3427 { \
3428 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3429 TD d = *((TD *)vd + HD(i)); \
3430 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
3431 }
3432
3433 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3434 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3435 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
3436 GEN_VEXT_VF(vfmacc_vf_h, 2)
3437 GEN_VEXT_VF(vfmacc_vf_w, 4)
3438 GEN_VEXT_VF(vfmacc_vf_d, 8)
3439
3440 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3441 {
3442 return float16_muladd(a, b, d, float_muladd_negate_c |
3443 float_muladd_negate_product, s);
3444 }
3445
3446 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3447 {
3448 return float32_muladd(a, b, d, float_muladd_negate_c |
3449 float_muladd_negate_product, s);
3450 }
3451
3452 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3453 {
3454 return float64_muladd(a, b, d, float_muladd_negate_c |
3455 float_muladd_negate_product, s);
3456 }
3457
3458 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3459 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3460 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
3461 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2)
3462 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4)
3463 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8)
3464 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3465 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3466 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
3467 GEN_VEXT_VF(vfnmacc_vf_h, 2)
3468 GEN_VEXT_VF(vfnmacc_vf_w, 4)
3469 GEN_VEXT_VF(vfnmacc_vf_d, 8)
3470
3471 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3472 {
3473 return float16_muladd(a, b, d, float_muladd_negate_c, s);
3474 }
3475
3476 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3477 {
3478 return float32_muladd(a, b, d, float_muladd_negate_c, s);
3479 }
3480
3481 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3482 {
3483 return float64_muladd(a, b, d, float_muladd_negate_c, s);
3484 }
3485
3486 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3487 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3488 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
3489 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2)
3490 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4)
3491 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8)
3492 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3493 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3494 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
3495 GEN_VEXT_VF(vfmsac_vf_h, 2)
3496 GEN_VEXT_VF(vfmsac_vf_w, 4)
3497 GEN_VEXT_VF(vfmsac_vf_d, 8)
3498
3499 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3500 {
3501 return float16_muladd(a, b, d, float_muladd_negate_product, s);
3502 }
3503
3504 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3505 {
3506 return float32_muladd(a, b, d, float_muladd_negate_product, s);
3507 }
3508
3509 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3510 {
3511 return float64_muladd(a, b, d, float_muladd_negate_product, s);
3512 }
3513
3514 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3515 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3516 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
3517 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2)
3518 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4)
3519 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8)
3520 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3521 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3522 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
3523 GEN_VEXT_VF(vfnmsac_vf_h, 2)
3524 GEN_VEXT_VF(vfnmsac_vf_w, 4)
3525 GEN_VEXT_VF(vfnmsac_vf_d, 8)
3526
3527 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3528 {
3529 return float16_muladd(d, b, a, 0, s);
3530 }
3531
3532 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3533 {
3534 return float32_muladd(d, b, a, 0, s);
3535 }
3536
3537 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3538 {
3539 return float64_muladd(d, b, a, 0, s);
3540 }
3541
3542 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3543 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3544 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
3545 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2)
3546 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4)
3547 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8)
3548 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3549 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3550 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
3551 GEN_VEXT_VF(vfmadd_vf_h, 2)
3552 GEN_VEXT_VF(vfmadd_vf_w, 4)
3553 GEN_VEXT_VF(vfmadd_vf_d, 8)
3554
3555 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3556 {
3557 return float16_muladd(d, b, a, float_muladd_negate_c |
3558 float_muladd_negate_product, s);
3559 }
3560
3561 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3562 {
3563 return float32_muladd(d, b, a, float_muladd_negate_c |
3564 float_muladd_negate_product, s);
3565 }
3566
3567 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3568 {
3569 return float64_muladd(d, b, a, float_muladd_negate_c |
3570 float_muladd_negate_product, s);
3571 }
3572
3573 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3574 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3575 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
3576 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2)
3577 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4)
3578 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8)
3579 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3580 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3581 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
3582 GEN_VEXT_VF(vfnmadd_vf_h, 2)
3583 GEN_VEXT_VF(vfnmadd_vf_w, 4)
3584 GEN_VEXT_VF(vfnmadd_vf_d, 8)
3585
3586 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3587 {
3588 return float16_muladd(d, b, a, float_muladd_negate_c, s);
3589 }
3590
3591 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3592 {
3593 return float32_muladd(d, b, a, float_muladd_negate_c, s);
3594 }
3595
3596 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3597 {
3598 return float64_muladd(d, b, a, float_muladd_negate_c, s);
3599 }
3600
3601 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3602 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3603 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
3604 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2)
3605 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4)
3606 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8)
3607 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3608 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3609 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
3610 GEN_VEXT_VF(vfmsub_vf_h, 2)
3611 GEN_VEXT_VF(vfmsub_vf_w, 4)
3612 GEN_VEXT_VF(vfmsub_vf_d, 8)
3613
3614 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3615 {
3616 return float16_muladd(d, b, a, float_muladd_negate_product, s);
3617 }
3618
3619 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3620 {
3621 return float32_muladd(d, b, a, float_muladd_negate_product, s);
3622 }
3623
3624 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3625 {
3626 return float64_muladd(d, b, a, float_muladd_negate_product, s);
3627 }
3628
3629 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3630 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3631 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
3632 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2)
3633 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4)
3634 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8)
3635 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3636 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3637 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
3638 GEN_VEXT_VF(vfnmsub_vf_h, 2)
3639 GEN_VEXT_VF(vfnmsub_vf_w, 4)
3640 GEN_VEXT_VF(vfnmsub_vf_d, 8)
3641
3642 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3643 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3644 {
3645 return float32_muladd(float16_to_float32(a, true, s),
3646 float16_to_float32(b, true, s), d, 0, s);
3647 }
3648
3649 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3650 {
3651 return float64_muladd(float32_to_float64(a, s),
3652 float32_to_float64(b, s), d, 0, s);
3653 }
3654
3655 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3656 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
3657 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4)
3658 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8)
3659 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3660 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
3661 GEN_VEXT_VF(vfwmacc_vf_h, 4)
3662 GEN_VEXT_VF(vfwmacc_vf_w, 8)
3663
3664 static uint32_t fwmaccbf16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3665 {
3666 return float32_muladd(bfloat16_to_float32(a, s),
3667 bfloat16_to_float32(b, s), d, 0, s);
3668 }
3669
3670 RVVCALL(OPFVV3, vfwmaccbf16_vv, WOP_UUU_H, H4, H2, H2, fwmaccbf16)
3671 GEN_VEXT_VV_ENV(vfwmaccbf16_vv, 4)
3672 RVVCALL(OPFVF3, vfwmaccbf16_vf, WOP_UUU_H, H4, H2, fwmaccbf16)
3673 GEN_VEXT_VF(vfwmaccbf16_vf, 4)
3674
3675 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3676 {
3677 return float32_muladd(float16_to_float32(a, true, s),
3678 float16_to_float32(b, true, s), d,
3679 float_muladd_negate_c | float_muladd_negate_product,
3680 s);
3681 }
3682
3683 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3684 {
3685 return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s),
3686 d, float_muladd_negate_c |
3687 float_muladd_negate_product, s);
3688 }
3689
3690 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3691 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
3692 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4)
3693 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8)
3694 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3695 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
3696 GEN_VEXT_VF(vfwnmacc_vf_h, 4)
3697 GEN_VEXT_VF(vfwnmacc_vf_w, 8)
3698
3699 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3700 {
3701 return float32_muladd(float16_to_float32(a, true, s),
3702 float16_to_float32(b, true, s), d,
3703 float_muladd_negate_c, s);
3704 }
3705
3706 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3707 {
3708 return float64_muladd(float32_to_float64(a, s),
3709 float32_to_float64(b, s), d,
3710 float_muladd_negate_c, s);
3711 }
3712
3713 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3714 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
3715 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4)
3716 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8)
3717 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3718 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
3719 GEN_VEXT_VF(vfwmsac_vf_h, 4)
3720 GEN_VEXT_VF(vfwmsac_vf_w, 8)
3721
3722 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3723 {
3724 return float32_muladd(float16_to_float32(a, true, s),
3725 float16_to_float32(b, true, s), d,
3726 float_muladd_negate_product, s);
3727 }
3728
3729 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3730 {
3731 return float64_muladd(float32_to_float64(a, s),
3732 float32_to_float64(b, s), d,
3733 float_muladd_negate_product, s);
3734 }
3735
3736 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3737 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
3738 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4)
3739 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8)
3740 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3741 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
3742 GEN_VEXT_VF(vfwnmsac_vf_h, 4)
3743 GEN_VEXT_VF(vfwnmsac_vf_w, 8)
3744
3745 /* Vector Floating-Point Square-Root Instruction */
3746 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
3747 static void do_##NAME(void *vd, void *vs2, int i, \
3748 CPURISCVState *env) \
3749 { \
3750 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3751 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \
3752 }
3753
3754 #define GEN_VEXT_V_ENV(NAME, ESZ) \
3755 void HELPER(NAME)(void *vd, void *v0, void *vs2, \
3756 CPURISCVState *env, uint32_t desc) \
3757 { \
3758 uint32_t vm = vext_vm(desc); \
3759 uint32_t vl = env->vl; \
3760 uint32_t total_elems = \
3761 vext_get_total_elems(env, desc, ESZ); \
3762 uint32_t vta = vext_vta(desc); \
3763 uint32_t vma = vext_vma(desc); \
3764 uint32_t i; \
3765 \
3766 VSTART_CHECK_EARLY_EXIT(env, vl); \
3767 \
3768 if (vl == 0) { \
3769 return; \
3770 } \
3771 for (i = env->vstart; i < vl; i++) { \
3772 if (!vm && !vext_elem_mask(v0, i)) { \
3773 /* set masked-off elements to 1s */ \
3774 vext_set_elems_1s(vd, vma, i * ESZ, \
3775 (i + 1) * ESZ); \
3776 continue; \
3777 } \
3778 do_##NAME(vd, vs2, i, env); \
3779 } \
3780 env->vstart = 0; \
3781 vext_set_elems_1s(vd, vta, vl * ESZ, \
3782 total_elems * ESZ); \
3783 }
3784
3785 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3786 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3787 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
3788 GEN_VEXT_V_ENV(vfsqrt_v_h, 2)
3789 GEN_VEXT_V_ENV(vfsqrt_v_w, 4)
3790 GEN_VEXT_V_ENV(vfsqrt_v_d, 8)
3791
3792 /*
3793 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
3794 *
3795 * Adapted from riscv-v-spec recip.c:
3796 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3797 */
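/*
 * Illustrative note (not from the original source): frsqrt7() below
 * produces a 7-bit-accurate estimate. After normalizing a subnormal
 * input, the table index combines the least-significant exponent bit
 * with the top 6 fraction bits, and the result exponent is
 * (3 * bias - 1 - exp) / 2, i.e. roughly the negated, halved unbiased
 * exponent, re-biased for the reciprocal square root.
 */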
3798 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
3799 {
3800 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3801 uint64_t exp = extract64(f, frac_size, exp_size);
3802 uint64_t frac = extract64(f, 0, frac_size);
3803
3804 const uint8_t lookup_table[] = {
3805 52, 51, 50, 48, 47, 46, 44, 43,
3806 42, 41, 40, 39, 38, 36, 35, 34,
3807 33, 32, 31, 30, 30, 29, 28, 27,
3808 26, 25, 24, 23, 23, 22, 21, 20,
3809 19, 19, 18, 17, 16, 16, 15, 14,
3810 14, 13, 12, 12, 11, 10, 10, 9,
3811 9, 8, 7, 7, 6, 6, 5, 4,
3812 4, 3, 3, 2, 2, 1, 1, 0,
3813 127, 125, 123, 121, 119, 118, 116, 114,
3814 113, 111, 109, 108, 106, 105, 103, 102,
3815 100, 99, 97, 96, 95, 93, 92, 91,
3816 90, 88, 87, 86, 85, 84, 83, 82,
3817 80, 79, 78, 77, 76, 75, 74, 73,
3818 72, 71, 70, 70, 69, 68, 67, 66,
3819 65, 64, 63, 63, 62, 61, 60, 59,
3820 59, 58, 57, 56, 56, 55, 54, 53
3821 };
3822 const int precision = 7;
3823
3824 if (exp == 0 && frac != 0) { /* subnormal */
3825 /* Normalize the subnormal. */
3826 while (extract64(frac, frac_size - 1, 1) == 0) {
3827 exp--;
3828 frac <<= 1;
3829 }
3830
3831 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3832 }
3833
3834 int idx = ((exp & 1) << (precision - 1)) |
3835 (frac >> (frac_size - precision + 1));
3836 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3837 (frac_size - precision);
3838 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;
3839
3840 uint64_t val = 0;
3841 val = deposit64(val, 0, frac_size, out_frac);
3842 val = deposit64(val, frac_size, exp_size, out_exp);
3843 val = deposit64(val, frac_size + exp_size, 1, sign);
3844 return val;
3845 }
3846
3847 static float16 frsqrt7_h(float16 f, float_status *s)
3848 {
3849 int exp_size = 5, frac_size = 10;
3850 bool sign = float16_is_neg(f);
3851
3852 /*
3853 * frsqrt7(sNaN) = canonical NaN
3854 * frsqrt7(-inf) = canonical NaN
3855 * frsqrt7(-normal) = canonical NaN
3856 * frsqrt7(-subnormal) = canonical NaN
3857 */
3858 if (float16_is_signaling_nan(f, s) ||
3859 (float16_is_infinity(f) && sign) ||
3860 (float16_is_normal(f) && sign) ||
3861 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
3862 s->float_exception_flags |= float_flag_invalid;
3863 return float16_default_nan(s);
3864 }
3865
3866 /* frsqrt7(qNaN) = canonical NaN */
3867 if (float16_is_quiet_nan(f, s)) {
3868 return float16_default_nan(s);
3869 }
3870
3871 /* frsqrt7(+-0) = +-inf */
3872 if (float16_is_zero(f)) {
3873 s->float_exception_flags |= float_flag_divbyzero;
3874 return float16_set_sign(float16_infinity, sign);
3875 }
3876
3877 /* frsqrt7(+inf) = +0 */
3878 if (float16_is_infinity(f) && !sign) {
3879 return float16_set_sign(float16_zero, sign);
3880 }
3881
3882 /* +normal, +subnormal */
3883 uint64_t val = frsqrt7(f, exp_size, frac_size);
3884 return make_float16(val);
3885 }
3886
3887 static float32 frsqrt7_s(float32 f, float_status *s)
3888 {
3889 int exp_size = 8, frac_size = 23;
3890 bool sign = float32_is_neg(f);
3891
3892 /*
3893 * frsqrt7(sNaN) = canonical NaN
3894 * frsqrt7(-inf) = canonical NaN
3895 * frsqrt7(-normal) = canonical NaN
3896 * frsqrt7(-subnormal) = canonical NaN
3897 */
3898 if (float32_is_signaling_nan(f, s) ||
3899 (float32_is_infinity(f) && sign) ||
3900 (float32_is_normal(f) && sign) ||
3901 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
3902 s->float_exception_flags |= float_flag_invalid;
3903 return float32_default_nan(s);
3904 }
3905
3906 /* frsqrt7(qNaN) = canonical NaN */
3907 if (float32_is_quiet_nan(f, s)) {
3908 return float32_default_nan(s);
3909 }
3910
3911 /* frsqrt7(+-0) = +-inf */
3912 if (float32_is_zero(f)) {
3913 s->float_exception_flags |= float_flag_divbyzero;
3914 return float32_set_sign(float32_infinity, sign);
3915 }
3916
3917 /* frsqrt7(+inf) = +0 */
3918 if (float32_is_infinity(f) && !sign) {
3919 return float32_set_sign(float32_zero, sign);
3920 }
3921
3922 /* +normal, +subnormal */
3923 uint64_t val = frsqrt7(f, exp_size, frac_size);
3924 return make_float32(val);
3925 }
3926
3927 static float64 frsqrt7_d(float64 f, float_status *s)
3928 {
3929 int exp_size = 11, frac_size = 52;
3930 bool sign = float64_is_neg(f);
3931
3932 /*
3933 * frsqrt7(sNaN) = canonical NaN
3934 * frsqrt7(-inf) = canonical NaN
3935 * frsqrt7(-normal) = canonical NaN
3936 * frsqrt7(-subnormal) = canonical NaN
3937 */
3938 if (float64_is_signaling_nan(f, s) ||
3939 (float64_is_infinity(f) && sign) ||
3940 (float64_is_normal(f) && sign) ||
3941 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
3942 s->float_exception_flags |= float_flag_invalid;
3943 return float64_default_nan(s);
3944 }
3945
3946 /* frsqrt7(qNaN) = canonical NaN */
3947 if (float64_is_quiet_nan(f, s)) {
3948 return float64_default_nan(s);
3949 }
3950
3951 /* frsqrt7(+-0) = +-inf */
3952 if (float64_is_zero(f)) {
3953 s->float_exception_flags |= float_flag_divbyzero;
3954 return float64_set_sign(float64_infinity, sign);
3955 }
3956
3957 /* frsqrt7(+inf) = +0 */
3958 if (float64_is_infinity(f) && !sign) {
3959 return float64_set_sign(float64_zero, sign);
3960 }
3961
3962 /* +normal, +subnormal */
3963 uint64_t val = frsqrt7(f, exp_size, frac_size);
3964 return make_float64(val);
3965 }
3966
3967 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
3968 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
3969 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
3970 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2)
3971 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4)
3972 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8)
3973
3974 /*
3975 * Vector Floating-Point Reciprocal Estimate Instruction
3976 *
3977 * Adapted from riscv-v-spec recip.c:
3978 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3979 */
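/*
 * Illustrative note (not from the original source): frec7() below indexes
 * its table with the top 7 fraction bits and uses result exponent
 * 2 * bias - 1 - exp. Subnormal inputs small enough that the reciprocal
 * would overflow are handled in the normalization branch (returning inf
 * or the largest finite value depending on the rounding mode), and
 * results whose exponent collapses to zero or below are shifted down
 * into a subnormal encoding.
 */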
3980 static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
3981 float_status *s)
3982 {
3983 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3984 uint64_t exp = extract64(f, frac_size, exp_size);
3985 uint64_t frac = extract64(f, 0, frac_size);
3986
3987 const uint8_t lookup_table[] = {
3988 127, 125, 123, 121, 119, 117, 116, 114,
3989 112, 110, 109, 107, 105, 104, 102, 100,
3990 99, 97, 96, 94, 93, 91, 90, 88,
3991 87, 85, 84, 83, 81, 80, 79, 77,
3992 76, 75, 74, 72, 71, 70, 69, 68,
3993 66, 65, 64, 63, 62, 61, 60, 59,
3994 58, 57, 56, 55, 54, 53, 52, 51,
3995 50, 49, 48, 47, 46, 45, 44, 43,
3996 42, 41, 40, 40, 39, 38, 37, 36,
3997 35, 35, 34, 33, 32, 31, 31, 30,
3998 29, 28, 28, 27, 26, 25, 25, 24,
3999 23, 23, 22, 21, 21, 20, 19, 19,
4000 18, 17, 17, 16, 15, 15, 14, 14,
4001 13, 12, 12, 11, 11, 10, 9, 9,
4002 8, 8, 7, 7, 6, 5, 5, 4,
4003 4, 3, 3, 2, 2, 1, 1, 0
4004 };
4005 const int precision = 7;
4006
4007 if (exp == 0 && frac != 0) { /* subnormal */
4008 /* Normalize the subnormal. */
4009 while (extract64(frac, frac_size - 1, 1) == 0) {
4010 exp--;
4011 frac <<= 1;
4012 }
4013
4014 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
4015
4016 if (exp != 0 && exp != UINT64_MAX) {
4017 /*
4018 * Overflow to inf or max value of same sign,
4019 * depending on sign and rounding mode.
4020 */
4021 s->float_exception_flags |= (float_flag_inexact |
4022 float_flag_overflow);
4023
4024 if ((s->float_rounding_mode == float_round_to_zero) ||
4025 ((s->float_rounding_mode == float_round_down) && !sign) ||
4026 ((s->float_rounding_mode == float_round_up) && sign)) {
4027 /* Return greatest/negative finite value. */
4028 return (sign << (exp_size + frac_size)) |
4029 (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
4030 } else {
4031 /* Return +-inf. */
4032 return (sign << (exp_size + frac_size)) |
4033 MAKE_64BIT_MASK(frac_size, exp_size);
4034 }
4035 }
4036 }
4037
4038 int idx = frac >> (frac_size - precision);
4039 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
4040 (frac_size - precision);
4041 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;
4042
4043 if (out_exp == 0 || out_exp == UINT64_MAX) {
4044 /*
4045 * The result is subnormal, but don't raise the underflow exception,
4046 * because there's no additional loss of precision.
4047 */
4048 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
4049 if (out_exp == UINT64_MAX) {
4050 out_frac >>= 1;
4051 out_exp = 0;
4052 }
4053 }
4054
4055 uint64_t val = 0;
4056 val = deposit64(val, 0, frac_size, out_frac);
4057 val = deposit64(val, frac_size, exp_size, out_exp);
4058 val = deposit64(val, frac_size + exp_size, 1, sign);
4059 return val;
4060 }
4061
4062 static float16 frec7_h(float16 f, float_status *s)
4063 {
4064 int exp_size = 5, frac_size = 10;
4065 bool sign = float16_is_neg(f);
4066
4067 /* frec7(+-inf) = +-0 */
4068 if (float16_is_infinity(f)) {
4069 return float16_set_sign(float16_zero, sign);
4070 }
4071
4072 /* frec7(+-0) = +-inf */
4073 if (float16_is_zero(f)) {
4074 s->float_exception_flags |= float_flag_divbyzero;
4075 return float16_set_sign(float16_infinity, sign);
4076 }
4077
4078 /* frec7(sNaN) = canonical NaN */
4079 if (float16_is_signaling_nan(f, s)) {
4080 s->float_exception_flags |= float_flag_invalid;
4081 return float16_default_nan(s);
4082 }
4083
4084 /* frec7(qNaN) = canonical NaN */
4085 if (float16_is_quiet_nan(f, s)) {
4086 return float16_default_nan(s);
4087 }
4088
4089 /* +-normal, +-subnormal */
4090 uint64_t val = frec7(f, exp_size, frac_size, s);
4091 return make_float16(val);
4092 }
4093
4094 static float32 frec7_s(float32 f, float_status *s)
4095 {
4096 int exp_size = 8, frac_size = 23;
4097 bool sign = float32_is_neg(f);
4098
4099 /* frec7(+-inf) = +-0 */
4100 if (float32_is_infinity(f)) {
4101 return float32_set_sign(float32_zero, sign);
4102 }
4103
4104 /* frec7(+-0) = +-inf */
4105 if (float32_is_zero(f)) {
4106 s->float_exception_flags |= float_flag_divbyzero;
4107 return float32_set_sign(float32_infinity, sign);
4108 }
4109
4110 /* frec7(sNaN) = canonical NaN */
4111 if (float32_is_signaling_nan(f, s)) {
4112 s->float_exception_flags |= float_flag_invalid;
4113 return float32_default_nan(s);
4114 }
4115
4116 /* frec7(qNaN) = canonical NaN */
4117 if (float32_is_quiet_nan(f, s)) {
4118 return float32_default_nan(s);
4119 }
4120
4121 /* +-normal, +-subnormal */
4122 uint64_t val = frec7(f, exp_size, frac_size, s);
4123 return make_float32(val);
4124 }
4125
4126 static float64 frec7_d(float64 f, float_status *s)
4127 {
4128 int exp_size = 11, frac_size = 52;
4129 bool sign = float64_is_neg(f);
4130
4131 /* frec7(+-inf) = +-0 */
4132 if (float64_is_infinity(f)) {
4133 return float64_set_sign(float64_zero, sign);
4134 }
4135
4136 /* frec7(+-0) = +-inf */
4137 if (float64_is_zero(f)) {
4138 s->float_exception_flags |= float_flag_divbyzero;
4139 return float64_set_sign(float64_infinity, sign);
4140 }
4141
4142 /* frec7(sNaN) = canonical NaN */
4143 if (float64_is_signaling_nan(f, s)) {
4144 s->float_exception_flags |= float_flag_invalid;
4145 return float64_default_nan(s);
4146 }
4147
4148 /* frec7(qNaN) = canonical NaN */
4149 if (float64_is_quiet_nan(f, s)) {
4150 return float64_default_nan(s);
4151 }
4152
4153 /* +-normal, +-subnormal */
4154 uint64_t val = frec7(f, exp_size, frac_size, s);
4155 return make_float64(val);
4156 }
4157
4158 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
4159 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
4160 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
4161 GEN_VEXT_V_ENV(vfrec7_v_h, 2)
4162 GEN_VEXT_V_ENV(vfrec7_v_w, 4)
4163 GEN_VEXT_V_ENV(vfrec7_v_d, 8)
4164
4165 /* Vector Floating-Point MIN/MAX Instructions */
4166 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
4167 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
4168 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
4169 GEN_VEXT_VV_ENV(vfmin_vv_h, 2)
4170 GEN_VEXT_VV_ENV(vfmin_vv_w, 4)
4171 GEN_VEXT_VV_ENV(vfmin_vv_d, 8)
4172 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
4173 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
4174 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
4175 GEN_VEXT_VF(vfmin_vf_h, 2)
4176 GEN_VEXT_VF(vfmin_vf_w, 4)
4177 GEN_VEXT_VF(vfmin_vf_d, 8)
4178
4179 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
4180 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
4181 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
4182 GEN_VEXT_VV_ENV(vfmax_vv_h, 2)
4183 GEN_VEXT_VV_ENV(vfmax_vv_w, 4)
4184 GEN_VEXT_VV_ENV(vfmax_vv_d, 8)
4185 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
4186 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
4187 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
4188 GEN_VEXT_VF(vfmax_vf_h, 2)
4189 GEN_VEXT_VF(vfmax_vf_w, 4)
4190 GEN_VEXT_VF(vfmax_vf_d, 8)
4191
4192 /* Vector Floating-Point Sign-Injection Instructions */
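/*
 * Illustrative note (not from the original source): with OPFVV2 passing
 * OP(s2, s1), the fsgnj* helpers below keep the magnitude bits of vs2
 * ('a') and take the sign from vs1 ('b'): deposit64(b, 0, 15, a) copies
 * a's low 15 bits over b, leaving only b's sign bit. fsgnjn uses ~b and
 * fsgnjx uses b ^ a as the sign source.
 */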
4193 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
4194 {
4195 return deposit64(b, 0, 15, a);
4196 }
4197
4198 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
4199 {
4200 return deposit64(b, 0, 31, a);
4201 }
4202
4203 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
4204 {
4205 return deposit64(b, 0, 63, a);
4206 }
4207
4208 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
4209 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
4210 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
4211 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2)
4212 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4)
4213 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8)
4214 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
4215 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
4216 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
4217 GEN_VEXT_VF(vfsgnj_vf_h, 2)
4218 GEN_VEXT_VF(vfsgnj_vf_w, 4)
4219 GEN_VEXT_VF(vfsgnj_vf_d, 8)
4220
4221 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
4222 {
4223 return deposit64(~b, 0, 15, a);
4224 }
4225
4226 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
4227 {
4228 return deposit64(~b, 0, 31, a);
4229 }
4230
4231 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
4232 {
4233 return deposit64(~b, 0, 63, a);
4234 }
4235
4236 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
4237 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
4238 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
4239 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2)
4240 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4)
4241 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8)
4242 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
4243 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
4244 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
4245 GEN_VEXT_VF(vfsgnjn_vf_h, 2)
4246 GEN_VEXT_VF(vfsgnjn_vf_w, 4)
4247 GEN_VEXT_VF(vfsgnjn_vf_d, 8)
4248
4249 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
4250 {
4251 return deposit64(b ^ a, 0, 15, a);
4252 }
4253
4254 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
4255 {
4256 return deposit64(b ^ a, 0, 31, a);
4257 }
4258
4259 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
4260 {
4261 return deposit64(b ^ a, 0, 63, a);
4262 }
4263
4264 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
4265 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
4266 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
4267 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2)
4268 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4)
4269 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8)
4270 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
4271 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
4272 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
4273 GEN_VEXT_VF(vfsgnjx_vf_h, 2)
4274 GEN_VEXT_VF(vfsgnjx_vf_w, 4)
4275 GEN_VEXT_VF(vfsgnjx_vf_d, 8)
4276
4277 /* Vector Floating-Point Compare Instructions */
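/*
 * Illustrative note (not from the original source): vmfeq/vmfne use the
 * quiet comparison predicates (no invalid flag for quiet NaNs), whereas
 * vmflt/vmfle/vmfgt/vmfge below use the signalling variants
 * (float*_lt, float*_le, float*_compare), matching the behaviour of the
 * corresponding scalar compares. The destination is a mask register,
 * written one bit per element via vext_set_elem_mask().
 */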
4278 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \
4279 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
4280 CPURISCVState *env, uint32_t desc) \
4281 { \
4282 uint32_t vm = vext_vm(desc); \
4283 uint32_t vl = env->vl; \
4284 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
4285 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
4286 uint32_t vma = vext_vma(desc); \
4287 uint32_t i; \
4288 \
4289 VSTART_CHECK_EARLY_EXIT(env, vl); \
4290 \
4291 for (i = env->vstart; i < vl; i++) { \
4292 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
4293 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
4294 if (!vm && !vext_elem_mask(v0, i)) { \
4295 /* set masked-off elements to 1s */ \
4296 if (vma) { \
4297 vext_set_elem_mask(vd, i, 1); \
4298 } \
4299 continue; \
4300 } \
4301 vext_set_elem_mask(vd, i, \
4302 DO_OP(s2, s1, &env->fp_status)); \
4303 } \
4304 env->vstart = 0; \
4305 /*
4306 * mask destination registers are always tail-agnostic
4307 * set tail elements to 1s
4308 */ \
4309 if (vta_all_1s) { \
4310 for (; i < total_elems; i++) { \
4311 vext_set_elem_mask(vd, i, 1); \
4312 } \
4313 } \
4314 }
4315
4316 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
4317 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
4318 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
4319
4320 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \
4321 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4322 CPURISCVState *env, uint32_t desc) \
4323 { \
4324 uint32_t vm = vext_vm(desc); \
4325 uint32_t vl = env->vl; \
4326 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
4327 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
4328 uint32_t vma = vext_vma(desc); \
4329 uint32_t i; \
4330 \
4331 VSTART_CHECK_EARLY_EXIT(env, vl); \
4332 \
4333 for (i = env->vstart; i < vl; i++) { \
4334 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
4335 if (!vm && !vext_elem_mask(v0, i)) { \
4336 /* set masked-off elements to 1s */ \
4337 if (vma) { \
4338 vext_set_elem_mask(vd, i, 1); \
4339 } \
4340 continue; \
4341 } \
4342 vext_set_elem_mask(vd, i, \
4343 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \
4344 } \
4345 env->vstart = 0; \
4346 /*
4347 * mask destination registers are always tail-agnostic
4348 * set tail elements to 1s
4349 */ \
4350 if (vta_all_1s) { \
4351 for (; i < total_elems; i++) { \
4352 vext_set_elem_mask(vd, i, 1); \
4353 } \
4354 } \
4355 }
4356
4357 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
4358 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
4359 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
4360
4361 static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
4362 {
4363 FloatRelation compare = float16_compare_quiet(a, b, s);
4364 return compare != float_relation_equal;
4365 }
4366
4367 static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
4368 {
4369 FloatRelation compare = float32_compare_quiet(a, b, s);
4370 return compare != float_relation_equal;
4371 }
4372
4373 static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
4374 {
4375 FloatRelation compare = float64_compare_quiet(a, b, s);
4376 return compare != float_relation_equal;
4377 }
4378
4379 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
4380 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
4381 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
4382 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
4383 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
4384 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
4385
4386 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
4387 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
4388 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
4389 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
4390 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
4391 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
4392
4393 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
4394 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
4395 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
4396 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
4397 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
4398 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
4399
4400 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
4401 {
4402 FloatRelation compare = float16_compare(a, b, s);
4403 return compare == float_relation_greater;
4404 }
4405
4406 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
4407 {
4408 FloatRelation compare = float32_compare(a, b, s);
4409 return compare == float_relation_greater;
4410 }
4411
4412 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
4413 {
4414 FloatRelation compare = float64_compare(a, b, s);
4415 return compare == float_relation_greater;
4416 }
4417
4418 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
4419 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
4420 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4421
4422 static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4423 {
4424 FloatRelation compare = float16_compare(a, b, s);
4425 return compare == float_relation_greater ||
4426 compare == float_relation_equal;
4427 }
4428
4429 static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
4430 {
4431 FloatRelation compare = float32_compare(a, b, s);
4432 return compare == float_relation_greater ||
4433 compare == float_relation_equal;
4434 }
4435
4436 static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
4437 {
4438 FloatRelation compare = float64_compare(a, b, s);
4439 return compare == float_relation_greater ||
4440 compare == float_relation_equal;
4441 }
4442
4443 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
4444 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
4445 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
4446
4447 /* Vector Floating-Point Classify Instruction */
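/*
 * Illustrative note (not from the original source): fclass_* return the
 * 10-bit class mask, one-hot per input:
 *   bit 0: -inf      bit 1: -normal     bit 2: -subnormal  bit 3: -0
 *   bit 4: +0        bit 5: +subnormal  bit 6: +normal     bit 7: +inf
 *   bit 8: sNaN      bit 9: qNaN
 */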
4448 target_ulong fclass_h(uint64_t frs1)
4449 {
4450 float16 f = frs1;
4451 bool sign = float16_is_neg(f);
4452
4453 if (float16_is_infinity(f)) {
4454 return sign ? 1 << 0 : 1 << 7;
4455 } else if (float16_is_zero(f)) {
4456 return sign ? 1 << 3 : 1 << 4;
4457 } else if (float16_is_zero_or_denormal(f)) {
4458 return sign ? 1 << 2 : 1 << 5;
4459 } else if (float16_is_any_nan(f)) {
4460 float_status s = { }; /* for snan_bit_is_one */
4461 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4462 } else {
4463 return sign ? 1 << 1 : 1 << 6;
4464 }
4465 }
4466
4467 target_ulong fclass_s(uint64_t frs1)
4468 {
4469 float32 f = frs1;
4470 bool sign = float32_is_neg(f);
4471
4472 if (float32_is_infinity(f)) {
4473 return sign ? 1 << 0 : 1 << 7;
4474 } else if (float32_is_zero(f)) {
4475 return sign ? 1 << 3 : 1 << 4;
4476 } else if (float32_is_zero_or_denormal(f)) {
4477 return sign ? 1 << 2 : 1 << 5;
4478 } else if (float32_is_any_nan(f)) {
4479 float_status s = { }; /* for snan_bit_is_one */
4480 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4481 } else {
4482 return sign ? 1 << 1 : 1 << 6;
4483 }
4484 }
4485
4486 target_ulong fclass_d(uint64_t frs1)
4487 {
4488 float64 f = frs1;
4489 bool sign = float64_is_neg(f);
4490
4491 if (float64_is_infinity(f)) {
4492 return sign ? 1 << 0 : 1 << 7;
4493 } else if (float64_is_zero(f)) {
4494 return sign ? 1 << 3 : 1 << 4;
4495 } else if (float64_is_zero_or_denormal(f)) {
4496 return sign ? 1 << 2 : 1 << 5;
4497 } else if (float64_is_any_nan(f)) {
4498 float_status s = { }; /* for snan_bit_is_one */
4499 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4500 } else {
4501 return sign ? 1 << 1 : 1 << 6;
4502 }
4503 }
4504
4505 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
4506 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
4507 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
4508 GEN_VEXT_V(vfclass_v_h, 2)
4509 GEN_VEXT_V(vfclass_v_w, 4)
4510 GEN_VEXT_V(vfclass_v_d, 8)
4511
4512 /* Vector Floating-Point Merge Instruction */
4513
4514 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \
4515 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4516 CPURISCVState *env, uint32_t desc) \
4517 { \
4518 uint32_t vm = vext_vm(desc); \
4519 uint32_t vl = env->vl; \
4520 uint32_t esz = sizeof(ETYPE); \
4521 uint32_t total_elems = \
4522 vext_get_total_elems(env, desc, esz); \
4523 uint32_t vta = vext_vta(desc); \
4524 uint32_t i; \
4525 \
4526 VSTART_CHECK_EARLY_EXIT(env, vl); \
4527 \
4528 for (i = env->vstart; i < vl; i++) { \
4529 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
4530 *((ETYPE *)vd + H(i)) = \
4531 (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \
4532 } \
4533 env->vstart = 0; \
4534 /* set tail elements to 1s */ \
4535 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
4536 }
4537
4538 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
4539 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
4540 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
4541
4542 /* Single-Width Floating-Point/Integer Type-Convert Instructions */
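/*
 * Illustrative note (not from the original source): these conversions are
 * plain OPFVV1 workers around the softfloat float*_to_* / *_to_float*
 * routines, so they honour the rounding mode and accumulate exception
 * flags through &env->fp_status just like the arithmetic helpers above.
 */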
4543 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
4544 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
4545 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
4546 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
4547 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2)
4548 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4)
4549 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8)
4550
4551 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
4552 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
4553 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
4554 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
4555 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2)
4556 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4)
4557 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8)
4558
4559 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
4560 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
4561 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
4562 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
4563 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2)
4564 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4)
4565 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8)
4566
4567 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
4568 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
4569 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
4570 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
4571 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2)
4572 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4)
4573 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8)
4574
4575 /* Widening Floating-Point/Integer Type-Convert Instructions */
4576 /* (TD, T2, TX2) */
4577 #define WOP_UU_B uint16_t, uint8_t, uint8_t
4578 #define WOP_UU_H uint32_t, uint16_t, uint16_t
4579 #define WOP_UU_W uint64_t, uint32_t, uint32_t
4580 /*
4581 * vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.
4582 */
4583 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
4584 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
4585 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4)
4586 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8)
4587
4588 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
4589 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
4590 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
4591 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4)
4592 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8)
4593
4594 /*
4595 * vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float.
4596 */
4597 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
4598 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
4599 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
4600 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2)
4601 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4)
4602 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8)
4603
4604 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
4605 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
4606 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
4607 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
4608 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2)
4609 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4)
4610 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8)
4611
4612 /*
4613 * vfwcvt.f.f.v vd, vs2, vm # Convert single-width float to double-width float.
4614 */
4615 static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4616 {
4617 return float16_to_float32(a, true, s);
4618 }
4619
4620 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
4621 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
4622 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4)
4623 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8)
4624
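/* Widening bfloat16 -> single-width float convert (vfwcvtbf16.f.f.v, Zvfbfmin) */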
4625 RVVCALL(OPFVV1, vfwcvtbf16_f_f_v, WOP_UU_H, H4, H2, bfloat16_to_float32)
4626 GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v, 4)
4627
4628 /* Narrowing Floating-Point/Integer Type-Convert Instructions */
4629 /* (TD, T2, TX2) */
4630 #define NOP_UU_B uint8_t, uint16_t, uint32_t
4631 #define NOP_UU_H uint16_t, uint32_t, uint32_t
4632 #define NOP_UU_W uint32_t, uint64_t, uint64_t
4633 /* vfncvt.xu.f.w vd, vs2, vm # Convert double-width float to unsigned integer. */
4634 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
4635 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
4636 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
4637 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1)
4638 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2)
4639 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4)
4640
4641 /* vfncvt.x.f.w vd, vs2, vm # Convert double-width float to signed integer. */
4642 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
4643 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
4644 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
4645 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1)
4646 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2)
4647 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4)
4648
4649 /*
4650  * vfncvt.f.xu.w vd, vs2, vm # Convert double-width unsigned integer to float.
4651 */
4652 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
4653 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
4654 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2)
4655 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4)
4656
4657 /* vfncvt.f.x.w vd, vs2, vm # Convert double-width integer to float. */
4658 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
4659 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
4660 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2)
4661 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4)
4662
4663 /* vfncvt.f.f.w vd, vs2, vm # Convert double-width float to single-width float. */
4664 static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4665 {
4666 return float32_to_float16(a, true, s);
4667 }
4668
4669 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
4670 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
4671 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2)
4672 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4)
4673
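/* Narrowing single-width float -> bfloat16 convert (vfncvtbf16.f.f.w, Zvfbfmin) */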
4674 RVVCALL(OPFVV1, vfncvtbf16_f_f_w, NOP_UU_H, H2, H4, float32_to_bfloat16)
4675 GEN_VEXT_V_ENV(vfncvtbf16_f_f_w, 2)
4676
4677 /*
4678 * Vector Reduction Operations
4679 */
4680 /* Vector Single-Width Integer Reduction Instructions */
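/*
 * vd[0] = OP(vs1[0], vs2[0], ..., vs2[vl-1]) over the active elements of
 * vs2; inactive elements are skipped, and vd[0] is written only when
 * vl > 0, so a zero-length reduction leaves the destination unchanged.
 */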
4681 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \
4682 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4683 void *vs2, CPURISCVState *env, \
4684 uint32_t desc) \
4685 { \
4686 uint32_t vm = vext_vm(desc); \
4687 uint32_t vl = env->vl; \
4688 uint32_t esz = sizeof(TD); \
4689 uint32_t vlenb = simd_maxsz(desc); \
4690 uint32_t vta = vext_vta(desc); \
4691 uint32_t i; \
4692 TD s1 = *((TD *)vs1 + HD(0)); \
4693 \
4694 VSTART_CHECK_EARLY_EXIT(env, vl); \
4695 \
4696 for (i = env->vstart; i < vl; i++) { \
4697 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
4698 if (!vm && !vext_elem_mask(v0, i)) { \
4699 continue; \
4700 } \
4701 s1 = OP(s1, (TD)s2); \
4702 } \
4703 if (vl > 0) { \
4704 *((TD *)vd + HD(0)) = s1; \
4705 } \
4706 env->vstart = 0; \
4707 /* set tail elements to 1s */ \
4708 vext_set_elems_1s(vd, vta, esz, vlenb); \
4709 }
4710
4711 /* vd[0] = sum(vs1[0], vs2[*]) */
4712 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD)
4713 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
4714 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
4715 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)
4716
4717 /* vd[0] = maxu(vs1[0], vs2[*]) */
4718 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX)
4719 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
4720 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
4721 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)
4722
4723 /* vd[0] = max(vs1[0], vs2[*]) */
4724 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX)
4725 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
4726 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
4727 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)
4728
4729 /* vd[0] = minu(vs1[0], vs2[*]) */
4730 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN)
4731 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
4732 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
4733 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)
4734
4735 /* vd[0] = min(vs1[0], vs2[*]) */
4736 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN)
4737 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
4738 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
4739 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)
4740
4741 /* vd[0] = and(vs1[0], vs2[*]) */
4742 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND)
4743 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
4744 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
4745 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)
4746
4747 /* vd[0] = or(vs1[0], vs2[*]) */
4748 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR)
4749 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
4750 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
4751 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)
4752
4753 /* vd[0] = xor(vs1[0], vs2[*]) */
4754 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR)
4755 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
4756 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
4757 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)
4758
4759 /* Vector Widening Integer Reduction Instructions */
4760 /* Signed sum reduction into double-width accumulator */
4761 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD)
4762 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
4763 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)
4764
4765 /* Unsigned sum reduction into double-width accumulator */
4766 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD)
4767 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
4768 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
4769
4770 /* Vector Single-Width Floating-Point Reduction Instructions */
4771 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \
4772 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4773 void *vs2, CPURISCVState *env, \
4774 uint32_t desc) \
4775 { \
4776 uint32_t vm = vext_vm(desc); \
4777 uint32_t vl = env->vl; \
4778 uint32_t esz = sizeof(TD); \
4779 uint32_t vlenb = simd_maxsz(desc); \
4780 uint32_t vta = vext_vta(desc); \
4781 uint32_t i; \
4782 TD s1 = *((TD *)vs1 + HD(0)); \
4783 \
4784 VSTART_CHECK_EARLY_EXIT(env, vl); \
4785 \
4786 for (i = env->vstart; i < vl; i++) { \
4787 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
4788 if (!vm && !vext_elem_mask(v0, i)) { \
4789 continue; \
4790 } \
4791 s1 = OP(s1, (TD)s2, &env->fp_status); \
4792 } \
4793 if (vl > 0) { \
4794 *((TD *)vd + HD(0)) = s1; \
4795 } \
4796 env->vstart = 0; \
4797 /* set tail elements to 1s */ \
4798 vext_set_elems_1s(vd, vta, esz, vlenb); \
4799 }
4800
4801 /* Unordered sum */
4802 GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4803 GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4804 GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
4805
4806 /* Ordered sum */
4807 GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4808 GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4809 GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
4810
4811 /* Maximum value */
4812 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2,
4813 float16_maximum_number)
4814 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4,
4815 float32_maximum_number)
4816 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8,
4817 float64_maximum_number)
4818
4819 /* Minimum value */
4820 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2,
4821 float16_minimum_number)
4822 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4,
4823 float32_minimum_number)
4824 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8,
4825 float64_minimum_number)
4826
4827 /* Vector Widening Floating-Point Add Instructions */
4828 static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s)
4829 {
4830 return float32_add(a, float16_to_float32(b, true, s), s);
4831 }
4832
4833 static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s)
4834 {
4835 return float64_add(a, float32_to_float64(b, s), s);
4836 }
4837
4838 /* Vector Widening Floating-Point Reduction Instructions */
4839 /* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
4840 GEN_VEXT_FRED(vfwredusum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
4841 GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
4842 GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
4843 GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
4844
4845 /*
4846 * Vector Mask Operations
4847 */
4848 /* Vector Mask-Register Logical Instructions */
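/* vd.mask[i] = OP(vs2.mask[i], vs1.mask[i]), e.g. vmandn.mm: vs2 & ~vs1 */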
4849 #define GEN_VEXT_MASK_VV(NAME, OP) \
4850 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4851 void *vs2, CPURISCVState *env, \
4852 uint32_t desc) \
4853 { \
4854 uint32_t vl = env->vl; \
4855 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;\
4856 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
4857 uint32_t i; \
4858 int a, b; \
4859 \
4860 VSTART_CHECK_EARLY_EXIT(env, vl); \
4861 \
4862 for (i = env->vstart; i < vl; i++) { \
4863 a = vext_elem_mask(vs1, i); \
4864 b = vext_elem_mask(vs2, i); \
4865 vext_set_elem_mask(vd, i, OP(b, a)); \
4866 } \
4867 env->vstart = 0; \
4868 /*
4869      * mask destination register is always tail-agnostic
4870 * set tail elements to 1s
4871 */ \
4872 if (vta_all_1s) { \
4873 for (; i < total_elems; i++) { \
4874 vext_set_elem_mask(vd, i, 1); \
4875 } \
4876 } \
4877 }
4878
4879 #define DO_NAND(N, M) (!(N & M))
4880 #define DO_ANDNOT(N, M) (N & !M)
4881 #define DO_NOR(N, M) (!(N | M))
4882 #define DO_ORNOT(N, M) (N | !M)
4883 #define DO_XNOR(N, M) (!(N ^ M))
4884
4885 GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
4886 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
4887 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
4888 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
4889 GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
4890 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
4891 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
4892 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
4893
4894 /* Vector count population in mask vcpop */
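/* vcpop.m rd, vs2, vm # x[rd] = number of set bits among the active elements of vs2 */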
4895 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
4896 uint32_t desc)
4897 {
4898 target_ulong cnt = 0;
4899 uint32_t vm = vext_vm(desc);
4900 uint32_t vl = env->vl;
4901 int i;
4902
4903 for (i = env->vstart; i < vl; i++) {
4904 if (vm || vext_elem_mask(v0, i)) {
4905 if (vext_elem_mask(vs2, i)) {
4906 cnt++;
4907 }
4908 }
4909 }
4910 env->vstart = 0;
4911 return cnt;
4912 }
4913
4914 /* vfirst find-first-set mask bit */
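/* vfirst.m rd, vs2, vm # x[rd] = index of the first active set bit of vs2, or -1 if none */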
4915 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4916 uint32_t desc)
4917 {
4918 uint32_t vm = vext_vm(desc);
4919 uint32_t vl = env->vl;
4920 int i;
4921
4922 for (i = env->vstart; i < vl; i++) {
4923 if (vm || vext_elem_mask(v0, i)) {
4924 if (vext_elem_mask(vs2, i)) {
4925 return i;
4926 }
4927 }
4928 }
4929 env->vstart = 0;
4930 return -1LL;
4931 }
4932
4933 enum set_mask_type {
4934 ONLY_FIRST = 1,
4935 INCLUDE_FIRST,
4936 BEFORE_FIRST,
4937 };
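/*
 * Common helper for vmsbf.m / vmsif.m / vmsof.m: active elements before
 * the first set mask bit of vs2 are written 1 (0 for ONLY_FIRST/vmsof),
 * the element holding the first set bit is written 1 (0 for
 * BEFORE_FIRST/vmsbf), and every later active element is written 0.
 */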
4938
4939 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
4940 uint32_t desc, enum set_mask_type type)
4941 {
4942 uint32_t vm = vext_vm(desc);
4943 uint32_t vl = env->vl;
4944 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;
4945 uint32_t vta_all_1s = vext_vta_all_1s(desc);
4946 uint32_t vma = vext_vma(desc);
4947 int i;
4948 bool first_mask_bit = false;
4949
4950 VSTART_CHECK_EARLY_EXIT(env, vl);
4951
4952 for (i = env->vstart; i < vl; i++) {
4953 if (!vm && !vext_elem_mask(v0, i)) {
4954 /* set masked-off elements to 1s */
4955 if (vma) {
4956 vext_set_elem_mask(vd, i, 1);
4957 }
4958 continue;
4959 }
4960 /* write a zero to all following active elements */
4961 if (first_mask_bit) {
4962 vext_set_elem_mask(vd, i, 0);
4963 continue;
4964 }
4965 if (vext_elem_mask(vs2, i)) {
4966 first_mask_bit = true;
4967 if (type == BEFORE_FIRST) {
4968 vext_set_elem_mask(vd, i, 0);
4969 } else {
4970 vext_set_elem_mask(vd, i, 1);
4971 }
4972 } else {
4973 if (type == ONLY_FIRST) {
4974 vext_set_elem_mask(vd, i, 0);
4975 } else {
4976 vext_set_elem_mask(vd, i, 1);
4977 }
4978 }
4979 }
4980 env->vstart = 0;
4981 /*
4982      * mask destination register is always tail-agnostic
4983 * set tail elements to 1s
4984 */
4985 if (vta_all_1s) {
4986 for (; i < total_elems; i++) {
4987 vext_set_elem_mask(vd, i, 1);
4988 }
4989 }
4990 }
4991
4992 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4993 uint32_t desc)
4994 {
4995 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
4996 }
4997
4998 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4999 uint32_t desc)
5000 {
5001 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
5002 }
5003
5004 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
5005 uint32_t desc)
5006 {
5007 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
5008 }
5009
5010 /* Vector Iota Instruction */
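/* viota.m: vd[i] = count of set mask bits of vs2 among active elements with index < i */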
5011 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \
5012 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \
5013 uint32_t desc) \
5014 { \
5015 uint32_t vm = vext_vm(desc); \
5016 uint32_t vl = env->vl; \
5017 uint32_t esz = sizeof(ETYPE); \
5018 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5019 uint32_t vta = vext_vta(desc); \
5020 uint32_t vma = vext_vma(desc); \
5021 uint32_t sum = 0; \
5022 int i; \
5023 \
5024 VSTART_CHECK_EARLY_EXIT(env, vl); \
5025 \
5026 for (i = env->vstart; i < vl; i++) { \
5027 if (!vm && !vext_elem_mask(v0, i)) { \
5028 /* set masked-off elements to 1s */ \
5029 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5030 continue; \
5031 } \
5032 *((ETYPE *)vd + H(i)) = sum; \
5033 if (vext_elem_mask(vs2, i)) { \
5034 sum++; \
5035 } \
5036 } \
5037 env->vstart = 0; \
5038 /* set tail elements to 1s */ \
5039 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5040 }
5041
5042 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
5043 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
5044 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
5045 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
5046
5047 /* Vector Element Index Instruction */
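/* vid.v: vd[i] = i for each active element; the instruction has no vector source */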
5048 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \
5049 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \
5050 { \
5051 uint32_t vm = vext_vm(desc); \
5052 uint32_t vl = env->vl; \
5053 uint32_t esz = sizeof(ETYPE); \
5054 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5055 uint32_t vta = vext_vta(desc); \
5056 uint32_t vma = vext_vma(desc); \
5057 int i; \
5058 \
5059 VSTART_CHECK_EARLY_EXIT(env, vl); \
5060 \
5061 for (i = env->vstart; i < vl; i++) { \
5062 if (!vm && !vext_elem_mask(v0, i)) { \
5063 /* set masked-off elements to 1s */ \
5064 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5065 continue; \
5066 } \
5067 *((ETYPE *)vd + H(i)) = i; \
5068 } \
5069 env->vstart = 0; \
5070 /* set tail elements to 1s */ \
5071 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5072 }
5073
5074 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
5075 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
5076 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
5077 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
5078
5079 /*
5080 * Vector Permutation Instructions
5081 */
5082
5083 /* Vector Slide Instructions */
5084 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \
5085 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5086 CPURISCVState *env, uint32_t desc) \
5087 { \
5088 uint32_t vm = vext_vm(desc); \
5089 uint32_t vl = env->vl; \
5090 uint32_t esz = sizeof(ETYPE); \
5091 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5092 uint32_t vta = vext_vta(desc); \
5093 uint32_t vma = vext_vma(desc); \
5094 target_ulong offset = s1, i_min, i; \
5095 \
5096 VSTART_CHECK_EARLY_EXIT(env, vl); \
5097 \
5098 i_min = MAX(env->vstart, offset); \
5099 for (i = i_min; i < vl; i++) { \
5100 if (!vm && !vext_elem_mask(v0, i)) { \
5101 /* set masked-off elements to 1s */ \
5102 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5103 continue; \
5104 } \
5105 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \
5106 } \
5107 env->vstart = 0; \
5108 /* set tail elements to 1s */ \
5109 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5110 }
5111
5112 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
5113 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
5114 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
5115 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
5116 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
5117
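/*
 * vslidedown reads vs2[i + rs1]; active destination elements whose source
 * index would be >= VLMAX are written with zero (handled by the second
 * loop below).
 */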
5118 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \
5119 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5120 CPURISCVState *env, uint32_t desc) \
5121 { \
5122 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
5123 uint32_t vm = vext_vm(desc); \
5124 uint32_t vl = env->vl; \
5125 uint32_t esz = sizeof(ETYPE); \
5126 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5127 uint32_t vta = vext_vta(desc); \
5128 uint32_t vma = vext_vma(desc); \
5129 target_ulong i_max, i_min, i; \
5130 \
5131 VSTART_CHECK_EARLY_EXIT(env, vl); \
5132 \
5133 i_min = MIN(s1 < vlmax ? vlmax - s1 : 0, vl); \
5134 i_max = MAX(i_min, env->vstart); \
5135 for (i = env->vstart; i < i_max; ++i) { \
5136 if (!vm && !vext_elem_mask(v0, i)) { \
5137 /* set masked-off elements to 1s */ \
5138 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5139 continue; \
5140 } \
5141 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \
5142 } \
5143 \
5144 for (i = i_max; i < vl; ++i) { \
5145 if (!vm && !vext_elem_mask(v0, i)) { \
5146 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5147 continue; \
5148 } \
5149 *((ETYPE *)vd + H(i)) = 0; \
5150 } \
5151 \
5152 env->vstart = 0; \
5153 /* set tail elements to 1s */ \
5154 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5155 }
5156
5157 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
5158 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
5159 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
5160 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
5161 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
5162
5163 #define GEN_VEXT_VSLIDE1UP(BITWIDTH, H)                                   \
5164 static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1, \
5165 void *vs2, CPURISCVState *env, \
5166 uint32_t desc) \
5167 { \
5168 typedef uint##BITWIDTH##_t ETYPE; \
5169 uint32_t vm = vext_vm(desc); \
5170 uint32_t vl = env->vl; \
5171 uint32_t esz = sizeof(ETYPE); \
5172 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5173 uint32_t vta = vext_vta(desc); \
5174 uint32_t vma = vext_vma(desc); \
5175 uint32_t i; \
5176 \
5177 VSTART_CHECK_EARLY_EXIT(env, vl); \
5178 \
5179 for (i = env->vstart; i < vl; i++) { \
5180 if (!vm && !vext_elem_mask(v0, i)) { \
5181 /* set masked-off elements to 1s */ \
5182 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5183 continue; \
5184 } \
5185 if (i == 0) { \
5186 *((ETYPE *)vd + H(i)) = s1; \
5187 } else { \
5188 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \
5189 } \
5190 } \
5191 env->vstart = 0; \
5192 /* set tail elements to 1s */ \
5193 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5194 }
5195
5196 GEN_VEXT_VSLIDE1UP(8, H1)
5197 GEN_VEXT_VSLIDE1UP(16, H2)
5198 GEN_VEXT_VSLIDE1UP(32, H4)
5199 GEN_VEXT_VSLIDE1UP(64, H8)
5200
5201 #define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \
5202 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5203 CPURISCVState *env, uint32_t desc) \
5204 { \
5205 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
5206 }
5207
5208 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
5209 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
5210 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
5211 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
5212 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
5213
5214 #define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \
5215 static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1, \
5216 void *vs2, CPURISCVState *env, \
5217 uint32_t desc) \
5218 { \
5219 typedef uint##BITWIDTH##_t ETYPE; \
5220 uint32_t vm = vext_vm(desc); \
5221 uint32_t vl = env->vl; \
5222 uint32_t esz = sizeof(ETYPE); \
5223 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5224 uint32_t vta = vext_vta(desc); \
5225 uint32_t vma = vext_vma(desc); \
5226 uint32_t i; \
5227 \
5228 VSTART_CHECK_EARLY_EXIT(env, vl); \
5229 \
5230 for (i = env->vstart; i < vl; i++) { \
5231 if (!vm && !vext_elem_mask(v0, i)) { \
5232 /* set masked-off elements to 1s */ \
5233 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5234 continue; \
5235 } \
5236 if (i == vl - 1) { \
5237 *((ETYPE *)vd + H(i)) = s1; \
5238 } else { \
5239 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \
5240 } \
5241 } \
5242 env->vstart = 0; \
5243 /* set tail elements to 1s */ \
5244 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5245 }
5246
5247 GEN_VEXT_VSLIDE1DOWN(8, H1)
5248 GEN_VEXT_VSLIDE1DOWN(16, H2)
5249 GEN_VEXT_VSLIDE1DOWN(32, H4)
5250 GEN_VEXT_VSLIDE1DOWN(64, H8)
5251
5252 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \
5253 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5254 CPURISCVState *env, uint32_t desc) \
5255 { \
5256 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
5257 }
5258
5259 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
5260 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
5261 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
5262 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
5263 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
5264
5265 /* Vector Floating-Point Slide Instructions */
5266 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \
5267 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
5268 CPURISCVState *env, uint32_t desc) \
5269 { \
5270 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
5271 }
5272
5273 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
5274 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
5275 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
5276 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)
5277
5278 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \
5279 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
5280 CPURISCVState *env, uint32_t desc) \
5281 { \
5282 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
5283 }
5284
5285 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
5286 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
5287 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
5288 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
5289
5290 /* Vector Register Gather Instruction */
5291 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \
5292 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
5293 CPURISCVState *env, uint32_t desc) \
5294 { \
5295 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \
5296 uint32_t vm = vext_vm(desc); \
5297 uint32_t vl = env->vl; \
5298 uint32_t esz = sizeof(TS2); \
5299 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5300 uint32_t vta = vext_vta(desc); \
5301 uint32_t vma = vext_vma(desc); \
5302 uint64_t index; \
5303 uint32_t i; \
5304 \
5305 VSTART_CHECK_EARLY_EXIT(env, vl); \
5306 \
5307 for (i = env->vstart; i < vl; i++) { \
5308 if (!vm && !vext_elem_mask(v0, i)) { \
5309 /* set masked-off elements to 1s */ \
5310 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5311 continue; \
5312 } \
5313 index = *((TS1 *)vs1 + HS1(i)); \
5314 if (index >= vlmax) { \
5315 *((TS2 *)vd + HS2(i)) = 0; \
5316 } else { \
5317 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \
5318 } \
5319 } \
5320 env->vstart = 0; \
5321 /* set tail elements to 1s */ \
5322 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5323 }
5324
5325 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
5326 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
5327 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
5328 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
5329 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)
5330
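/* vrgatherei16.vv always uses 16-bit indices (EEW=16) regardless of SEW */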
5331 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
5332 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
5333 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
5334 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
5335
5336 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \
5337 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5338 CPURISCVState *env, uint32_t desc) \
5339 { \
5340 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
5341 uint32_t vm = vext_vm(desc); \
5342 uint32_t vl = env->vl; \
5343 uint32_t esz = sizeof(ETYPE); \
5344 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5345 uint32_t vta = vext_vta(desc); \
5346 uint32_t vma = vext_vma(desc); \
5347 uint64_t index = s1; \
5348 uint32_t i; \
5349 \
5350 VSTART_CHECK_EARLY_EXIT(env, vl); \
5351 \
5352 for (i = env->vstart; i < vl; i++) { \
5353 if (!vm && !vext_elem_mask(v0, i)) { \
5354 /* set masked-off elements to 1s */ \
5355 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5356 continue; \
5357 } \
5358 if (index >= vlmax) { \
5359 *((ETYPE *)vd + H(i)) = 0; \
5360 } else { \
5361 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \
5362 } \
5363 } \
5364 env->vstart = 0; \
5365 /* set tail elements to 1s */ \
5366 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5367 }
5368
5369 /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
5370 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
5371 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
5372 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
5373 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
5374
5375 /* Vector Compress Instruction */
5376 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \
5377 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
5378 CPURISCVState *env, uint32_t desc) \
5379 { \
5380 uint32_t vl = env->vl; \
5381 uint32_t esz = sizeof(ETYPE); \
5382 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5383 uint32_t vta = vext_vta(desc); \
5384 uint32_t num = 0, i; \
5385 \
5386 VSTART_CHECK_EARLY_EXIT(env, vl); \
5387 \
5388 for (i = env->vstart; i < vl; i++) { \
5389 if (!vext_elem_mask(vs1, i)) { \
5390 continue; \
5391 } \
5392 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \
5393 num++; \
5394 } \
5395 env->vstart = 0; \
5396 /* set tail elements to 1s */ \
5397 vext_set_elems_1s(vd, vta, num * esz, total_elems * esz); \
5398 }
5399
5400 /* Compress into vd elements of vs2 where vs1 is enabled */
5401 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
5402 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
5403 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
5404 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
5405
5406 /* Vector Whole Register Move */
5407 void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
5408 {
5409 /* EEW = SEW */
5410 uint32_t maxsz = simd_maxsz(desc);
5411 uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
5412 uint32_t startb = env->vstart * sewb;
5413 uint32_t i = startb;
5414
5415 if (startb >= maxsz) {
5416 env->vstart = 0;
5417 return;
5418 }
5419
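    /*
     * On big-endian hosts H1() reverses byte order within each 64-bit
     * lane, so a vstart byte offset that is not 8-byte aligned is handled
     * by first copying the remainder of that lane; everything after it is
     * lane-aligned and can be copied with a single memcpy.
     */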
5420 if (HOST_BIG_ENDIAN && i % 8 != 0) {
5421 uint32_t j = ROUND_UP(i, 8);
5422 memcpy((uint8_t *)vd + H1(j - 1),
5423 (uint8_t *)vs2 + H1(j - 1),
5424 j - i);
5425 i = j;
5426 }
5427
5428 memcpy((uint8_t *)vd + H1(i),
5429 (uint8_t *)vs2 + H1(i),
5430 maxsz - i);
5431
5432 env->vstart = 0;
5433 }
5434
5435 /* Vector Integer Extension */
5436 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \
5437 void HELPER(NAME)(void *vd, void *v0, void *vs2, \
5438 CPURISCVState *env, uint32_t desc) \
5439 { \
5440 uint32_t vl = env->vl; \
5441 uint32_t vm = vext_vm(desc); \
5442 uint32_t esz = sizeof(ETYPE); \
5443 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5444 uint32_t vta = vext_vta(desc); \
5445 uint32_t vma = vext_vma(desc); \
5446 uint32_t i; \
5447 \
5448 VSTART_CHECK_EARLY_EXIT(env, vl); \
5449 \
5450 for (i = env->vstart; i < vl; i++) { \
5451 if (!vm && !vext_elem_mask(v0, i)) { \
5452 /* set masked-off elements to 1s */ \
5453 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5454 continue; \
5455 } \
5456 *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \
5457 } \
5458 env->vstart = 0; \
5459 /* set tail elements to 1s */ \
5460 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5461 }
5462
5463 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
5464 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
5465 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
5466 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
5467 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
5468 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)
5469
5470 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
5471 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
5472 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
5473 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
5474 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
5475 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
5476