1 /*
2 * RISC-V Vector Extension Helpers for QEMU.
3 *
4 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2 or later, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19 #include "qemu/osdep.h"
20 #include "qemu/host-utils.h"
21 #include "qemu/bitops.h"
22 #include "cpu.h"
23 #include "exec/memop.h"
24 #include "exec/exec-all.h"
25 #include "exec/cpu_ldst.h"
26 #include "exec/page-protection.h"
27 #include "exec/helper-proto.h"
28 #include "fpu/softfloat.h"
29 #include "tcg/tcg-gvec-desc.h"
30 #include "internals.h"
31 #include "vector_internals.h"
32 #include <math.h>
33
34 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
35 target_ulong s2)
36 {
37 int vlmax, vl;
38 RISCVCPU *cpu = env_archcpu(env);
39 uint64_t vlmul = FIELD_EX64(s2, VTYPE, VLMUL);
40 uint8_t vsew = FIELD_EX64(s2, VTYPE, VSEW);
41 uint16_t sew = 8 << vsew;
42 uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
43 int xlen = riscv_cpu_xlen(env);
44 bool vill = (s2 >> (xlen - 1)) & 0x1;
45 target_ulong reserved = s2 &
46 MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
47 xlen - 1 - R_VTYPE_RESERVED_SHIFT);
48 uint16_t vlen = cpu->cfg.vlenb << 3;
49 int8_t lmul;
50
51 if (vlmul & 4) {
52 /*
53 * Fractional LMUL, check:
54 *
55 * VLEN * LMUL >= SEW
56 * VLEN >> (8 - lmul) >= sew
57 * (vlenb << 3) >> (8 - lmul) >= sew
58 */
59 if (vlmul == 4 || (vlen >> (8 - vlmul)) < sew) {
60 vill = true;
61 }
62 }
63
64 if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
65 /* only set vill bit. */
66 env->vill = 1;
67 env->vtype = 0;
68 env->vl = 0;
69 env->vstart = 0;
70 return 0;
71 }
72
73 /* lmul encoded as in DisasContext::lmul */
74 lmul = sextract32(FIELD_EX64(s2, VTYPE, VLMUL), 0, 3);
75 vlmax = vext_get_vlmax(cpu->cfg.vlenb, vsew, lmul);
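    /*
     * AVL-to-vl rule from the spec: vl = AVL when AVL <= VLMAX; an
     * implementation may set vl = ceil(AVL / 2) when VLMAX < AVL < 2 * VLMAX
     * (enabled here by rvv_vl_half_avl); otherwise vl = VLMAX.
     * E.g. vlenb = 16 (VLEN = 128), vsew = 2 (SEW = 32), lmul = 0 (LMUL = 1)
     * gives vlmax = 4.
     */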
76 if (s1 <= vlmax) {
77 vl = s1;
78 } else if (s1 < 2 * vlmax && cpu->cfg.rvv_vl_half_avl) {
79 vl = (s1 + 1) >> 1;
80 } else {
81 vl = vlmax;
82 }
83 env->vl = vl;
84 env->vtype = s2;
85 env->vstart = 0;
86 env->vill = 0;
87 return vl;
88 }
89
90 /*
91 * Get the maximum number of elements that can be operated on.
92 *
93 * log2_esz: log2 of element size in bytes.
94 */
95 static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
96 {
97 /*
98 * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
99 * so vlen in bytes (vlenb) is encoded as maxsz.
100 */
101 uint32_t vlenb = simd_maxsz(desc);
102
103 /* Return VLMAX */
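    /*
     * e.g. vlenb = 16 (VLEN = 128), LMUL = 2 (vext_lmul(desc) = 1) and
     * SEW = 32 (log2_esz = 2): scale = -1, so VLMAX = 16 >> 1 = 8,
     * matching VLEN / SEW * LMUL.
     */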
104 int scale = vext_lmul(desc) - log2_esz;
105 return scale < 0 ? vlenb >> -scale : vlenb << scale;
106 }
107
108 static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
109 {
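    /*
     * Apply pointer masking: keep only the unmasked address bits and OR in
     * the base, using the cached pointer-masking state in
     * cur_pmmask/cur_pmbase.
     */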
110 return (addr & ~env->cur_pmmask) | env->cur_pmbase;
111 }
112
113 /*
114 * This function checks watchpoints before the real load operation.
115 *
116 * In system mode, the TLB API probe_access is enough for watchpoint check.
117 * In user mode, there is no watchpoint support now.
118 *
119 * It will trigger an exception if there is no mapping in the TLB
120 * and the page table walk can't fill the TLB entry. The guest
121 * software can then return here after processing the exception, or never return.
122 */
123 static void probe_pages(CPURISCVState *env, target_ulong addr,
124 target_ulong len, uintptr_t ra,
125 MMUAccessType access_type)
126 {
127 target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
128 target_ulong curlen = MIN(pagelen, len);
129 int mmu_index = riscv_env_mmu_index(env, false);
130
131 probe_access(env, adjust_addr(env, addr), curlen, access_type,
132 mmu_index, ra);
133 if (len > curlen) {
134 addr += curlen;
135 curlen = len - curlen;
136 probe_access(env, adjust_addr(env, addr), curlen, access_type,
137 mmu_index, ra);
138 }
139 }
140
141 static inline void vext_set_elem_mask(void *v0, int index,
142 uint8_t value)
143 {
144 int idx = index / 64;
145 int pos = index % 64;
146 uint64_t old = ((uint64_t *)v0)[idx];
147 ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
148 }
149
150 /* element operations for load and store */
151 typedef void vext_ldst_elem_fn_tlb(CPURISCVState *env, abi_ptr addr,
152 uint32_t idx, void *vd, uintptr_t retaddr);
153 typedef void vext_ldst_elem_fn_host(void *vd, uint32_t idx, void *host);
154
155 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \
156 static inline QEMU_ALWAYS_INLINE \
157 void NAME##_tlb(CPURISCVState *env, abi_ptr addr, \
158 uint32_t idx, void *vd, uintptr_t retaddr) \
159 { \
160 ETYPE *cur = ((ETYPE *)vd + H(idx)); \
161 *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \
162 } \
163 \
164 static inline QEMU_ALWAYS_INLINE \
165 void NAME##_host(void *vd, uint32_t idx, void *host) \
166 { \
167 ETYPE *cur = ((ETYPE *)vd + H(idx)); \
168 *cur = (ETYPE)LDSUF##_p(host); \
169 }
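/*
 * Each element accessor comes in two flavors: the *_tlb variant goes through
 * the softmmu path (cpu_*_data_ra) and may raise a guest fault, while the
 * *_host variant accesses a host pointer that has already been probed.
 */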
170
171 GEN_VEXT_LD_ELEM(lde_b, uint8_t, H1, ldub)
172 GEN_VEXT_LD_ELEM(lde_h, uint16_t, H2, lduw)
173 GEN_VEXT_LD_ELEM(lde_w, uint32_t, H4, ldl)
174 GEN_VEXT_LD_ELEM(lde_d, uint64_t, H8, ldq)
175
176 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \
177 static inline QEMU_ALWAYS_INLINE \
178 void NAME##_tlb(CPURISCVState *env, abi_ptr addr, \
179 uint32_t idx, void *vd, uintptr_t retaddr) \
180 { \
181 ETYPE data = *((ETYPE *)vd + H(idx)); \
182 cpu_##STSUF##_data_ra(env, addr, data, retaddr); \
183 } \
184 \
185 static inline QEMU_ALWAYS_INLINE \
186 void NAME##_host(void *vd, uint32_t idx, void *host) \
187 { \
188 ETYPE data = *((ETYPE *)vd + H(idx)); \
189 STSUF##_p(host, data); \
190 }
191
192 GEN_VEXT_ST_ELEM(ste_b, uint8_t, H1, stb)
193 GEN_VEXT_ST_ELEM(ste_h, uint16_t, H2, stw)
194 GEN_VEXT_ST_ELEM(ste_w, uint32_t, H4, stl)
195 GEN_VEXT_ST_ELEM(ste_d, uint64_t, H8, stq)
196
197 static inline QEMU_ALWAYS_INLINE void
198 vext_continus_ldst_tlb(CPURISCVState *env, vext_ldst_elem_fn_tlb *ldst_tlb,
199 void *vd, uint32_t evl, target_ulong addr,
200 uint32_t reg_start, uintptr_t ra, uint32_t esz,
201 bool is_load)
202 {
203 uint32_t i;
204 for (i = env->vstart; i < evl; env->vstart = ++i, addr += esz) {
205 ldst_tlb(env, adjust_addr(env, addr), i, vd, ra);
206 }
207 }
208
209 static inline QEMU_ALWAYS_INLINE void
210 vext_continus_ldst_host(CPURISCVState *env, vext_ldst_elem_fn_host *ldst_host,
211 void *vd, uint32_t evl, uint32_t reg_start, void *host,
212 uint32_t esz, bool is_load)
213 {
214 #if HOST_BIG_ENDIAN
215 for (; reg_start < evl; reg_start++, host += esz) {
216 ldst_host(vd, reg_start, host);
217 }
218 #else
219 if (esz == 1) {
220 uint32_t byte_offset = reg_start * esz;
221 uint32_t size = (evl - reg_start) * esz;
222
223 if (is_load) {
224 memcpy(vd + byte_offset, host, size);
225 } else {
226 memcpy(host, vd + byte_offset, size);
227 }
228 } else {
229 for (; reg_start < evl; reg_start++, host += esz) {
230 ldst_host(vd, reg_start, host);
231 }
232 }
233 #endif
234 }
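/*
 * For byte-sized elements on a little-endian host the in-register layout
 * matches guest memory, so the whole range can be moved with a single memcpy;
 * other element sizes (and big-endian hosts, where the H() index swizzling
 * applies) go through the per-element accessors.
 */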
235
236 static void vext_set_tail_elems_1s(target_ulong vl, void *vd,
237 uint32_t desc, uint32_t nf,
238 uint32_t esz, uint32_t max_elems)
239 {
240 uint32_t vta = vext_vta(desc);
241 int k;
242
243 if (vta == 0) {
244 return;
245 }
246
247 for (k = 0; k < nf; ++k) {
248 vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz,
249 (k * max_elems + max_elems) * esz);
250 }
251 }
252
253 /*
254 * stride: access vector elements from strided memory
255 */
256 static void
257 vext_ldst_stride(void *vd, void *v0, target_ulong base, target_ulong stride,
258 CPURISCVState *env, uint32_t desc, uint32_t vm,
259 vext_ldst_elem_fn_tlb *ldst_elem, uint32_t log2_esz,
260 uintptr_t ra)
261 {
262 uint32_t i, k;
263 uint32_t nf = vext_nf(desc);
264 uint32_t max_elems = vext_max_elems(desc, log2_esz);
265 uint32_t esz = 1 << log2_esz;
266 uint32_t vma = vext_vma(desc);
267
268 VSTART_CHECK_EARLY_EXIT(env);
269
270 for (i = env->vstart; i < env->vl; env->vstart = ++i) {
271 k = 0;
272 while (k < nf) {
273 if (!vm && !vext_elem_mask(v0, i)) {
274 /* set masked-off elements to 1s */
275 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
276 (i + k * max_elems + 1) * esz);
277 k++;
278 continue;
279 }
280 target_ulong addr = base + stride * i + (k << log2_esz);
281 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
282 k++;
283 }
284 }
285 env->vstart = 0;
286
287 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
288 }
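/*
 * Strided addressing: e.g. for vlse32.v (esz = 4, nf = 1) element i is loaded
 * from base + stride * i; for a segment access with nf fields, field k of
 * element i comes from base + stride * i + k * esz.
 */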
289
290 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \
291 void HELPER(NAME)(void *vd, void * v0, target_ulong base, \
292 target_ulong stride, CPURISCVState *env, \
293 uint32_t desc) \
294 { \
295 uint32_t vm = vext_vm(desc); \
296 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \
297 ctzl(sizeof(ETYPE)), GETPC()); \
298 }
299
300 GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b_tlb)
301 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h_tlb)
302 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w_tlb)
303 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d_tlb)
304
305 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \
306 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
307 target_ulong stride, CPURISCVState *env, \
308 uint32_t desc) \
309 { \
310 uint32_t vm = vext_vm(desc); \
311 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \
312 ctzl(sizeof(ETYPE)), GETPC()); \
313 }
314
315 GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b_tlb)
316 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h_tlb)
317 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w_tlb)
318 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d_tlb)
319
320 /*
321 * unit-stride: access elements stored contiguously in memory
322 */
323
324 /* unmasked unit-stride load and store operation */
325 static inline QEMU_ALWAYS_INLINE void
326 vext_page_ldst_us(CPURISCVState *env, void *vd, target_ulong addr,
327 uint32_t elems, uint32_t nf, uint32_t max_elems,
328 uint32_t log2_esz, bool is_load, int mmu_index,
329 vext_ldst_elem_fn_tlb *ldst_tlb,
330 vext_ldst_elem_fn_host *ldst_host, uintptr_t ra)
331 {
332 void *host;
333 int i, k, flags;
334 uint32_t esz = 1 << log2_esz;
335 uint32_t size = (elems * nf) << log2_esz;
336 uint32_t evl = env->vstart + elems;
337 MMUAccessType access_type = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE;
338
339 /* Check page permission/pmp/watchpoint/etc. */
340 flags = probe_access_flags(env, adjust_addr(env, addr), size, access_type,
341 mmu_index, true, &host, ra);
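    /*
     * flags == 0 means the entire range is backed by ordinary host RAM with
     * no watchpoints or MMIO, so the fast host-pointer path can be used;
     * otherwise fall back to the per-element softmmu path.
     */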
342
343 if (flags == 0) {
344 if (nf == 1) {
345 vext_continus_ldst_host(env, ldst_host, vd, evl, env->vstart, host,
346 esz, is_load);
347 } else {
348 for (i = env->vstart; i < evl; ++i) {
349 k = 0;
350 while (k < nf) {
351 ldst_host(vd, i + k * max_elems, host);
352 host += esz;
353 k++;
354 }
355 }
356 }
357 env->vstart += elems;
358 } else {
359 if (nf == 1) {
360 vext_continus_ldst_tlb(env, ldst_tlb, vd, evl, addr, env->vstart,
361 ra, esz, is_load);
362 } else {
363 /* load bytes from guest memory */
364 for (i = env->vstart; i < evl; env->vstart = ++i) {
365 k = 0;
366 while (k < nf) {
367 ldst_tlb(env, adjust_addr(env, addr), i + k * max_elems,
368 vd, ra);
369 addr += esz;
370 k++;
371 }
372 }
373 }
374 }
375 }
376
377 static inline QEMU_ALWAYS_INLINE void
378 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
379 vext_ldst_elem_fn_tlb *ldst_tlb,
380 vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz,
381 uint32_t evl, uintptr_t ra, bool is_load)
382 {
383 uint32_t k;
384 target_ulong page_split, elems, addr;
385 uint32_t nf = vext_nf(desc);
386 uint32_t max_elems = vext_max_elems(desc, log2_esz);
387 uint32_t esz = 1 << log2_esz;
388 uint32_t msize = nf * esz;
389 int mmu_index = riscv_env_mmu_index(env, false);
390
391 if (env->vstart >= evl) {
392 env->vstart = 0;
393 return;
394 }
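    /*
     * A unit-stride access touches at most two pages: handle the elements
     * that fit entirely in the first page, then the (at most one) segment
     * that straddles the page boundary field by field via the TLB path, then
     * the remainder in the second page.
     */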
395
396 /* Calculate the page range of first page */
397 addr = base + ((env->vstart * nf) << log2_esz);
398 page_split = -(addr | TARGET_PAGE_MASK);
399 /* Get number of elements */
400 elems = page_split / msize;
401 if (unlikely(env->vstart + elems >= evl)) {
402 elems = evl - env->vstart;
403 }
404
405 /* Load/store elements in the first page */
406 if (likely(elems)) {
407 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, log2_esz,
408 is_load, mmu_index, ldst_tlb, ldst_host, ra);
409 }
410
411 /* Load/store elements in the second page */
412 if (unlikely(env->vstart < evl)) {
413 /* Cross page element */
414 if (unlikely(page_split % msize)) {
415 for (k = 0; k < nf; k++) {
416 addr = base + ((env->vstart * nf + k) << log2_esz);
417 ldst_tlb(env, adjust_addr(env, addr),
418 env->vstart + k * max_elems, vd, ra);
419 }
420 env->vstart++;
421 }
422
423 addr = base + ((env->vstart * nf) << log2_esz);
424 /* Get number of elements of second page */
425 elems = evl - env->vstart;
426
427 /* Load/store elements in the second page */
428 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, log2_esz,
429 is_load, mmu_index, ldst_tlb, ldst_host, ra);
430 }
431
432 env->vstart = 0;
433 vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems);
434 }
435
436 /*
437 * A masked unit-stride load or store operation is a special case of a
438 * strided access with stride = NF * sizeof(ETYPE)
439 */
440
441 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST) \
442 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
443 CPURISCVState *env, uint32_t desc) \
444 { \
445 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
446 vext_ldst_stride(vd, v0, base, stride, env, desc, false, \
447 LOAD_FN_TLB, ctzl(sizeof(ETYPE)), GETPC()); \
448 } \
449 \
450 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
451 CPURISCVState *env, uint32_t desc) \
452 { \
453 vext_ldst_us(vd, base, env, desc, LOAD_FN_TLB, LOAD_FN_HOST, \
454 ctzl(sizeof(ETYPE)), env->vl, GETPC(), true); \
455 }
456
457 GEN_VEXT_LD_US(vle8_v, int8_t, lde_b_tlb, lde_b_host)
458 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h_tlb, lde_h_host)
459 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w_tlb, lde_w_host)
460 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d_tlb, lde_d_host)
461
462 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN_TLB, STORE_FN_HOST) \
463 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
464 CPURISCVState *env, uint32_t desc) \
465 { \
466 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
467 vext_ldst_stride(vd, v0, base, stride, env, desc, false, \
468 STORE_FN_TLB, ctzl(sizeof(ETYPE)), GETPC()); \
469 } \
470 \
471 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
472 CPURISCVState *env, uint32_t desc) \
473 { \
474 vext_ldst_us(vd, base, env, desc, STORE_FN_TLB, STORE_FN_HOST, \
475 ctzl(sizeof(ETYPE)), env->vl, GETPC(), false); \
476 }
477
478 GEN_VEXT_ST_US(vse8_v, int8_t, ste_b_tlb, ste_b_host)
479 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h_tlb, ste_h_host)
480 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w_tlb, ste_w_host)
481 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d_tlb, ste_d_host)
482
483 /*
484 * unit stride mask load and store, EEW = 1
485 */
486 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
487 CPURISCVState *env, uint32_t desc)
488 {
489 /* evl = ceil(vl/8) */
490 uint8_t evl = (env->vl + 7) >> 3;
491 vext_ldst_us(vd, base, env, desc, lde_b_tlb, lde_b_host,
492 0, evl, GETPC(), true);
493 }
494
495 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
496 CPURISCVState *env, uint32_t desc)
497 {
498 /* evl = ceil(vl/8) */
499 uint8_t evl = (env->vl + 7) >> 3;
500 vext_ldst_us(vd, base, env, desc, ste_b_tlb, ste_b_host,
501 0, evl, GETPC(), false);
502 }
503
504 /*
505 * index: access vector elements from indexed memory
506 */
507 typedef target_ulong vext_get_index_addr(target_ulong base,
508 uint32_t idx, void *vs2);
509
510 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \
511 static target_ulong NAME(target_ulong base, \
512 uint32_t idx, void *vs2) \
513 { \
514 return (base + *((ETYPE *)vs2 + H(idx))); \
515 }
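/*
 * The index operand supplies unsigned offsets of the given EEW; each element
 * address is simply base + offset.
 */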
516
517 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1)
518 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
519 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
520 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
521
522 static inline void
523 vext_ldst_index(void *vd, void *v0, target_ulong base,
524 void *vs2, CPURISCVState *env, uint32_t desc,
525 vext_get_index_addr get_index_addr,
526 vext_ldst_elem_fn_tlb *ldst_elem,
527 uint32_t log2_esz, uintptr_t ra)
528 {
529 uint32_t i, k;
530 uint32_t nf = vext_nf(desc);
531 uint32_t vm = vext_vm(desc);
532 uint32_t max_elems = vext_max_elems(desc, log2_esz);
533 uint32_t esz = 1 << log2_esz;
534 uint32_t vma = vext_vma(desc);
535
536 VSTART_CHECK_EARLY_EXIT(env);
537
538 /* load bytes from guest memory */
539 for (i = env->vstart; i < env->vl; env->vstart = ++i) {
540 k = 0;
541 while (k < nf) {
542 if (!vm && !vext_elem_mask(v0, i)) {
543 /* set masked-off elements to 1s */
544 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
545 (i + k * max_elems + 1) * esz);
546 k++;
547 continue;
548 }
549 abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
550 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
551 k++;
552 }
553 }
554 env->vstart = 0;
555
556 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
557 }
558
559 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \
560 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
561 void *vs2, CPURISCVState *env, uint32_t desc) \
562 { \
563 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
564 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \
565 }
566
567 GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b_tlb)
568 GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h_tlb)
569 GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w_tlb)
570 GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d_tlb)
571 GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b_tlb)
572 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h_tlb)
573 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w_tlb)
574 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d_tlb)
575 GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b_tlb)
576 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h_tlb)
577 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w_tlb)
578 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d_tlb)
579 GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b_tlb)
580 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h_tlb)
581 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w_tlb)
582 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d_tlb)
583
584 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \
585 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
586 void *vs2, CPURISCVState *env, uint32_t desc) \
587 { \
588 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
589 STORE_FN, ctzl(sizeof(ETYPE)), \
590 GETPC()); \
591 }
592
593 GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b_tlb)
594 GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h_tlb)
595 GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w_tlb)
596 GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d_tlb)
597 GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b_tlb)
598 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h_tlb)
599 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w_tlb)
600 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d_tlb)
601 GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b_tlb)
602 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h_tlb)
603 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w_tlb)
604 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d_tlb)
605 GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b_tlb)
606 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h_tlb)
607 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w_tlb)
608 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d_tlb)
609
610 /*
611 * unit-stride fault-only-first load instructions
612 */
613 static inline void
614 vext_ldff(void *vd, void *v0, target_ulong base, CPURISCVState *env,
615 uint32_t desc, vext_ldst_elem_fn_tlb *ldst_tlb,
616 vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz, uintptr_t ra)
617 {
618 uint32_t i, k, vl = 0;
619 uint32_t nf = vext_nf(desc);
620 uint32_t vm = vext_vm(desc);
621 uint32_t max_elems = vext_max_elems(desc, log2_esz);
622 uint32_t esz = 1 << log2_esz;
623 uint32_t msize = nf * esz;
624 uint32_t vma = vext_vma(desc);
625 target_ulong addr, offset, remain, page_split, elems;
626 int mmu_index = riscv_env_mmu_index(env, false);
627
628 VSTART_CHECK_EARLY_EXIT(env);
629
630 /* probe every access */
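    /*
     * Only the first active element may trap. Later elements are probed
     * non-faulting; if one of them would fault, vl is trimmed to that element
     * index and the load completes with the shorter vector length.
     */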
631 for (i = env->vstart; i < env->vl; i++) {
632 if (!vm && !vext_elem_mask(v0, i)) {
633 continue;
634 }
635 addr = adjust_addr(env, base + i * (nf << log2_esz));
636 if (i == 0) {
637 /* Allow fault on first element. */
638 probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
639 } else {
640 remain = nf << log2_esz;
641 while (remain > 0) {
642 void *host;
643 int flags;
644
645 offset = -(addr | TARGET_PAGE_MASK);
646
647 /* Probe nonfault on subsequent elements. */
648 flags = probe_access_flags(env, addr, offset, MMU_DATA_LOAD,
649 mmu_index, true, &host, 0);
650
651 /*
652 * Stop if invalid (unmapped) or mmio (transaction may fail).
653 * Do not stop if watchpoint, as the spec says that
654 * first-fault should continue to access the same
655 * elements regardless of any watchpoint.
656 */
657 if (flags & ~TLB_WATCHPOINT) {
658 vl = i;
659 goto ProbeSuccess;
660 }
661 if (remain <= offset) {
662 break;
663 }
664 remain -= offset;
665 addr = adjust_addr(env, addr + offset);
666 }
667 }
668 }
669 ProbeSuccess:
670 /* load bytes from guest memory */
671 if (vl != 0) {
672 env->vl = vl;
673 }
674
675 if (env->vstart < env->vl) {
676 if (vm) {
677 /* Calculate the page range of first page */
678 addr = base + ((env->vstart * nf) << log2_esz);
679 page_split = -(addr | TARGET_PAGE_MASK);
680 /* Get number of elements */
681 elems = page_split / msize;
682 if (unlikely(env->vstart + elems >= env->vl)) {
683 elems = env->vl - env->vstart;
684 }
685
686 /* Load/store elements in the first page */
687 if (likely(elems)) {
688 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems,
689 log2_esz, true, mmu_index, ldst_tlb,
690 ldst_host, ra);
691 }
692
693 /* Load/store elements in the second page */
694 if (unlikely(env->vstart < env->vl)) {
695 /* Cross page element */
696 if (unlikely(page_split % msize)) {
697 for (k = 0; k < nf; k++) {
698 addr = base + ((env->vstart * nf + k) << log2_esz);
699 ldst_tlb(env, adjust_addr(env, addr),
700 env->vstart + k * max_elems, vd, ra);
701 }
702 env->vstart++;
703 }
704
705 addr = base + ((env->vstart * nf) << log2_esz);
706 /* Get number of elements of second page */
707 elems = env->vl - env->vstart;
708
709 /* Load/store elements in the second page */
710 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems,
711 log2_esz, true, mmu_index, ldst_tlb,
712 ldst_host, ra);
713 }
714 } else {
715 for (i = env->vstart; i < env->vl; i++) {
716 k = 0;
717 while (k < nf) {
718 if (!vext_elem_mask(v0, i)) {
719 /* set masked-off elements to 1s */
720 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
721 (i + k * max_elems + 1) * esz);
722 k++;
723 continue;
724 }
725 addr = base + ((i * nf + k) << log2_esz);
726 ldst_tlb(env, adjust_addr(env, addr), i + k * max_elems,
727 vd, ra);
728 k++;
729 }
730 }
731 }
732 }
733 env->vstart = 0;
734
735 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
736 }
737
738 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST) \
739 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
740 CPURISCVState *env, uint32_t desc) \
741 { \
742 vext_ldff(vd, v0, base, env, desc, LOAD_FN_TLB, \
743 LOAD_FN_HOST, ctzl(sizeof(ETYPE)), GETPC()); \
744 }
745
746 GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b_tlb, lde_b_host)
747 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h_tlb, lde_h_host)
748 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w_tlb, lde_w_host)
749 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d_tlb, lde_d_host)
750
751 #define DO_SWAP(N, M) (M)
752 #define DO_AND(N, M) (N & M)
753 #define DO_XOR(N, M) (N ^ M)
754 #define DO_OR(N, M) (N | M)
755 #define DO_ADD(N, M) (N + M)
756
757 /* Signed min/max */
758 #define DO_MAX(N, M) ((N) >= (M) ? (N) : (M))
759 #define DO_MIN(N, M) ((N) >= (M) ? (M) : (N))
760
761 /*
762 * load and store whole register instructions
763 */
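/*
 * Whole-register loads and stores ignore vl and vtype: they always transfer
 * NF * VLEN bits (evl = NF * VLEN / EEW elements below), honouring only
 * vstart.
 */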
764 static inline QEMU_ALWAYS_INLINE void
765 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
766 vext_ldst_elem_fn_tlb *ldst_tlb,
767 vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz,
768 uintptr_t ra, bool is_load)
769 {
770 target_ulong page_split, elems, addr;
771 uint32_t nf = vext_nf(desc);
772 uint32_t vlenb = riscv_cpu_cfg(env)->vlenb;
773 uint32_t max_elems = vlenb >> log2_esz;
774 uint32_t evl = nf * max_elems;
775 uint32_t esz = 1 << log2_esz;
776 int mmu_index = riscv_env_mmu_index(env, false);
777
778 /* Calculate the page range of first page */
779 addr = base + (env->vstart << log2_esz);
780 page_split = -(addr | TARGET_PAGE_MASK);
781 /* Get number of elements */
782 elems = page_split / esz;
783 if (unlikely(env->vstart + elems >= evl)) {
784 elems = evl - env->vstart;
785 }
786
787 /* Load/store elements in the first page */
788 if (likely(elems)) {
789 vext_page_ldst_us(env, vd, addr, elems, 1, max_elems, log2_esz,
790 is_load, mmu_index, ldst_tlb, ldst_host, ra);
791 }
792
793 /* Load/store elements in the second page */
794 if (unlikely(env->vstart < evl)) {
795 /* Cross page element */
796 if (unlikely(page_split % esz)) {
797 addr = base + (env->vstart << log2_esz);
798 ldst_tlb(env, adjust_addr(env, addr), env->vstart, vd, ra);
799 env->vstart++;
800 }
801
802 addr = base + (env->vstart << log2_esz);
803 /* Get number of elements of second page */
804 elems = evl - env->vstart;
805
806 /* Load/store elements in the second page */
807 vext_page_ldst_us(env, vd, addr, elems, 1, max_elems, log2_esz,
808 is_load, mmu_index, ldst_tlb, ldst_host, ra);
809 }
810
811 env->vstart = 0;
812 }
813
814 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST) \
815 void HELPER(NAME)(void *vd, target_ulong base, CPURISCVState *env, \
816 uint32_t desc) \
817 { \
818 vext_ldst_whole(vd, base, env, desc, LOAD_FN_TLB, LOAD_FN_HOST, \
819 ctzl(sizeof(ETYPE)), GETPC(), true); \
820 }
821
822 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b_tlb, lde_b_host)
823 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h_tlb, lde_h_host)
824 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w_tlb, lde_w_host)
825 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d_tlb, lde_d_host)
826 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b_tlb, lde_b_host)
827 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h_tlb, lde_h_host)
828 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w_tlb, lde_w_host)
829 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d_tlb, lde_d_host)
830 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b_tlb, lde_b_host)
831 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h_tlb, lde_h_host)
832 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w_tlb, lde_w_host)
833 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d_tlb, lde_d_host)
834 GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b_tlb, lde_b_host)
835 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h_tlb, lde_h_host)
836 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w_tlb, lde_w_host)
837 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d_tlb, lde_d_host)
838
839 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN_TLB, STORE_FN_HOST) \
840 void HELPER(NAME)(void *vd, target_ulong base, CPURISCVState *env, \
841 uint32_t desc) \
842 { \
843 vext_ldst_whole(vd, base, env, desc, STORE_FN_TLB, STORE_FN_HOST, \
844 ctzl(sizeof(ETYPE)), GETPC(), false); \
845 }
846
847 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b_tlb, ste_b_host)
848 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b_tlb, ste_b_host)
849 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b_tlb, ste_b_host)
850 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b_tlb, ste_b_host)
851
852 /*
853 * Vector Integer Arithmetic Instructions
854 */
855
856 /* (TD, T1, T2, TX1, TX2) */
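/*
 * TD = destination element type, T1/T2 = source element types as read from
 * the registers, TX1/TX2 = types the sources are converted to before the
 * scalar DO_* operation is applied (wider for the widening forms).
 */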
857 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
858 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
859 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
860 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
861 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
862 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
863 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
864 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
865 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
866 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
867 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
868 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
869 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
870 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
871 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
872 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
873 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
874 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
875 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
876 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
877 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
878 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
879 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
880
881 #define DO_SUB(N, M) (N - M)
882 #define DO_RSUB(N, M) (M - N)
883
884 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
885 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
886 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
887 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
888 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
889 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
890 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
891 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
892
893 GEN_VEXT_VV(vadd_vv_b, 1)
894 GEN_VEXT_VV(vadd_vv_h, 2)
895 GEN_VEXT_VV(vadd_vv_w, 4)
896 GEN_VEXT_VV(vadd_vv_d, 8)
897 GEN_VEXT_VV(vsub_vv_b, 1)
898 GEN_VEXT_VV(vsub_vv_h, 2)
899 GEN_VEXT_VV(vsub_vv_w, 4)
900 GEN_VEXT_VV(vsub_vv_d, 8)
901
902
903 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
904 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
905 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
906 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
907 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
908 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
909 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
910 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
911 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
912 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
913 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
914 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
915
916 GEN_VEXT_VX(vadd_vx_b, 1)
917 GEN_VEXT_VX(vadd_vx_h, 2)
918 GEN_VEXT_VX(vadd_vx_w, 4)
919 GEN_VEXT_VX(vadd_vx_d, 8)
920 GEN_VEXT_VX(vsub_vx_b, 1)
921 GEN_VEXT_VX(vsub_vx_h, 2)
922 GEN_VEXT_VX(vsub_vx_w, 4)
923 GEN_VEXT_VX(vsub_vx_d, 8)
924 GEN_VEXT_VX(vrsub_vx_b, 1)
925 GEN_VEXT_VX(vrsub_vx_h, 2)
926 GEN_VEXT_VX(vrsub_vx_w, 4)
927 GEN_VEXT_VX(vrsub_vx_d, 8)
928
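/*
 * Out-of-line helpers for the gvec expansion of reverse subtract with a
 * scalar operand: each element becomes (scalar - element) across the whole
 * operation size.
 */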
929 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
930 {
931 intptr_t oprsz = simd_oprsz(desc);
932 intptr_t i;
933
934 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
935 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
936 }
937 }
938
939 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
940 {
941 intptr_t oprsz = simd_oprsz(desc);
942 intptr_t i;
943
944 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
945 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
946 }
947 }
948
949 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
950 {
951 intptr_t oprsz = simd_oprsz(desc);
952 intptr_t i;
953
954 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
955 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
956 }
957 }
958
959 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
960 {
961 intptr_t oprsz = simd_oprsz(desc);
962 intptr_t i;
963
964 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
965 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
966 }
967 }
968
969 /* Vector Widening Integer Add/Subtract */
970 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
971 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
972 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
973 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
974 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
975 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
976 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
977 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
978 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
979 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
980 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
981 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
982 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
983 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
984 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
985 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
986 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
987 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
988 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
989 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
990 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
991 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
992 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
993 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
994 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
995 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
996 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
997 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
998 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
999 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
1000 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
1001 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
1002 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
1003 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
1004 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
1005 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
1006 GEN_VEXT_VV(vwaddu_vv_b, 2)
1007 GEN_VEXT_VV(vwaddu_vv_h, 4)
1008 GEN_VEXT_VV(vwaddu_vv_w, 8)
1009 GEN_VEXT_VV(vwsubu_vv_b, 2)
1010 GEN_VEXT_VV(vwsubu_vv_h, 4)
1011 GEN_VEXT_VV(vwsubu_vv_w, 8)
1012 GEN_VEXT_VV(vwadd_vv_b, 2)
1013 GEN_VEXT_VV(vwadd_vv_h, 4)
1014 GEN_VEXT_VV(vwadd_vv_w, 8)
1015 GEN_VEXT_VV(vwsub_vv_b, 2)
1016 GEN_VEXT_VV(vwsub_vv_h, 4)
1017 GEN_VEXT_VV(vwsub_vv_w, 8)
1018 GEN_VEXT_VV(vwaddu_wv_b, 2)
1019 GEN_VEXT_VV(vwaddu_wv_h, 4)
1020 GEN_VEXT_VV(vwaddu_wv_w, 8)
1021 GEN_VEXT_VV(vwsubu_wv_b, 2)
1022 GEN_VEXT_VV(vwsubu_wv_h, 4)
1023 GEN_VEXT_VV(vwsubu_wv_w, 8)
1024 GEN_VEXT_VV(vwadd_wv_b, 2)
1025 GEN_VEXT_VV(vwadd_wv_h, 4)
1026 GEN_VEXT_VV(vwadd_wv_w, 8)
1027 GEN_VEXT_VV(vwsub_wv_b, 2)
1028 GEN_VEXT_VV(vwsub_wv_h, 4)
1029 GEN_VEXT_VV(vwsub_wv_w, 8)
1030
1031 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
1032 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
1033 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
1034 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
1035 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
1036 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
1037 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
1038 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
1039 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
1040 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
1041 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
1042 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
1043 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
1044 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
1045 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
1046 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
1047 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
1048 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
1049 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
1050 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
1051 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
1052 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
1053 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
1054 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
1055 GEN_VEXT_VX(vwaddu_vx_b, 2)
1056 GEN_VEXT_VX(vwaddu_vx_h, 4)
1057 GEN_VEXT_VX(vwaddu_vx_w, 8)
1058 GEN_VEXT_VX(vwsubu_vx_b, 2)
1059 GEN_VEXT_VX(vwsubu_vx_h, 4)
1060 GEN_VEXT_VX(vwsubu_vx_w, 8)
1061 GEN_VEXT_VX(vwadd_vx_b, 2)
1062 GEN_VEXT_VX(vwadd_vx_h, 4)
1063 GEN_VEXT_VX(vwadd_vx_w, 8)
1064 GEN_VEXT_VX(vwsub_vx_b, 2)
1065 GEN_VEXT_VX(vwsub_vx_h, 4)
1066 GEN_VEXT_VX(vwsub_vx_w, 8)
1067 GEN_VEXT_VX(vwaddu_wx_b, 2)
1068 GEN_VEXT_VX(vwaddu_wx_h, 4)
1069 GEN_VEXT_VX(vwaddu_wx_w, 8)
1070 GEN_VEXT_VX(vwsubu_wx_b, 2)
1071 GEN_VEXT_VX(vwsubu_wx_h, 4)
1072 GEN_VEXT_VX(vwsubu_wx_w, 8)
1073 GEN_VEXT_VX(vwadd_wx_b, 2)
1074 GEN_VEXT_VX(vwadd_wx_h, 4)
1075 GEN_VEXT_VX(vwadd_wx_w, 8)
1076 GEN_VEXT_VX(vwsub_wx_b, 2)
1077 GEN_VEXT_VX(vwsub_wx_h, 4)
1078 GEN_VEXT_VX(vwsub_wx_w, 8)
1079
1080 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
1081 #define DO_VADC(N, M, C) (N + M + C)
1082 #define DO_VSBC(N, M, C) (N - M - C)
1083
1084 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \
1085 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1086 CPURISCVState *env, uint32_t desc) \
1087 { \
1088 uint32_t vl = env->vl; \
1089 uint32_t esz = sizeof(ETYPE); \
1090 uint32_t total_elems = \
1091 vext_get_total_elems(env, desc, esz); \
1092 uint32_t vta = vext_vta(desc); \
1093 uint32_t i; \
1094 \
1095 VSTART_CHECK_EARLY_EXIT(env); \
1096 \
1097 for (i = env->vstart; i < vl; i++) { \
1098 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1099 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1100 ETYPE carry = vext_elem_mask(v0, i); \
1101 \
1102 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \
1103 } \
1104 env->vstart = 0; \
1105 /* set tail elements to 1s */ \
1106 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1107 }
1108
1109 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC)
1110 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
1111 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
1112 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)
1113
1114 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC)
1115 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
1116 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
1117 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
1118
1119 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \
1120 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
1121 CPURISCVState *env, uint32_t desc) \
1122 { \
1123 uint32_t vl = env->vl; \
1124 uint32_t esz = sizeof(ETYPE); \
1125 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1126 uint32_t vta = vext_vta(desc); \
1127 uint32_t i; \
1128 \
1129 VSTART_CHECK_EARLY_EXIT(env); \
1130 \
1131 for (i = env->vstart; i < vl; i++) { \
1132 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1133 ETYPE carry = vext_elem_mask(v0, i); \
1134 \
1135 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
1136 } \
1137 env->vstart = 0; \
1138 /* set tail elements to 1s */ \
1139 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1140 }
1141
1142 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC)
1143 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
1144 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
1145 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)
1146
1147 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC)
1148 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
1149 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
1150 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
1151
1152 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \
1153 (__typeof(N))(N + M) < N)
1154 #define DO_MSBC(N, M, C) (C ? N <= M : N < M)
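/*
 * Carry/borrow-out detection via unsigned wrap-around: without a carry-in,
 * N + M overflows iff the truncated sum is below N; with a carry-in,
 * N + M + 1 overflows iff the truncated sum is less than or equal to N.
 * The borrow (subtract) cases are symmetric.
 */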
1155
1156 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \
1157 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1158 CPURISCVState *env, uint32_t desc) \
1159 { \
1160 uint32_t vl = env->vl; \
1161 uint32_t vm = vext_vm(desc); \
1162 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
1163 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1164 uint32_t i; \
1165 \
1166 VSTART_CHECK_EARLY_EXIT(env); \
1167 \
1168 for (i = env->vstart; i < vl; i++) { \
1169 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1170 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1171 ETYPE carry = !vm && vext_elem_mask(v0, i); \
1172 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \
1173 } \
1174 env->vstart = 0; \
1175 /*
1176 * the mask destination register is always tail-agnostic
1177 * set tail elements to 1s
1178 */ \
1179 if (vta_all_1s) { \
1180 for (; i < total_elems; i++) { \
1181 vext_set_elem_mask(vd, i, 1); \
1182 } \
1183 } \
1184 }
1185
1186 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC)
1187 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
1188 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
1189 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)
1190
1191 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC)
1192 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
1193 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
1194 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
1195
1196 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \
1197 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1198 void *vs2, CPURISCVState *env, uint32_t desc) \
1199 { \
1200 uint32_t vl = env->vl; \
1201 uint32_t vm = vext_vm(desc); \
1202 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
1203 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1204 uint32_t i; \
1205 \
1206 VSTART_CHECK_EARLY_EXIT(env); \
1207 \
1208 for (i = env->vstart; i < vl; i++) { \
1209 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1210 ETYPE carry = !vm && vext_elem_mask(v0, i); \
1211 vext_set_elem_mask(vd, i, \
1212 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \
1213 } \
1214 env->vstart = 0; \
1215 /*
1216 * the mask destination register is always tail-agnostic
1217 * set tail elements to 1s
1218 */ \
1219 if (vta_all_1s) { \
1220 for (; i < total_elems; i++) { \
1221 vext_set_elem_mask(vd, i, 1); \
1222 } \
1223 } \
1224 }
1225
1226 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC)
1227 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
1228 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
1229 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)
1230
1231 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC)
1232 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
1233 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
1234 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
1235
1236 /* Vector Bitwise Logical Instructions */
1237 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
1238 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
1239 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
1240 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
1241 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
1242 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
1243 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
1244 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
1245 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
1246 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
1247 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
1248 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
1249 GEN_VEXT_VV(vand_vv_b, 1)
1250 GEN_VEXT_VV(vand_vv_h, 2)
1251 GEN_VEXT_VV(vand_vv_w, 4)
1252 GEN_VEXT_VV(vand_vv_d, 8)
1253 GEN_VEXT_VV(vor_vv_b, 1)
1254 GEN_VEXT_VV(vor_vv_h, 2)
1255 GEN_VEXT_VV(vor_vv_w, 4)
1256 GEN_VEXT_VV(vor_vv_d, 8)
1257 GEN_VEXT_VV(vxor_vv_b, 1)
1258 GEN_VEXT_VV(vxor_vv_h, 2)
1259 GEN_VEXT_VV(vxor_vv_w, 4)
1260 GEN_VEXT_VV(vxor_vv_d, 8)
1261
1262 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
1263 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
1264 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
1265 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
1266 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
1267 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
1268 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
1269 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
1270 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
1271 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
1272 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
1273 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
1274 GEN_VEXT_VX(vand_vx_b, 1)
1275 GEN_VEXT_VX(vand_vx_h, 2)
1276 GEN_VEXT_VX(vand_vx_w, 4)
1277 GEN_VEXT_VX(vand_vx_d, 8)
1278 GEN_VEXT_VX(vor_vx_b, 1)
1279 GEN_VEXT_VX(vor_vx_h, 2)
1280 GEN_VEXT_VX(vor_vx_w, 4)
1281 GEN_VEXT_VX(vor_vx_d, 8)
1282 GEN_VEXT_VX(vxor_vx_b, 1)
1283 GEN_VEXT_VX(vxor_vx_h, 2)
1284 GEN_VEXT_VX(vxor_vx_w, 4)
1285 GEN_VEXT_VX(vxor_vx_d, 8)
1286
1287 /* Vector Single-Width Bit Shift Instructions */
1288 #define DO_SLL(N, M) (N << (M))
1289 #define DO_SRL(N, M) (N >> (M))
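/*
 * Arithmetic right shifts reuse DO_SRL but are instantiated with signed
 * element types, so ">>" sign-extends; the MASK argument keeps only the low
 * log2(SEW) bits of the shift amount (log2(2 * SEW) bits for the narrowing
 * shifts).
 */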
1290
1291 /* generate the helpers for shift instructions with two vector operands */
1292 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \
1293 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
1294 void *vs2, CPURISCVState *env, uint32_t desc) \
1295 { \
1296 uint32_t vm = vext_vm(desc); \
1297 uint32_t vl = env->vl; \
1298 uint32_t esz = sizeof(TS1); \
1299 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1300 uint32_t vta = vext_vta(desc); \
1301 uint32_t vma = vext_vma(desc); \
1302 uint32_t i; \
1303 \
1304 VSTART_CHECK_EARLY_EXIT(env); \
1305 \
1306 for (i = env->vstart; i < vl; i++) { \
1307 if (!vm && !vext_elem_mask(v0, i)) { \
1308 /* set masked-off elements to 1s */ \
1309 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
1310 continue; \
1311 } \
1312 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \
1313 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
1314 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \
1315 } \
1316 env->vstart = 0; \
1317 /* set tail elements to 1s */ \
1318 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1319 }
1320
1321 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7)
1322 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
1323 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
1324 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)
1325
1326 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1327 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1328 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1329 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1330
1331 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7)
1332 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
1333 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1334 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1335
1336 /*
1337 * generate the helpers for shift instructions with one vector and one scalar
1338 */
1339 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
1340 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1341 void *vs2, CPURISCVState *env, \
1342 uint32_t desc) \
1343 { \
1344 uint32_t vm = vext_vm(desc); \
1345 uint32_t vl = env->vl; \
1346 uint32_t esz = sizeof(TD); \
1347 uint32_t total_elems = \
1348 vext_get_total_elems(env, desc, esz); \
1349 uint32_t vta = vext_vta(desc); \
1350 uint32_t vma = vext_vma(desc); \
1351 uint32_t i; \
1352 \
1353 VSTART_CHECK_EARLY_EXIT(env); \
1354 \
1355 for (i = env->vstart; i < vl; i++) { \
1356 if (!vm && !vext_elem_mask(v0, i)) { \
1357 /* set masked-off elements to 1s */ \
1358 vext_set_elems_1s(vd, vma, i * esz, \
1359 (i + 1) * esz); \
1360 continue; \
1361 } \
1362 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
1363 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \
1364 } \
1365 env->vstart = 0; \
1366 /* set tail elements to 1s */ \
1367 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\
1368 }
1369
1370 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
1371 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
1372 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
1373 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)
1374
1375 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1376 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1377 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1378 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1379
1380 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
1381 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
1382 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1383 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1384
1385 /* Vector Narrowing Integer Right Shift Instructions */
1386 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1387 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1388 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1389 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf)
1390 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1391 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1392 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1393 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1394 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1395 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
1396 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1397 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1398
1399 /* Vector Integer Comparison Instructions */
1400 #define DO_MSEQ(N, M) (N == M)
1401 #define DO_MSNE(N, M) (N != M)
1402 #define DO_MSLT(N, M) (N < M)
1403 #define DO_MSLE(N, M) (N <= M)
1404 #define DO_MSGT(N, M) (N > M)
1405
1406 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \
1407 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1408 CPURISCVState *env, uint32_t desc) \
1409 { \
1410 uint32_t vm = vext_vm(desc); \
1411 uint32_t vl = env->vl; \
1412 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
1413 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1414 uint32_t vma = vext_vma(desc); \
1415 uint32_t i; \
1416 \
1417 VSTART_CHECK_EARLY_EXIT(env); \
1418 \
1419 for (i = env->vstart; i < vl; i++) { \
1420 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1421 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1422 if (!vm && !vext_elem_mask(v0, i)) { \
1423 /* set masked-off elements to 1s */ \
1424 if (vma) { \
1425 vext_set_elem_mask(vd, i, 1); \
1426 } \
1427 continue; \
1428 } \
1429 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \
1430 } \
1431 env->vstart = 0; \
1432 /*
1433 * the mask destination register is always tail-agnostic
1434 * set tail elements to 1s
1435 */ \
1436 if (vta_all_1s) { \
1437 for (; i < total_elems; i++) { \
1438 vext_set_elem_mask(vd, i, 1); \
1439 } \
1440 } \
1441 }
1442
1443 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ)
1444 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
1445 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
1446 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
1447
1448 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE)
1449 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
1450 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
1451 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
1452
1453 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT)
1454 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
1455 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
1456 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
1457
1458 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT)
1459 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
1460 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
1461 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
1462
1463 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE)
1464 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
1465 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
1466 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
1467
1468 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE)
1469 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
1470 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
1471 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1472
1473 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \
1474 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
1475 CPURISCVState *env, uint32_t desc) \
1476 { \
1477 uint32_t vm = vext_vm(desc); \
1478 uint32_t vl = env->vl; \
1479 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
1480 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1481 uint32_t vma = vext_vma(desc); \
1482 uint32_t i; \
1483 \
1484 VSTART_CHECK_EARLY_EXIT(env); \
1485 \
1486 for (i = env->vstart; i < vl; i++) { \
1487 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1488 if (!vm && !vext_elem_mask(v0, i)) { \
1489 /* set masked-off elements to 1s */ \
1490 if (vma) { \
1491 vext_set_elem_mask(vd, i, 1); \
1492 } \
1493 continue; \
1494 } \
1495 vext_set_elem_mask(vd, i, \
1496 DO_OP(s2, (ETYPE)(target_long)s1)); \
1497 } \
1498 env->vstart = 0; \
1499 /*
1500 * the mask destination register is always tail-agnostic
1501 * set tail elements to 1s
1502 */ \
1503 if (vta_all_1s) { \
1504 for (; i < total_elems; i++) { \
1505 vext_set_elem_mask(vd, i, 1); \
1506 } \
1507 } \
1508 }
1509
1510 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ)
1511 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
1512 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
1513 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
1514
1515 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE)
1516 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
1517 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
1518 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
1519
1520 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT)
1521 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
1522 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
1523 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
1524
1525 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT)
1526 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
1527 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
1528 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
1529
1530 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE)
1531 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
1532 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
1533 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
1534
1535 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE)
1536 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
1537 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
1538 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
1539
1540 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT)
1541 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
1542 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
1543 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
1544
1545 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT)
1546 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
1547 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
1548 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
1549
1550 /* Vector Integer Min/Max Instructions */
1551 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1552 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1553 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1554 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1555 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1556 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1557 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1558 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1559 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1560 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1561 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1562 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1563 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1564 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1565 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1566 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
1567 GEN_VEXT_VV(vminu_vv_b, 1)
1568 GEN_VEXT_VV(vminu_vv_h, 2)
1569 GEN_VEXT_VV(vminu_vv_w, 4)
1570 GEN_VEXT_VV(vminu_vv_d, 8)
1571 GEN_VEXT_VV(vmin_vv_b, 1)
1572 GEN_VEXT_VV(vmin_vv_h, 2)
1573 GEN_VEXT_VV(vmin_vv_w, 4)
1574 GEN_VEXT_VV(vmin_vv_d, 8)
1575 GEN_VEXT_VV(vmaxu_vv_b, 1)
1576 GEN_VEXT_VV(vmaxu_vv_h, 2)
1577 GEN_VEXT_VV(vmaxu_vv_w, 4)
1578 GEN_VEXT_VV(vmaxu_vv_d, 8)
1579 GEN_VEXT_VV(vmax_vv_b, 1)
1580 GEN_VEXT_VV(vmax_vv_h, 2)
1581 GEN_VEXT_VV(vmax_vv_w, 4)
1582 GEN_VEXT_VV(vmax_vv_d, 8)
1583
1584 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1585 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1586 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1587 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1588 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1589 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1590 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1591 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1592 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1593 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1594 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1595 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1596 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1597 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1598 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1599 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
1600 GEN_VEXT_VX(vminu_vx_b, 1)
1601 GEN_VEXT_VX(vminu_vx_h, 2)
1602 GEN_VEXT_VX(vminu_vx_w, 4)
1603 GEN_VEXT_VX(vminu_vx_d, 8)
1604 GEN_VEXT_VX(vmin_vx_b, 1)
1605 GEN_VEXT_VX(vmin_vx_h, 2)
1606 GEN_VEXT_VX(vmin_vx_w, 4)
1607 GEN_VEXT_VX(vmin_vx_d, 8)
1608 GEN_VEXT_VX(vmaxu_vx_b, 1)
1609 GEN_VEXT_VX(vmaxu_vx_h, 2)
1610 GEN_VEXT_VX(vmaxu_vx_w, 4)
1611 GEN_VEXT_VX(vmaxu_vx_d, 8)
1612 GEN_VEXT_VX(vmax_vx_b, 1)
1613 GEN_VEXT_VX(vmax_vx_h, 2)
1614 GEN_VEXT_VX(vmax_vx_w, 4)
1615 GEN_VEXT_VX(vmax_vx_d, 8)
1616
1617 /* Vector Single-Width Integer Multiply Instructions */
1618 #define DO_MUL(N, M) (N * M)
1619 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1620 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1621 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1622 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
1623 GEN_VEXT_VV(vmul_vv_b, 1)
1624 GEN_VEXT_VV(vmul_vv_h, 2)
1625 GEN_VEXT_VV(vmul_vv_w, 4)
1626 GEN_VEXT_VV(vmul_vv_d, 8)
1627
1628 static int8_t do_mulh_b(int8_t s2, int8_t s1)
1629 {
1630 return (int16_t)s2 * (int16_t)s1 >> 8;
1631 }
1632
1633 static int16_t do_mulh_h(int16_t s2, int16_t s1)
1634 {
1635 return (int32_t)s2 * (int32_t)s1 >> 16;
1636 }
1637
1638 static int32_t do_mulh_w(int32_t s2, int32_t s1)
1639 {
1640 return (int64_t)s2 * (int64_t)s1 >> 32;
1641 }
1642
1643 static int64_t do_mulh_d(int64_t s2, int64_t s1)
1644 {
1645 uint64_t hi_64, lo_64;
1646
1647 muls64(&lo_64, &hi_64, s1, s2);
1648 return hi_64;
1649 }
1650
1651 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1652 {
1653 return (uint16_t)s2 * (uint16_t)s1 >> 8;
1654 }
1655
1656 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1657 {
1658 return (uint32_t)s2 * (uint32_t)s1 >> 16;
1659 }
1660
1661 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1662 {
1663 return (uint64_t)s2 * (uint64_t)s1 >> 32;
1664 }
1665
1666 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1667 {
1668 uint64_t hi_64, lo_64;
1669
1670 mulu64(&lo_64, &hi_64, s2, s1);
1671 return hi_64;
1672 }
1673
1674 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1675 {
1676 return (int16_t)s2 * (uint16_t)s1 >> 8;
1677 }
1678
1679 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1680 {
1681 return (int32_t)s2 * (uint32_t)s1 >> 16;
1682 }
1683
1684 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1685 {
1686 return (int64_t)s2 * (uint64_t)s1 >> 32;
1687 }
1688
1689 /*
1690 * Let A = signed operand,
1691 * B = unsigned operand
1692 * P = mulu64(A, B), unsigned product
1693 *
1694 * LET X = 2 ** 64 - A, 2's complement of A
1695 * SP = signed product
1696 * THEN
1697 * IF A < 0
1698 * SP = -X * B
1699 * = -(2 ** 64 - A) * B
1700 * = A * B - 2 ** 64 * B
1701 * = P - 2 ** 64 * B
1702 * ELSE
1703 * SP = P
1704 * THEN
1705 * HI_P -= (A < 0 ? B : 0)
1706 */
1707
1708 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1709 {
1710 uint64_t hi_64, lo_64;
1711
1712 mulu64(&lo_64, &hi_64, s2, s1);
1713
1714 hi_64 -= s2 < 0 ? s1 : 0;
1715 return hi_64;
1716 }
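
/*
 * Hedged illustration, not used by the helpers above: the same
 * "HI_P -= (A < 0 ? B : 0)" correction derived in the comment before
 * do_mulhsu_d(), written out at 32x32->64 bits so it can be checked
 * against the direct widening product used by do_mulhsu_w().  The
 * function name is invented for this sketch only.
 */
static inline bool check_mulhsu_correction(int32_t a, uint32_t b)
{
    uint64_t p = (uint64_t)(uint32_t)a * b;   /* unsigned product P */
    uint32_t hi = p >> 32;                    /* high half of P */

    hi -= a < 0 ? b : 0;                      /* HI_P -= (A < 0 ? B : 0) */
    return (int32_t)hi == (int32_t)(((int64_t)a * b) >> 32);
}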
1717
1718 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1719 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1720 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1721 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1722 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1723 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1724 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1725 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1726 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1727 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1728 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1729 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
1730 GEN_VEXT_VV(vmulh_vv_b, 1)
1731 GEN_VEXT_VV(vmulh_vv_h, 2)
1732 GEN_VEXT_VV(vmulh_vv_w, 4)
1733 GEN_VEXT_VV(vmulh_vv_d, 8)
1734 GEN_VEXT_VV(vmulhu_vv_b, 1)
1735 GEN_VEXT_VV(vmulhu_vv_h, 2)
1736 GEN_VEXT_VV(vmulhu_vv_w, 4)
1737 GEN_VEXT_VV(vmulhu_vv_d, 8)
1738 GEN_VEXT_VV(vmulhsu_vv_b, 1)
1739 GEN_VEXT_VV(vmulhsu_vv_h, 2)
1740 GEN_VEXT_VV(vmulhsu_vv_w, 4)
1741 GEN_VEXT_VV(vmulhsu_vv_d, 8)
1742
1743 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1744 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1745 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1746 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1747 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1748 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1749 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1750 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1751 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1752 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1753 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1754 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1755 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1756 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1757 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1758 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
1759 GEN_VEXT_VX(vmul_vx_b, 1)
1760 GEN_VEXT_VX(vmul_vx_h, 2)
1761 GEN_VEXT_VX(vmul_vx_w, 4)
1762 GEN_VEXT_VX(vmul_vx_d, 8)
1763 GEN_VEXT_VX(vmulh_vx_b, 1)
1764 GEN_VEXT_VX(vmulh_vx_h, 2)
1765 GEN_VEXT_VX(vmulh_vx_w, 4)
1766 GEN_VEXT_VX(vmulh_vx_d, 8)
1767 GEN_VEXT_VX(vmulhu_vx_b, 1)
1768 GEN_VEXT_VX(vmulhu_vx_h, 2)
1769 GEN_VEXT_VX(vmulhu_vx_w, 4)
1770 GEN_VEXT_VX(vmulhu_vx_d, 8)
1771 GEN_VEXT_VX(vmulhsu_vx_b, 1)
1772 GEN_VEXT_VX(vmulhsu_vx_h, 2)
1773 GEN_VEXT_VX(vmulhsu_vx_w, 4)
1774 GEN_VEXT_VX(vmulhsu_vx_d, 8)
1775
1776 /* Vector Integer Divide Instructions */
1777 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1778 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
1779 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : \
1780 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
1781 #define DO_REM(N, M) (unlikely(M == 0) ? N : \
1782 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
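
/*
 * Note on the checks above: N == -N is true only for the most negative
 * value of N's type, so the second test in DO_DIV and DO_REM catches the
 * "most negative dividend / -1" overflow case.  As with the scalar
 * instructions, that case yields N for division and 0 for remainder, and
 * division by zero yields all-ones for the quotient and the unchanged
 * dividend for the remainder, without trapping.
 */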
1783
1784 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1785 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1786 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1787 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1788 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1789 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1790 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1791 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1792 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1793 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1794 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1795 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1796 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1797 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1798 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1799 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
1800 GEN_VEXT_VV(vdivu_vv_b, 1)
1801 GEN_VEXT_VV(vdivu_vv_h, 2)
1802 GEN_VEXT_VV(vdivu_vv_w, 4)
1803 GEN_VEXT_VV(vdivu_vv_d, 8)
1804 GEN_VEXT_VV(vdiv_vv_b, 1)
1805 GEN_VEXT_VV(vdiv_vv_h, 2)
1806 GEN_VEXT_VV(vdiv_vv_w, 4)
1807 GEN_VEXT_VV(vdiv_vv_d, 8)
1808 GEN_VEXT_VV(vremu_vv_b, 1)
1809 GEN_VEXT_VV(vremu_vv_h, 2)
1810 GEN_VEXT_VV(vremu_vv_w, 4)
1811 GEN_VEXT_VV(vremu_vv_d, 8)
1812 GEN_VEXT_VV(vrem_vv_b, 1)
1813 GEN_VEXT_VV(vrem_vv_h, 2)
1814 GEN_VEXT_VV(vrem_vv_w, 4)
1815 GEN_VEXT_VV(vrem_vv_d, 8)
1816
1817 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1818 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1819 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1820 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1821 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1822 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1823 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1824 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1825 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1826 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1827 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1828 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1829 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1830 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1831 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1832 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
1833 GEN_VEXT_VX(vdivu_vx_b, 1)
1834 GEN_VEXT_VX(vdivu_vx_h, 2)
1835 GEN_VEXT_VX(vdivu_vx_w, 4)
1836 GEN_VEXT_VX(vdivu_vx_d, 8)
1837 GEN_VEXT_VX(vdiv_vx_b, 1)
1838 GEN_VEXT_VX(vdiv_vx_h, 2)
1839 GEN_VEXT_VX(vdiv_vx_w, 4)
1840 GEN_VEXT_VX(vdiv_vx_d, 8)
1841 GEN_VEXT_VX(vremu_vx_b, 1)
1842 GEN_VEXT_VX(vremu_vx_h, 2)
1843 GEN_VEXT_VX(vremu_vx_w, 4)
1844 GEN_VEXT_VX(vremu_vx_d, 8)
1845 GEN_VEXT_VX(vrem_vx_b, 1)
1846 GEN_VEXT_VX(vrem_vx_h, 2)
1847 GEN_VEXT_VX(vrem_vx_w, 4)
1848 GEN_VEXT_VX(vrem_vx_d, 8)
1849
1850 /* Vector Widening Integer Multiply Instructions */
1851 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1852 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1853 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1854 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1855 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1856 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1857 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1858 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1859 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
1860 GEN_VEXT_VV(vwmul_vv_b, 2)
1861 GEN_VEXT_VV(vwmul_vv_h, 4)
1862 GEN_VEXT_VV(vwmul_vv_w, 8)
1863 GEN_VEXT_VV(vwmulu_vv_b, 2)
1864 GEN_VEXT_VV(vwmulu_vv_h, 4)
1865 GEN_VEXT_VV(vwmulu_vv_w, 8)
1866 GEN_VEXT_VV(vwmulsu_vv_b, 2)
1867 GEN_VEXT_VV(vwmulsu_vv_h, 4)
1868 GEN_VEXT_VV(vwmulsu_vv_w, 8)
1869
1870 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1871 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1872 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1873 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1874 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1875 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1876 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1877 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1878 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
1879 GEN_VEXT_VX(vwmul_vx_b, 2)
1880 GEN_VEXT_VX(vwmul_vx_h, 4)
1881 GEN_VEXT_VX(vwmul_vx_w, 8)
1882 GEN_VEXT_VX(vwmulu_vx_b, 2)
1883 GEN_VEXT_VX(vwmulu_vx_h, 4)
1884 GEN_VEXT_VX(vwmulu_vx_w, 8)
1885 GEN_VEXT_VX(vwmulsu_vx_b, 2)
1886 GEN_VEXT_VX(vwmulsu_vx_h, 4)
1887 GEN_VEXT_VX(vwmulsu_vx_w, 8)
1888
1889 /* Vector Single-Width Integer Multiply-Add Instructions */
1890 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
1891 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
1892 { \
1893 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
1894 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1895 TD d = *((TD *)vd + HD(i)); \
1896 *((TD *)vd + HD(i)) = OP(s2, s1, d); \
1897 }
1898
1899 #define DO_MACC(N, M, D) (M * N + D)
1900 #define DO_NMSAC(N, M, D) (-(M * N) + D)
1901 #define DO_MADD(N, M, D) (M * D + N)
1902 #define DO_NMSUB(N, M, D) (-(M * D) + N)
1903 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1904 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1905 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1906 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1907 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1908 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1909 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1910 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1911 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1912 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1913 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1914 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1915 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1916 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1917 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1918 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
1919 GEN_VEXT_VV(vmacc_vv_b, 1)
1920 GEN_VEXT_VV(vmacc_vv_h, 2)
1921 GEN_VEXT_VV(vmacc_vv_w, 4)
1922 GEN_VEXT_VV(vmacc_vv_d, 8)
1923 GEN_VEXT_VV(vnmsac_vv_b, 1)
1924 GEN_VEXT_VV(vnmsac_vv_h, 2)
1925 GEN_VEXT_VV(vnmsac_vv_w, 4)
1926 GEN_VEXT_VV(vnmsac_vv_d, 8)
1927 GEN_VEXT_VV(vmadd_vv_b, 1)
1928 GEN_VEXT_VV(vmadd_vv_h, 2)
1929 GEN_VEXT_VV(vmadd_vv_w, 4)
1930 GEN_VEXT_VV(vmadd_vv_d, 8)
1931 GEN_VEXT_VV(vnmsub_vv_b, 1)
1932 GEN_VEXT_VV(vnmsub_vv_h, 2)
1933 GEN_VEXT_VV(vnmsub_vv_w, 4)
1934 GEN_VEXT_VV(vnmsub_vv_d, 8)
1935
1936 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
1937 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
1938 { \
1939 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1940 TD d = *((TD *)vd + HD(i)); \
1941 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \
1942 }
1943
1944 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1945 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1946 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1947 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1948 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1949 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1950 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1951 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1952 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1953 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1954 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1955 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1956 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1957 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1958 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1959 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
1960 GEN_VEXT_VX(vmacc_vx_b, 1)
1961 GEN_VEXT_VX(vmacc_vx_h, 2)
1962 GEN_VEXT_VX(vmacc_vx_w, 4)
1963 GEN_VEXT_VX(vmacc_vx_d, 8)
1964 GEN_VEXT_VX(vnmsac_vx_b, 1)
1965 GEN_VEXT_VX(vnmsac_vx_h, 2)
1966 GEN_VEXT_VX(vnmsac_vx_w, 4)
1967 GEN_VEXT_VX(vnmsac_vx_d, 8)
1968 GEN_VEXT_VX(vmadd_vx_b, 1)
1969 GEN_VEXT_VX(vmadd_vx_h, 2)
1970 GEN_VEXT_VX(vmadd_vx_w, 4)
1971 GEN_VEXT_VX(vmadd_vx_d, 8)
1972 GEN_VEXT_VX(vnmsub_vx_b, 1)
1973 GEN_VEXT_VX(vnmsub_vx_h, 2)
1974 GEN_VEXT_VX(vnmsub_vx_w, 4)
1975 GEN_VEXT_VX(vnmsub_vx_d, 8)
1976
1977 /* Vector Widening Integer Multiply-Add Instructions */
1978 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1979 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1980 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1981 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1982 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1983 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1984 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1985 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1986 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
1987 GEN_VEXT_VV(vwmaccu_vv_b, 2)
1988 GEN_VEXT_VV(vwmaccu_vv_h, 4)
1989 GEN_VEXT_VV(vwmaccu_vv_w, 8)
1990 GEN_VEXT_VV(vwmacc_vv_b, 2)
1991 GEN_VEXT_VV(vwmacc_vv_h, 4)
1992 GEN_VEXT_VV(vwmacc_vv_w, 8)
1993 GEN_VEXT_VV(vwmaccsu_vv_b, 2)
1994 GEN_VEXT_VV(vwmaccsu_vv_h, 4)
1995 GEN_VEXT_VV(vwmaccsu_vv_w, 8)
1996
1997 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
1998 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
1999 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
2000 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
2001 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
2002 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
2003 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
2004 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
2005 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
2006 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
2007 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
2008 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
2009 GEN_VEXT_VX(vwmaccu_vx_b, 2)
2010 GEN_VEXT_VX(vwmaccu_vx_h, 4)
2011 GEN_VEXT_VX(vwmaccu_vx_w, 8)
2012 GEN_VEXT_VX(vwmacc_vx_b, 2)
2013 GEN_VEXT_VX(vwmacc_vx_h, 4)
2014 GEN_VEXT_VX(vwmacc_vx_w, 8)
2015 GEN_VEXT_VX(vwmaccsu_vx_b, 2)
2016 GEN_VEXT_VX(vwmaccsu_vx_h, 4)
2017 GEN_VEXT_VX(vwmaccsu_vx_w, 8)
2018 GEN_VEXT_VX(vwmaccus_vx_b, 2)
2019 GEN_VEXT_VX(vwmaccus_vx_h, 4)
2020 GEN_VEXT_VX(vwmaccus_vx_w, 8)
2021
2022 /* Vector Integer Merge and Move Instructions */
2023 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \
2024 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \
2025 uint32_t desc) \
2026 { \
2027 uint32_t vl = env->vl; \
2028 uint32_t esz = sizeof(ETYPE); \
2029 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2030 uint32_t vta = vext_vta(desc); \
2031 uint32_t i; \
2032 \
2033 VSTART_CHECK_EARLY_EXIT(env); \
2034 \
2035 for (i = env->vstart; i < vl; i++) { \
2036 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
2037 *((ETYPE *)vd + H(i)) = s1; \
2038 } \
2039 env->vstart = 0; \
2040 /* set tail elements to 1s */ \
2041 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
2042 }
2043
2044 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1)
2045 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
2046 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
2047 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
2048
2049 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \
2050 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \
2051 uint32_t desc) \
2052 { \
2053 uint32_t vl = env->vl; \
2054 uint32_t esz = sizeof(ETYPE); \
2055 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2056 uint32_t vta = vext_vta(desc); \
2057 uint32_t i; \
2058 \
2059 VSTART_CHECK_EARLY_EXIT(env); \
2060 \
2061 for (i = env->vstart; i < vl; i++) { \
2062 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \
2063 } \
2064 env->vstart = 0; \
2065 /* set tail elements to 1s */ \
2066 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
2067 }
2068
2069 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1)
2070 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
2071 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
2072 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
2073
2074 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \
2075 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
2076 CPURISCVState *env, uint32_t desc) \
2077 { \
2078 uint32_t vl = env->vl; \
2079 uint32_t esz = sizeof(ETYPE); \
2080 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2081 uint32_t vta = vext_vta(desc); \
2082 uint32_t i; \
2083 \
2084 VSTART_CHECK_EARLY_EXIT(env); \
2085 \
2086 for (i = env->vstart; i < vl; i++) { \
2087 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \
2088 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \
2089 } \
2090 env->vstart = 0; \
2091 /* set tail elements to 1s */ \
2092 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
2093 }
2094
2095 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1)
2096 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
2097 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
2098 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
2099
2100 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \
2101 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
2102 void *vs2, CPURISCVState *env, uint32_t desc) \
2103 { \
2104 uint32_t vl = env->vl; \
2105 uint32_t esz = sizeof(ETYPE); \
2106 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2107 uint32_t vta = vext_vta(desc); \
2108 uint32_t i; \
2109 \
2110 VSTART_CHECK_EARLY_EXIT(env); \
2111 \
2112 for (i = env->vstart; i < vl; i++) { \
2113 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
2114 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \
2115 (ETYPE)(target_long)s1); \
2116 *((ETYPE *)vd + H(i)) = d; \
2117 } \
2118 env->vstart = 0; \
2119 /* set tail elements to 1s */ \
2120 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
2121 }
2122
2123 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1)
2124 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
2125 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
2126 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
2127
2128 /*
2129 * Vector Fixed-Point Arithmetic Instructions
2130 */
2131
2132 /* Vector Single-Width Saturating Add and Subtract */
2133
2134 /*
2135 * Fixed-point instructions involve a rounding mode and may saturate, so
2136 * define the common fixed-point macros here.
2137 */
2138 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
2139 CPURISCVState *env, int vxrm);
2140
2141 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
2142 static inline void \
2143 do_##NAME(void *vd, void *vs1, void *vs2, int i, \
2144 CPURISCVState *env, int vxrm) \
2145 { \
2146 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
2147 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2148 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \
2149 }
2150
2151 static inline void
2152 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
2153 CPURISCVState *env,
2154 uint32_t vl, uint32_t vm, int vxrm,
2155 opivv2_rm_fn *fn, uint32_t vma, uint32_t esz)
2156 {
2157 VSTART_CHECK_EARLY_EXIT(env);
2158
2159 for (uint32_t i = env->vstart; i < vl; i++) {
2160 if (!vm && !vext_elem_mask(v0, i)) {
2161 /* set masked-off elements to 1s */
2162 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
2163 continue;
2164 }
2165 fn(vd, vs1, vs2, i, env, vxrm);
2166 }
2167 env->vstart = 0;
2168 }
2169
2170 static inline void
2171 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
2172 CPURISCVState *env,
2173 uint32_t desc,
2174 opivv2_rm_fn *fn, uint32_t esz)
2175 {
2176 uint32_t vm = vext_vm(desc);
2177 uint32_t vl = env->vl;
2178 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
2179 uint32_t vta = vext_vta(desc);
2180 uint32_t vma = vext_vma(desc);
2181
2182 switch (env->vxrm) {
2183 case 0: /* rnu */
2184 vext_vv_rm_1(vd, v0, vs1, vs2,
2185 env, vl, vm, 0, fn, vma, esz);
2186 break;
2187 case 1: /* rne */
2188 vext_vv_rm_1(vd, v0, vs1, vs2,
2189 env, vl, vm, 1, fn, vma, esz);
2190 break;
2191 case 2: /* rdn */
2192 vext_vv_rm_1(vd, v0, vs1, vs2,
2193 env, vl, vm, 2, fn, vma, esz);
2194 break;
2195 default: /* rod */
2196 vext_vv_rm_1(vd, v0, vs1, vs2,
2197 env, vl, vm, 3, fn, vma, esz);
2198 break;
2199 }
2200 /* set tail elements to 1s */
2201 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
2202 }
2203
2204 /* generate helpers for fixed point instructions with OPIVV format */
2205 #define GEN_VEXT_VV_RM(NAME, ESZ) \
2206 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
2207 CPURISCVState *env, uint32_t desc) \
2208 { \
2209 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \
2210 do_##NAME, ESZ); \
2211 }
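
/*
 * For orientation (paraphrase, not literal preprocessor output):
 * GEN_VEXT_VV_RM(vsaddu_vv_b, 1) below emits helper_vsaddu_vv_b(), which
 * forwards to vext_vv_rm_2(..., do_vsaddu_vv_b, 1); that in turn picks
 * the rounding mode from env->vxrm, loops over the active elements via
 * vext_vv_rm_1() (applying the mask-agnostic policy), and finally sets
 * the tail elements to 1s when the tail-agnostic policy is in effect.
 */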
2212
2213 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a,
2214 uint8_t b)
2215 {
2216 uint8_t res = a + b;
2217 if (res < a) {
2218 res = UINT8_MAX;
2219 env->vxsat = 0x1;
2220 }
2221 return res;
2222 }
2223
2224 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
2225 uint16_t b)
2226 {
2227 uint16_t res = a + b;
2228 if (res < a) {
2229 res = UINT16_MAX;
2230 env->vxsat = 0x1;
2231 }
2232 return res;
2233 }
2234
2235 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
2236 uint32_t b)
2237 {
2238 uint32_t res = a + b;
2239 if (res < a) {
2240 res = UINT32_MAX;
2241 env->vxsat = 0x1;
2242 }
2243 return res;
2244 }
2245
2246 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
2247 uint64_t b)
2248 {
2249 uint64_t res = a + b;
2250 if (res < a) {
2251 res = UINT64_MAX;
2252 env->vxsat = 0x1;
2253 }
2254 return res;
2255 }
2256
2257 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
2258 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
2259 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
2260 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
2261 GEN_VEXT_VV_RM(vsaddu_vv_b, 1)
2262 GEN_VEXT_VV_RM(vsaddu_vv_h, 2)
2263 GEN_VEXT_VV_RM(vsaddu_vv_w, 4)
2264 GEN_VEXT_VV_RM(vsaddu_vv_d, 8)
2265
2266 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
2267 CPURISCVState *env, int vxrm);
2268
2269 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
2270 static inline void \
2271 do_##NAME(void *vd, target_long s1, void *vs2, int i, \
2272 CPURISCVState *env, int vxrm) \
2273 { \
2274 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2275 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \
2276 }
2277
2278 static inline void
2279 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
2280 CPURISCVState *env,
2281 uint32_t vl, uint32_t vm, int vxrm,
2282 opivx2_rm_fn *fn, uint32_t vma, uint32_t esz)
2283 {
2284 VSTART_CHECK_EARLY_EXIT(env);
2285
2286 for (uint32_t i = env->vstart; i < vl; i++) {
2287 if (!vm && !vext_elem_mask(v0, i)) {
2288 /* set masked-off elements to 1s */
2289 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
2290 continue;
2291 }
2292 fn(vd, s1, vs2, i, env, vxrm);
2293 }
2294 env->vstart = 0;
2295 }
2296
2297 static inline void
2298 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2299 CPURISCVState *env,
2300 uint32_t desc,
2301 opivx2_rm_fn *fn, uint32_t esz)
2302 {
2303 uint32_t vm = vext_vm(desc);
2304 uint32_t vl = env->vl;
2305 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
2306 uint32_t vta = vext_vta(desc);
2307 uint32_t vma = vext_vma(desc);
2308
2309 switch (env->vxrm) {
2310 case 0: /* rnu */
2311 vext_vx_rm_1(vd, v0, s1, vs2,
2312 env, vl, vm, 0, fn, vma, esz);
2313 break;
2314 case 1: /* rne */
2315 vext_vx_rm_1(vd, v0, s1, vs2,
2316 env, vl, vm, 1, fn, vma, esz);
2317 break;
2318 case 2: /* rdn */
2319 vext_vx_rm_1(vd, v0, s1, vs2,
2320 env, vl, vm, 2, fn, vma, esz);
2321 break;
2322 default: /* rod */
2323 vext_vx_rm_1(vd, v0, s1, vs2,
2324 env, vl, vm, 3, fn, vma, esz);
2325 break;
2326 }
2327 /* set tail elements to 1s */
2328 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
2329 }
2330
2331 /* generate helpers for fixed point instructions with OPIVX format */
2332 #define GEN_VEXT_VX_RM(NAME, ESZ) \
2333 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
2334 void *vs2, CPURISCVState *env, \
2335 uint32_t desc) \
2336 { \
2337 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \
2338 do_##NAME, ESZ); \
2339 }
2340
2341 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2342 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2343 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2344 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
2345 GEN_VEXT_VX_RM(vsaddu_vx_b, 1)
2346 GEN_VEXT_VX_RM(vsaddu_vx_h, 2)
2347 GEN_VEXT_VX_RM(vsaddu_vx_w, 4)
2348 GEN_VEXT_VX_RM(vsaddu_vx_d, 8)
2349
2350 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2351 {
2352 int8_t res = a + b;
2353 if ((res ^ a) & (res ^ b) & INT8_MIN) {
2354 res = a > 0 ? INT8_MAX : INT8_MIN;
2355 env->vxsat = 0x1;
2356 }
2357 return res;
2358 }
2359
2360 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a,
2361 int16_t b)
2362 {
2363 int16_t res = a + b;
2364 if ((res ^ a) & (res ^ b) & INT16_MIN) {
2365 res = a > 0 ? INT16_MAX : INT16_MIN;
2366 env->vxsat = 0x1;
2367 }
2368 return res;
2369 }
2370
2371 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a,
2372 int32_t b)
2373 {
2374 int32_t res = a + b;
2375 if ((res ^ a) & (res ^ b) & INT32_MIN) {
2376 res = a > 0 ? INT32_MAX : INT32_MIN;
2377 env->vxsat = 0x1;
2378 }
2379 return res;
2380 }
2381
2382 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a,
2383 int64_t b)
2384 {
2385 int64_t res = a + b;
2386 if ((res ^ a) & (res ^ b) & INT64_MIN) {
2387 res = a > 0 ? INT64_MAX : INT64_MIN;
2388 env->vxsat = 0x1;
2389 }
2390 return res;
2391 }
2392
2393 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2394 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2395 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2396 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
2397 GEN_VEXT_VV_RM(vsadd_vv_b, 1)
2398 GEN_VEXT_VV_RM(vsadd_vv_h, 2)
2399 GEN_VEXT_VV_RM(vsadd_vv_w, 4)
2400 GEN_VEXT_VV_RM(vsadd_vv_d, 8)
2401
2402 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2403 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2404 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2405 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
2406 GEN_VEXT_VX_RM(vsadd_vx_b, 1)
2407 GEN_VEXT_VX_RM(vsadd_vx_h, 2)
2408 GEN_VEXT_VX_RM(vsadd_vx_w, 4)
2409 GEN_VEXT_VX_RM(vsadd_vx_d, 8)
2410
2411 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a,
2412 uint8_t b)
2413 {
2414 uint8_t res = a - b;
2415 if (res > a) {
2416 res = 0;
2417 env->vxsat = 0x1;
2418 }
2419 return res;
2420 }
2421
2422 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2423 uint16_t b)
2424 {
2425 uint16_t res = a - b;
2426 if (res > a) {
2427 res = 0;
2428 env->vxsat = 0x1;
2429 }
2430 return res;
2431 }
2432
2433 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2434 uint32_t b)
2435 {
2436 uint32_t res = a - b;
2437 if (res > a) {
2438 res = 0;
2439 env->vxsat = 0x1;
2440 }
2441 return res;
2442 }
2443
2444 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2445 uint64_t b)
2446 {
2447 uint64_t res = a - b;
2448 if (res > a) {
2449 res = 0;
2450 env->vxsat = 0x1;
2451 }
2452 return res;
2453 }
2454
2455 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2456 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2457 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2458 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
2459 GEN_VEXT_VV_RM(vssubu_vv_b, 1)
2460 GEN_VEXT_VV_RM(vssubu_vv_h, 2)
2461 GEN_VEXT_VV_RM(vssubu_vv_w, 4)
2462 GEN_VEXT_VV_RM(vssubu_vv_d, 8)
2463
2464 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2465 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2466 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2467 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
2468 GEN_VEXT_VX_RM(vssubu_vx_b, 1)
2469 GEN_VEXT_VX_RM(vssubu_vx_h, 2)
2470 GEN_VEXT_VX_RM(vssubu_vx_w, 4)
2471 GEN_VEXT_VX_RM(vssubu_vx_d, 8)
2472
2473 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2474 {
2475 int8_t res = a - b;
2476 if ((res ^ a) & (a ^ b) & INT8_MIN) {
2477 res = a >= 0 ? INT8_MAX : INT8_MIN;
2478 env->vxsat = 0x1;
2479 }
2480 return res;
2481 }
2482
2483 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a,
2484 int16_t b)
2485 {
2486 int16_t res = a - b;
2487 if ((res ^ a) & (a ^ b) & INT16_MIN) {
2488 res = a >= 0 ? INT16_MAX : INT16_MIN;
2489 env->vxsat = 0x1;
2490 }
2491 return res;
2492 }
2493
2494 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a,
2495 int32_t b)
2496 {
2497 int32_t res = a - b;
2498 if ((res ^ a) & (a ^ b) & INT32_MIN) {
2499 res = a >= 0 ? INT32_MAX : INT32_MIN;
2500 env->vxsat = 0x1;
2501 }
2502 return res;
2503 }
2504
2505 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a,
2506 int64_t b)
2507 {
2508 int64_t res = a - b;
2509 if ((res ^ a) & (a ^ b) & INT64_MIN) {
2510 res = a >= 0 ? INT64_MAX : INT64_MIN;
2511 env->vxsat = 0x1;
2512 }
2513 return res;
2514 }
2515
2516 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2517 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2518 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2519 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
2520 GEN_VEXT_VV_RM(vssub_vv_b, 1)
2521 GEN_VEXT_VV_RM(vssub_vv_h, 2)
2522 GEN_VEXT_VV_RM(vssub_vv_w, 4)
2523 GEN_VEXT_VV_RM(vssub_vv_d, 8)
2524
2525 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2526 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2527 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2528 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
2529 GEN_VEXT_VX_RM(vssub_vx_b, 1)
2530 GEN_VEXT_VX_RM(vssub_vx_h, 2)
2531 GEN_VEXT_VX_RM(vssub_vx_w, 4)
2532 GEN_VEXT_VX_RM(vssub_vx_d, 8)
2533
2534 /* Vector Single-Width Averaging Add and Subtract */
2535 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2536 {
2537 uint8_t d = extract64(v, shift, 1);
2538 uint8_t d1;
2539 uint64_t D1, D2;
2540
2541 if (shift == 0 || shift > 64) {
2542 return 0;
2543 }
2544
2545 d1 = extract64(v, shift - 1, 1);
2546 D1 = extract64(v, 0, shift);
2547 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2548 return d1;
2549 } else if (vxrm == 1) { /* round-to-nearest-even */
2550 if (shift > 1) {
2551 D2 = extract64(v, 0, shift - 1);
2552 return d1 & ((D2 != 0) | d);
2553 } else {
2554 return d1 & d;
2555 }
2556 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2557 return !d & (D1 != 0);
2558 }
2559 return 0; /* round-down (truncate) */
2560 }
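
/*
 * Worked example (illustration only): v = 6 (0b110), shift = 2, i.e.
 * computing 6 / 4 = 1 with dropped bits 0b10, so d = 1, d1 = 1,
 * D1 = 2, D2 = 0:
 *   rnu (vxrm = 0): increment = d1 = 1                -> (6 >> 2) + 1 = 2
 *   rne (vxrm = 1): increment = d1 & ((D2 != 0) | d)  -> 2 (ties to even)
 *   rdn (vxrm = 2): increment = 0                     -> 1 (truncate)
 *   rod (vxrm = 3): increment = !d & (D1 != 0) = 0    -> 1 (LSB already set)
 */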
2561
2562 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a,
2563 int32_t b)
2564 {
2565 int64_t res = (int64_t)a + b;
2566 uint8_t round = get_round(vxrm, res, 1);
2567
2568 return (res >> 1) + round;
2569 }
2570
2571 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a,
2572 int64_t b)
2573 {
2574 int64_t res = a + b;
2575 uint8_t round = get_round(vxrm, res, 1);
2576 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2577
2578 /* With signed overflow, bit 64 is inverse of bit 63. */
2579 return ((res >> 1) ^ over) + round;
2580 }
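
/*
 * Hedged sketch (assumes a compiler providing __int128; not used by the
 * helpers): cross-check the "bit 64 is inverse of bit 63" folding in
 * aadd64() against a plain 128-bit computation.  The function name is
 * invented for this example only.
 */
static inline bool check_aadd64_fold(int64_t a, int64_t b)
{
    __int128 wide = (__int128)a + b;    /* exact sum, cannot overflow */
    int64_t res = a + b;                /* wrapped 64-bit sum, as in aadd64 */
    int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;

    /* flipping bit 63 on overflow recovers the exact sum halved */
    return ((res >> 1) ^ over) == (int64_t)(wide >> 1);
}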
2581
2582 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2583 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2584 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2585 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
2586 GEN_VEXT_VV_RM(vaadd_vv_b, 1)
2587 GEN_VEXT_VV_RM(vaadd_vv_h, 2)
2588 GEN_VEXT_VV_RM(vaadd_vv_w, 4)
2589 GEN_VEXT_VV_RM(vaadd_vv_d, 8)
2590
2591 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2592 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2593 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2594 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
2595 GEN_VEXT_VX_RM(vaadd_vx_b, 1)
2596 GEN_VEXT_VX_RM(vaadd_vx_h, 2)
2597 GEN_VEXT_VX_RM(vaadd_vx_w, 4)
2598 GEN_VEXT_VX_RM(vaadd_vx_d, 8)
2599
2600 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2601 uint32_t a, uint32_t b)
2602 {
2603 uint64_t res = (uint64_t)a + b;
2604 uint8_t round = get_round(vxrm, res, 1);
2605
2606 return (res >> 1) + round;
2607 }
2608
2609 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
2610 uint64_t a, uint64_t b)
2611 {
2612 uint64_t res = a + b;
2613 uint8_t round = get_round(vxrm, res, 1);
2614 uint64_t over = (uint64_t)(res < a) << 63;
2615
2616 return ((res >> 1) | over) + round;
2617 }
2618
2619 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
2620 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
2621 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
2622 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
2623 GEN_VEXT_VV_RM(vaaddu_vv_b, 1)
2624 GEN_VEXT_VV_RM(vaaddu_vv_h, 2)
2625 GEN_VEXT_VV_RM(vaaddu_vv_w, 4)
2626 GEN_VEXT_VV_RM(vaaddu_vv_d, 8)
2627
2628 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
2629 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
2630 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
2631 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
2632 GEN_VEXT_VX_RM(vaaddu_vx_b, 1)
2633 GEN_VEXT_VX_RM(vaaddu_vx_h, 2)
2634 GEN_VEXT_VX_RM(vaaddu_vx_w, 4)
2635 GEN_VEXT_VX_RM(vaaddu_vx_d, 8)
2636
2637 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a,
2638 int32_t b)
2639 {
2640 int64_t res = (int64_t)a - b;
2641 uint8_t round = get_round(vxrm, res, 1);
2642
2643 return (res >> 1) + round;
2644 }
2645
2646 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a,
2647 int64_t b)
2648 {
2649 int64_t res = (int64_t)a - b;
2650 uint8_t round = get_round(vxrm, res, 1);
2651 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2652
2653 /* With signed overflow, bit 64 is inverse of bit 63. */
2654 return ((res >> 1) ^ over) + round;
2655 }
2656
2657 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2658 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2659 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2660 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
2661 GEN_VEXT_VV_RM(vasub_vv_b, 1)
2662 GEN_VEXT_VV_RM(vasub_vv_h, 2)
2663 GEN_VEXT_VV_RM(vasub_vv_w, 4)
2664 GEN_VEXT_VV_RM(vasub_vv_d, 8)
2665
2666 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2667 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2668 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2669 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
2670 GEN_VEXT_VX_RM(vasub_vx_b, 1)
2671 GEN_VEXT_VX_RM(vasub_vx_h, 2)
2672 GEN_VEXT_VX_RM(vasub_vx_w, 4)
2673 GEN_VEXT_VX_RM(vasub_vx_d, 8)
2674
2675 static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2676 uint32_t a, uint32_t b)
2677 {
2678 int64_t res = (int64_t)a - b;
2679 uint8_t round = get_round(vxrm, res, 1);
2680
2681 return (res >> 1) + round;
2682 }
2683
2684 static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
2685 uint64_t a, uint64_t b)
2686 {
2687 uint64_t res = (uint64_t)a - b;
2688 uint8_t round = get_round(vxrm, res, 1);
2689 uint64_t over = (uint64_t)(res > a) << 63;
2690
2691 return ((res >> 1) | over) + round;
2692 }
2693
2694 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
2695 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
2696 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
2697 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
2698 GEN_VEXT_VV_RM(vasubu_vv_b, 1)
2699 GEN_VEXT_VV_RM(vasubu_vv_h, 2)
2700 GEN_VEXT_VV_RM(vasubu_vv_w, 4)
2701 GEN_VEXT_VV_RM(vasubu_vv_d, 8)
2702
2703 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
2704 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
2705 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
2706 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
2707 GEN_VEXT_VX_RM(vasubu_vx_b, 1)
2708 GEN_VEXT_VX_RM(vasubu_vx_h, 2)
2709 GEN_VEXT_VX_RM(vasubu_vx_w, 4)
2710 GEN_VEXT_VX_RM(vasubu_vx_d, 8)
2711
2712 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2713 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2714 {
2715 uint8_t round;
2716 int16_t res;
2717
2718 res = (int16_t)a * (int16_t)b;
2719 round = get_round(vxrm, res, 7);
2720 res = (res >> 7) + round;
2721
2722 if (res > INT8_MAX) {
2723 env->vxsat = 0x1;
2724 return INT8_MAX;
2725 } else if (res < INT8_MIN) {
2726 env->vxsat = 0x1;
2727 return INT8_MIN;
2728 } else {
2729 return res;
2730 }
2731 }
2732
2733 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2734 {
2735 uint8_t round;
2736 int32_t res;
2737
2738 res = (int32_t)a * (int32_t)b;
2739 round = get_round(vxrm, res, 15);
2740 res = (res >> 15) + round;
2741
2742 if (res > INT16_MAX) {
2743 env->vxsat = 0x1;
2744 return INT16_MAX;
2745 } else if (res < INT16_MIN) {
2746 env->vxsat = 0x1;
2747 return INT16_MIN;
2748 } else {
2749 return res;
2750 }
2751 }
2752
2753 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2754 {
2755 uint8_t round;
2756 int64_t res;
2757
2758 res = (int64_t)a * (int64_t)b;
2759 round = get_round(vxrm, res, 31);
2760 res = (res >> 31) + round;
2761
2762 if (res > INT32_MAX) {
2763 env->vxsat = 0x1;
2764 return INT32_MAX;
2765 } else if (res < INT32_MIN) {
2766 env->vxsat = 0x1;
2767 return INT32_MIN;
2768 } else {
2769 return res;
2770 }
2771 }
2772
2773 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2774 {
2775 uint8_t round;
2776 uint64_t hi_64, lo_64;
2777 int64_t res;
2778
2779 if (a == INT64_MIN && b == INT64_MIN) {
2780 env->vxsat = 1;
2781 return INT64_MAX;
2782 }
2783
2784 muls64(&lo_64, &hi_64, a, b);
2785 round = get_round(vxrm, lo_64, 63);
2786 /*
2787 * Cannot overflow, as there are always
2788 * 2 sign bits after multiply.
2789 */
2790 res = (hi_64 << 1) | (lo_64 >> 63);
2791 if (round) {
2792 if (res == INT64_MAX) {
2793 env->vxsat = 1;
2794 } else {
2795 res += 1;
2796 }
2797 }
2798 return res;
2799 }
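
/*
 * Why "(hi_64 << 1) | (lo_64 >> 63)" above cannot overflow: vsmul treats
 * the operands as Q63 fixed point, so |a|, |b| <= 2^63 and the 128-bit
 * product magnitude is at most 2^126.  Bits 127 and 126 of the product
 * are therefore both sign bits, and keeping bits 126..63 only discards a
 * redundant copy of the sign.  The single exception, INT64_MIN *
 * INT64_MIN = +2^126, is filtered out and saturated before the multiply.
 */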
2800
2801 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2802 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2803 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2804 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
2805 GEN_VEXT_VV_RM(vsmul_vv_b, 1)
2806 GEN_VEXT_VV_RM(vsmul_vv_h, 2)
2807 GEN_VEXT_VV_RM(vsmul_vv_w, 4)
2808 GEN_VEXT_VV_RM(vsmul_vv_d, 8)
2809
2810 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2811 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2812 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2813 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
2814 GEN_VEXT_VX_RM(vsmul_vx_b, 1)
2815 GEN_VEXT_VX_RM(vsmul_vx_h, 2)
2816 GEN_VEXT_VX_RM(vsmul_vx_w, 4)
2817 GEN_VEXT_VX_RM(vsmul_vx_d, 8)
2818
2819 /* Vector Single-Width Scaling Shift Instructions */
2820 static inline uint8_t
2821 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2822 {
2823 uint8_t round, shift = b & 0x7;
2824 uint8_t res;
2825
2826 round = get_round(vxrm, a, shift);
2827 res = (a >> shift) + round;
2828 return res;
2829 }
2830 static inline uint16_t
2831 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2832 {
2833 uint8_t round, shift = b & 0xf;
2834
2835 round = get_round(vxrm, a, shift);
2836 return (a >> shift) + round;
2837 }
2838 static inline uint32_t
2839 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2840 {
2841 uint8_t round, shift = b & 0x1f;
2842
2843 round = get_round(vxrm, a, shift);
2844 return (a >> shift) + round;
2845 }
2846 static inline uint64_t
2847 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2848 {
2849 uint8_t round, shift = b & 0x3f;
2850
2851 round = get_round(vxrm, a, shift);
2852 return (a >> shift) + round;
2853 }
2854 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2855 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2856 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2857 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
2858 GEN_VEXT_VV_RM(vssrl_vv_b, 1)
2859 GEN_VEXT_VV_RM(vssrl_vv_h, 2)
2860 GEN_VEXT_VV_RM(vssrl_vv_w, 4)
2861 GEN_VEXT_VV_RM(vssrl_vv_d, 8)
2862
2863 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2864 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2865 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2866 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
2867 GEN_VEXT_VX_RM(vssrl_vx_b, 1)
2868 GEN_VEXT_VX_RM(vssrl_vx_h, 2)
2869 GEN_VEXT_VX_RM(vssrl_vx_w, 4)
2870 GEN_VEXT_VX_RM(vssrl_vx_d, 8)
2871
2872 static inline int8_t
2873 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2874 {
2875 uint8_t round, shift = b & 0x7;
2876
2877 round = get_round(vxrm, a, shift);
2878 return (a >> shift) + round;
2879 }
2880 static inline int16_t
2881 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2882 {
2883 uint8_t round, shift = b & 0xf;
2884
2885 round = get_round(vxrm, a, shift);
2886 return (a >> shift) + round;
2887 }
2888 static inline int32_t
2889 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2890 {
2891 uint8_t round, shift = b & 0x1f;
2892
2893 round = get_round(vxrm, a, shift);
2894 return (a >> shift) + round;
2895 }
2896 static inline int64_t
2897 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2898 {
2899 uint8_t round, shift = b & 0x3f;
2900
2901 round = get_round(vxrm, a, shift);
2902 return (a >> shift) + round;
2903 }
2904
2905 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2906 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2907 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2908 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
2909 GEN_VEXT_VV_RM(vssra_vv_b, 1)
2910 GEN_VEXT_VV_RM(vssra_vv_h, 2)
2911 GEN_VEXT_VV_RM(vssra_vv_w, 4)
2912 GEN_VEXT_VV_RM(vssra_vv_d, 8)
2913
2914 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2915 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2916 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2917 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
2918 GEN_VEXT_VX_RM(vssra_vx_b, 1)
2919 GEN_VEXT_VX_RM(vssra_vx_h, 2)
2920 GEN_VEXT_VX_RM(vssra_vx_w, 4)
2921 GEN_VEXT_VX_RM(vssra_vx_d, 8)
2922
2923 /* Vector Narrowing Fixed-Point Clip Instructions */
2924 static inline int8_t
2925 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2926 {
2927 uint8_t round, shift = b & 0xf;
2928 int16_t res;
2929
2930 round = get_round(vxrm, a, shift);
2931 res = (a >> shift) + round;
2932 if (res > INT8_MAX) {
2933 env->vxsat = 0x1;
2934 return INT8_MAX;
2935 } else if (res < INT8_MIN) {
2936 env->vxsat = 0x1;
2937 return INT8_MIN;
2938 } else {
2939 return res;
2940 }
2941 }
2942
2943 static inline int16_t
2944 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2945 {
2946 uint8_t round, shift = b & 0x1f;
2947 int32_t res;
2948
2949 round = get_round(vxrm, a, shift);
2950 res = (a >> shift) + round;
2951 if (res > INT16_MAX) {
2952 env->vxsat = 0x1;
2953 return INT16_MAX;
2954 } else if (res < INT16_MIN) {
2955 env->vxsat = 0x1;
2956 return INT16_MIN;
2957 } else {
2958 return res;
2959 }
2960 }
2961
2962 static inline int32_t
2963 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2964 {
2965 uint8_t round, shift = b & 0x3f;
2966 int64_t res;
2967
2968 round = get_round(vxrm, a, shift);
2969 res = (a >> shift) + round;
2970 if (res > INT32_MAX) {
2971 env->vxsat = 0x1;
2972 return INT32_MAX;
2973 } else if (res < INT32_MIN) {
2974 env->vxsat = 0x1;
2975 return INT32_MIN;
2976 } else {
2977 return res;
2978 }
2979 }
2980
2981 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
2982 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
2983 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
2984 GEN_VEXT_VV_RM(vnclip_wv_b, 1)
2985 GEN_VEXT_VV_RM(vnclip_wv_h, 2)
2986 GEN_VEXT_VV_RM(vnclip_wv_w, 4)
2987
2988 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
2989 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
2990 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
2991 GEN_VEXT_VX_RM(vnclip_wx_b, 1)
2992 GEN_VEXT_VX_RM(vnclip_wx_h, 2)
2993 GEN_VEXT_VX_RM(vnclip_wx_w, 4)
2994
2995 static inline uint8_t
2996 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2997 {
2998 uint8_t round, shift = b & 0xf;
2999 uint16_t res;
3000
3001 round = get_round(vxrm, a, shift);
3002 res = (a >> shift) + round;
3003 if (res > UINT8_MAX) {
3004 env->vxsat = 0x1;
3005 return UINT8_MAX;
3006 } else {
3007 return res;
3008 }
3009 }
3010
3011 static inline uint16_t
3012 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
3013 {
3014 uint8_t round, shift = b & 0x1f;
3015 uint32_t res;
3016
3017 round = get_round(vxrm, a, shift);
3018 res = (a >> shift) + round;
3019 if (res > UINT16_MAX) {
3020 env->vxsat = 0x1;
3021 return UINT16_MAX;
3022 } else {
3023 return res;
3024 }
3025 }
3026
3027 static inline uint32_t
3028 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
3029 {
3030 uint8_t round, shift = b & 0x3f;
3031 uint64_t res;
3032
3033 round = get_round(vxrm, a, shift);
3034 res = (a >> shift) + round;
3035 if (res > UINT32_MAX) {
3036 env->vxsat = 0x1;
3037 return UINT32_MAX;
3038 } else {
3039 return res;
3040 }
3041 }
3042
3043 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
3044 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
3045 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
3046 GEN_VEXT_VV_RM(vnclipu_wv_b, 1)
3047 GEN_VEXT_VV_RM(vnclipu_wv_h, 2)
3048 GEN_VEXT_VV_RM(vnclipu_wv_w, 4)
3049
3050 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
3051 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
3052 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
3053 GEN_VEXT_VX_RM(vnclipu_wx_b, 1)
3054 GEN_VEXT_VX_RM(vnclipu_wx_h, 2)
3055 GEN_VEXT_VX_RM(vnclipu_wx_w, 4)
3056
3057 /*
3058 * Vector Floating-Point Arithmetic Instructions
3059 */
3060 /* Vector Single-Width Floating-Point Add/Subtract Instructions */
3061 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
3062 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
3063 CPURISCVState *env) \
3064 { \
3065 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
3066 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3067 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \
3068 }
3069
3070 #define GEN_VEXT_VV_ENV(NAME, ESZ) \
3071 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
3072 void *vs2, CPURISCVState *env, \
3073 uint32_t desc) \
3074 { \
3075 uint32_t vm = vext_vm(desc); \
3076 uint32_t vl = env->vl; \
3077 uint32_t total_elems = \
3078 vext_get_total_elems(env, desc, ESZ); \
3079 uint32_t vta = vext_vta(desc); \
3080 uint32_t vma = vext_vma(desc); \
3081 uint32_t i; \
3082 \
3083 VSTART_CHECK_EARLY_EXIT(env); \
3084 \
3085 for (i = env->vstart; i < vl; i++) { \
3086 if (!vm && !vext_elem_mask(v0, i)) { \
3087 /* set masked-off elements to 1s */ \
3088 vext_set_elems_1s(vd, vma, i * ESZ, \
3089 (i + 1) * ESZ); \
3090 continue; \
3091 } \
3092 do_##NAME(vd, vs1, vs2, i, env); \
3093 } \
3094 env->vstart = 0; \
3095 /* set tail elements to 1s */ \
3096 vext_set_elems_1s(vd, vta, vl * ESZ, \
3097 total_elems * ESZ); \
3098 }
3099
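/*
 * Note on the generator pattern used throughout this file: RVVCALL() simply
 * invokes the op-defining macro given as its first argument, so, for example,
 * RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) produces a
 * per-element do_vfadd_vv_h() wrapper around float16_add(), and
 * GEN_VEXT_VV_ENV(vfadd_vv_h, 2) then produces the HELPER(vfadd_vv_h) loop
 * that applies the mask (vm/vma) and tail (vta) policies around it.
 */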
3100 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
3101 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
3102 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
3103 GEN_VEXT_VV_ENV(vfadd_vv_h, 2)
3104 GEN_VEXT_VV_ENV(vfadd_vv_w, 4)
3105 GEN_VEXT_VV_ENV(vfadd_vv_d, 8)
3106
3107 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
3108 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
3109 CPURISCVState *env) \
3110 { \
3111 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3112 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
3113 }
3114
3115 #define GEN_VEXT_VF(NAME, ESZ) \
3116 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \
3117 void *vs2, CPURISCVState *env, \
3118 uint32_t desc) \
3119 { \
3120 uint32_t vm = vext_vm(desc); \
3121 uint32_t vl = env->vl; \
3122 uint32_t total_elems = \
3123 vext_get_total_elems(env, desc, ESZ); \
3124 uint32_t vta = vext_vta(desc); \
3125 uint32_t vma = vext_vma(desc); \
3126 uint32_t i; \
3127 \
3128 VSTART_CHECK_EARLY_EXIT(env); \
3129 \
3130 for (i = env->vstart; i < vl; i++) { \
3131 if (!vm && !vext_elem_mask(v0, i)) { \
3132 /* set masked-off elements to 1s */ \
3133 vext_set_elems_1s(vd, vma, i * ESZ, \
3134 (i + 1) * ESZ); \
3135 continue; \
3136 } \
3137 do_##NAME(vd, s1, vs2, i, env); \
3138 } \
3139 env->vstart = 0; \
3140 /* set tail elements to 1s */ \
3141 vext_set_elems_1s(vd, vta, vl * ESZ, \
3142 total_elems * ESZ); \
3143 }
3144
3145 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
3146 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
3147 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
3148 GEN_VEXT_VF(vfadd_vf_h, 2)
3149 GEN_VEXT_VF(vfadd_vf_w, 4)
3150 GEN_VEXT_VF(vfadd_vf_d, 8)
3151
3152 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
3153 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
3154 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
3155 GEN_VEXT_VV_ENV(vfsub_vv_h, 2)
3156 GEN_VEXT_VV_ENV(vfsub_vv_w, 4)
3157 GEN_VEXT_VV_ENV(vfsub_vv_d, 8)
3158 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
3159 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
3160 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
3161 GEN_VEXT_VF(vfsub_vf_h, 2)
3162 GEN_VEXT_VF(vfsub_vf_w, 4)
3163 GEN_VEXT_VF(vfsub_vf_d, 8)
3164
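/*
 * vfrsub.vf computes f[rs1] - vs2[i].  OPFVF2 invokes OP(s2, s1), so the
 * reverse-subtract helpers below swap their operands to make the scalar the
 * minuend.
 */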
3165 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
3166 {
3167 return float16_sub(b, a, s);
3168 }
3169
3170 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
3171 {
3172 return float32_sub(b, a, s);
3173 }
3174
3175 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
3176 {
3177 return float64_sub(b, a, s);
3178 }
3179
3180 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
3181 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
3182 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
3183 GEN_VEXT_VF(vfrsub_vf_h, 2)
3184 GEN_VEXT_VF(vfrsub_vf_w, 4)
3185 GEN_VEXT_VF(vfrsub_vf_d, 8)
3186
3187 /* Vector Widening Floating-Point Add/Subtract Instructions */
3188 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
3189 {
3190 return float32_add(float16_to_float32(a, true, s),
3191 float16_to_float32(b, true, s), s);
3192 }
3193
3194 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
3195 {
3196 return float64_add(float32_to_float64(a, s),
3197 float32_to_float64(b, s), s);
3199 }
3200
3201 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
3202 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
3203 GEN_VEXT_VV_ENV(vfwadd_vv_h, 4)
3204 GEN_VEXT_VV_ENV(vfwadd_vv_w, 8)
3205 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
3206 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
3207 GEN_VEXT_VF(vfwadd_vf_h, 4)
3208 GEN_VEXT_VF(vfwadd_vf_w, 8)
3209
3210 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
3211 {
3212 return float32_sub(float16_to_float32(a, true, s),
3213 float16_to_float32(b, true, s), s);
3214 }
3215
3216 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
3217 {
3218 return float64_sub(float32_to_float64(a, s),
3219 float32_to_float64(b, s), s);
3221 }
3222
3223 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
3224 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
3225 GEN_VEXT_VV_ENV(vfwsub_vv_h, 4)
3226 GEN_VEXT_VV_ENV(vfwsub_vv_w, 8)
3227 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
3228 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
3229 GEN_VEXT_VF(vfwsub_vf_h, 4)
3230 GEN_VEXT_VF(vfwsub_vf_w, 8)
3231
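/*
 * The .wv/.wf forms below (WOP_WUUU_*) take vs2 already at the wider width,
 * so only the narrower operand is converted before the wide-precision
 * add/subtract.
 */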
3232 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
3233 {
3234 return float32_add(a, float16_to_float32(b, true, s), s);
3235 }
3236
3237 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
3238 {
3239 return float64_add(a, float32_to_float64(b, s), s);
3240 }
3241
3242 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
3243 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
3244 GEN_VEXT_VV_ENV(vfwadd_wv_h, 4)
3245 GEN_VEXT_VV_ENV(vfwadd_wv_w, 8)
3246 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
3247 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
3248 GEN_VEXT_VF(vfwadd_wf_h, 4)
3249 GEN_VEXT_VF(vfwadd_wf_w, 8)
3250
3251 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
3252 {
3253 return float32_sub(a, float16_to_float32(b, true, s), s);
3254 }
3255
3256 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
3257 {
3258 return float64_sub(a, float32_to_float64(b, s), s);
3259 }
3260
3261 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
3262 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
3263 GEN_VEXT_VV_ENV(vfwsub_wv_h, 4)
3264 GEN_VEXT_VV_ENV(vfwsub_wv_w, 8)
3265 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
3266 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
3267 GEN_VEXT_VF(vfwsub_wf_h, 4)
3268 GEN_VEXT_VF(vfwsub_wf_w, 8)
3269
3270 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */
3271 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
3272 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
3273 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
3274 GEN_VEXT_VV_ENV(vfmul_vv_h, 2)
3275 GEN_VEXT_VV_ENV(vfmul_vv_w, 4)
3276 GEN_VEXT_VV_ENV(vfmul_vv_d, 8)
3277 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
3278 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
3279 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
3280 GEN_VEXT_VF(vfmul_vf_h, 2)
3281 GEN_VEXT_VF(vfmul_vf_w, 4)
3282 GEN_VEXT_VF(vfmul_vf_d, 8)
3283
3284 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
3285 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
3286 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
3287 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2)
3288 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4)
3289 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8)
3290 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
3291 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
3292 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
3293 GEN_VEXT_VF(vfdiv_vf_h, 2)
3294 GEN_VEXT_VF(vfdiv_vf_w, 4)
3295 GEN_VEXT_VF(vfdiv_vf_d, 8)
3296
3297 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3298 {
3299 return float16_div(b, a, s);
3300 }
3301
3302 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
3303 {
3304 return float32_div(b, a, s);
3305 }
3306
3307 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
3308 {
3309 return float64_div(b, a, s);
3310 }
3311
3312 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
3313 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
3314 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
3315 GEN_VEXT_VF(vfrdiv_vf_h, 2)
3316 GEN_VEXT_VF(vfrdiv_vf_w, 4)
3317 GEN_VEXT_VF(vfrdiv_vf_d, 8)
3318
3319 /* Vector Widening Floating-Point Multiply */
3320 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3321 {
3322 return float32_mul(float16_to_float32(a, true, s),
3323 float16_to_float32(b, true, s), s);
3324 }
3325
3326 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3327 {
3328 return float64_mul(float32_to_float64(a, s),
3329 float32_to_float64(b, s), s);
3331 }
3332 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3333 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
3334 GEN_VEXT_VV_ENV(vfwmul_vv_h, 4)
3335 GEN_VEXT_VV_ENV(vfwmul_vv_w, 8)
3336 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3337 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
3338 GEN_VEXT_VF(vfwmul_vf_h, 4)
3339 GEN_VEXT_VF(vfwmul_vf_w, 8)
3340
3341 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
3342 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
3343 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
3344 CPURISCVState *env) \
3345 { \
3346 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
3347 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3348 TD d = *((TD *)vd + HD(i)); \
3349 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \
3350 }
3351
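/*
 * OPFVV3/OPFVF3 invoke OP(s2, s1, d) with d the current destination element,
 * so the helpers below implement, e.g., vfmacc (vd = vs1 * vs2 + vd) and
 * vfmadd (vd = vd * vs1 + vs2); the float_muladd negate flags select the
 * nmacc/msac/nmsac and nmadd/msub/nmsub variants.
 */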
3352 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3353 {
3354 return float16_muladd(a, b, d, 0, s);
3355 }
3356
3357 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3358 {
3359 return float32_muladd(a, b, d, 0, s);
3360 }
3361
3362 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3363 {
3364 return float64_muladd(a, b, d, 0, s);
3365 }
3366
3367 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3368 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3369 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
3370 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2)
3371 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4)
3372 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8)
3373
3374 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
3375 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
3376 CPURISCVState *env) \
3377 { \
3378 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3379 TD d = *((TD *)vd + HD(i)); \
3380 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
3381 }
3382
3383 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3384 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3385 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
3386 GEN_VEXT_VF(vfmacc_vf_h, 2)
3387 GEN_VEXT_VF(vfmacc_vf_w, 4)
3388 GEN_VEXT_VF(vfmacc_vf_d, 8)
3389
3390 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3391 {
3392 return float16_muladd(a, b, d, float_muladd_negate_c |
3393 float_muladd_negate_product, s);
3394 }
3395
3396 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3397 {
3398 return float32_muladd(a, b, d, float_muladd_negate_c |
3399 float_muladd_negate_product, s);
3400 }
3401
3402 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3403 {
3404 return float64_muladd(a, b, d, float_muladd_negate_c |
3405 float_muladd_negate_product, s);
3406 }
3407
3408 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3409 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3410 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
3411 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2)
3412 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4)
3413 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8)
3414 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3415 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3416 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
3417 GEN_VEXT_VF(vfnmacc_vf_h, 2)
3418 GEN_VEXT_VF(vfnmacc_vf_w, 4)
3419 GEN_VEXT_VF(vfnmacc_vf_d, 8)
3420
3421 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3422 {
3423 return float16_muladd(a, b, d, float_muladd_negate_c, s);
3424 }
3425
3426 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3427 {
3428 return float32_muladd(a, b, d, float_muladd_negate_c, s);
3429 }
3430
3431 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3432 {
3433 return float64_muladd(a, b, d, float_muladd_negate_c, s);
3434 }
3435
3436 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3437 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3438 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
3439 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2)
3440 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4)
3441 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8)
3442 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3443 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3444 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
3445 GEN_VEXT_VF(vfmsac_vf_h, 2)
3446 GEN_VEXT_VF(vfmsac_vf_w, 4)
3447 GEN_VEXT_VF(vfmsac_vf_d, 8)
3448
3449 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3450 {
3451 return float16_muladd(a, b, d, float_muladd_negate_product, s);
3452 }
3453
3454 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3455 {
3456 return float32_muladd(a, b, d, float_muladd_negate_product, s);
3457 }
3458
3459 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3460 {
3461 return float64_muladd(a, b, d, float_muladd_negate_product, s);
3462 }
3463
3464 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3465 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3466 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
3467 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2)
3468 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4)
3469 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8)
3470 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3471 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3472 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
3473 GEN_VEXT_VF(vfnmsac_vf_h, 2)
3474 GEN_VEXT_VF(vfnmsac_vf_w, 4)
3475 GEN_VEXT_VF(vfnmsac_vf_d, 8)
3476
3477 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3478 {
3479 return float16_muladd(d, b, a, 0, s);
3480 }
3481
3482 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3483 {
3484 return float32_muladd(d, b, a, 0, s);
3485 }
3486
3487 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3488 {
3489 return float64_muladd(d, b, a, 0, s);
3490 }
3491
3492 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3493 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3494 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
3495 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2)
3496 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4)
3497 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8)
3498 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3499 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3500 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
3501 GEN_VEXT_VF(vfmadd_vf_h, 2)
3502 GEN_VEXT_VF(vfmadd_vf_w, 4)
3503 GEN_VEXT_VF(vfmadd_vf_d, 8)
3504
3505 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3506 {
3507 return float16_muladd(d, b, a, float_muladd_negate_c |
3508 float_muladd_negate_product, s);
3509 }
3510
3511 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3512 {
3513 return float32_muladd(d, b, a, float_muladd_negate_c |
3514 float_muladd_negate_product, s);
3515 }
3516
3517 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3518 {
3519 return float64_muladd(d, b, a, float_muladd_negate_c |
3520 float_muladd_negate_product, s);
3521 }
3522
3523 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3524 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3525 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
3526 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2)
3527 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4)
3528 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8)
3529 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3530 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3531 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
3532 GEN_VEXT_VF(vfnmadd_vf_h, 2)
3533 GEN_VEXT_VF(vfnmadd_vf_w, 4)
3534 GEN_VEXT_VF(vfnmadd_vf_d, 8)
3535
3536 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3537 {
3538 return float16_muladd(d, b, a, float_muladd_negate_c, s);
3539 }
3540
3541 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3542 {
3543 return float32_muladd(d, b, a, float_muladd_negate_c, s);
3544 }
3545
3546 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3547 {
3548 return float64_muladd(d, b, a, float_muladd_negate_c, s);
3549 }
3550
3551 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3552 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3553 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
3554 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2)
3555 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4)
3556 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8)
3557 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3558 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3559 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
3560 GEN_VEXT_VF(vfmsub_vf_h, 2)
3561 GEN_VEXT_VF(vfmsub_vf_w, 4)
3562 GEN_VEXT_VF(vfmsub_vf_d, 8)
3563
3564 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3565 {
3566 return float16_muladd(d, b, a, float_muladd_negate_product, s);
3567 }
3568
3569 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3570 {
3571 return float32_muladd(d, b, a, float_muladd_negate_product, s);
3572 }
3573
3574 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3575 {
3576 return float64_muladd(d, b, a, float_muladd_negate_product, s);
3577 }
3578
3579 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3580 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3581 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
3582 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2)
3583 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4)
3584 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8)
3585 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3586 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3587 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
3588 GEN_VEXT_VF(vfnmsub_vf_h, 2)
3589 GEN_VEXT_VF(vfnmsub_vf_w, 4)
3590 GEN_VEXT_VF(vfnmsub_vf_d, 8)
3591
3592 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3593 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3594 {
3595 return float32_muladd(float16_to_float32(a, true, s),
3596 float16_to_float32(b, true, s), d, 0, s);
3597 }
3598
3599 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3600 {
3601 return float64_muladd(float32_to_float64(a, s),
3602 float32_to_float64(b, s), d, 0, s);
3603 }
3604
3605 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3606 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
3607 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4)
3608 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8)
3609 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3610 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
3611 GEN_VEXT_VF(vfwmacc_vf_h, 4)
3612 GEN_VEXT_VF(vfwmacc_vf_w, 8)
3613
3614 static uint32_t fwmaccbf16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3615 {
3616 return float32_muladd(bfloat16_to_float32(a, s),
3617 bfloat16_to_float32(b, s), d, 0, s);
3618 }
3619
3620 RVVCALL(OPFVV3, vfwmaccbf16_vv, WOP_UUU_H, H4, H2, H2, fwmaccbf16)
3621 GEN_VEXT_VV_ENV(vfwmaccbf16_vv, 4)
3622 RVVCALL(OPFVF3, vfwmaccbf16_vf, WOP_UUU_H, H4, H2, fwmaccbf16)
3623 GEN_VEXT_VF(vfwmaccbf16_vf, 4)
3624
3625 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3626 {
3627 return float32_muladd(float16_to_float32(a, true, s),
3628 float16_to_float32(b, true, s), d,
3629 float_muladd_negate_c | float_muladd_negate_product,
3630 s);
3631 }
3632
3633 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3634 {
3635 return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s),
3636 d, float_muladd_negate_c |
3637 float_muladd_negate_product, s);
3638 }
3639
3640 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3641 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
3642 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4)
3643 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8)
3644 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3645 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
3646 GEN_VEXT_VF(vfwnmacc_vf_h, 4)
3647 GEN_VEXT_VF(vfwnmacc_vf_w, 8)
3648
3649 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3650 {
3651 return float32_muladd(float16_to_float32(a, true, s),
3652 float16_to_float32(b, true, s), d,
3653 float_muladd_negate_c, s);
3654 }
3655
3656 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3657 {
3658 return float64_muladd(float32_to_float64(a, s),
3659 float32_to_float64(b, s), d,
3660 float_muladd_negate_c, s);
3661 }
3662
3663 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3664 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
3665 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4)
3666 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8)
3667 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3668 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
3669 GEN_VEXT_VF(vfwmsac_vf_h, 4)
3670 GEN_VEXT_VF(vfwmsac_vf_w, 8)
3671
3672 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3673 {
3674 return float32_muladd(float16_to_float32(a, true, s),
3675 float16_to_float32(b, true, s), d,
3676 float_muladd_negate_product, s);
3677 }
3678
3679 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3680 {
3681 return float64_muladd(float32_to_float64(a, s),
3682 float32_to_float64(b, s), d,
3683 float_muladd_negate_product, s);
3684 }
3685
3686 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3687 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
3688 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4)
3689 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8)
3690 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3691 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
3692 GEN_VEXT_VF(vfwnmsac_vf_h, 4)
3693 GEN_VEXT_VF(vfwnmsac_vf_w, 8)
3694
3695 /* Vector Floating-Point Square-Root Instruction */
3696 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
3697 static void do_##NAME(void *vd, void *vs2, int i, \
3698 CPURISCVState *env) \
3699 { \
3700 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3701 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \
3702 }
3703
3704 #define GEN_VEXT_V_ENV(NAME, ESZ) \
3705 void HELPER(NAME)(void *vd, void *v0, void *vs2, \
3706 CPURISCVState *env, uint32_t desc) \
3707 { \
3708 uint32_t vm = vext_vm(desc); \
3709 uint32_t vl = env->vl; \
3710 uint32_t total_elems = \
3711 vext_get_total_elems(env, desc, ESZ); \
3712 uint32_t vta = vext_vta(desc); \
3713 uint32_t vma = vext_vma(desc); \
3714 uint32_t i; \
3715 \
3716 VSTART_CHECK_EARLY_EXIT(env); \
3717 \
3718 if (vl == 0) { \
3719 return; \
3720 } \
3721 for (i = env->vstart; i < vl; i++) { \
3722 if (!vm && !vext_elem_mask(v0, i)) { \
3723 /* set masked-off elements to 1s */ \
3724 vext_set_elems_1s(vd, vma, i * ESZ, \
3725 (i + 1) * ESZ); \
3726 continue; \
3727 } \
3728 do_##NAME(vd, vs2, i, env); \
3729 } \
3730 env->vstart = 0; \
3731 vext_set_elems_1s(vd, vta, vl * ESZ, \
3732 total_elems * ESZ); \
3733 }
3734
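/*
 * GEN_VEXT_V_ENV generates the unary floating-point helpers used by the
 * square-root, estimate and type-conversion instructions below.  Note the
 * explicit vl == 0 early return ahead of the element loop, in addition to
 * VSTART_CHECK_EARLY_EXIT().
 */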
3735 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3736 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3737 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
3738 GEN_VEXT_V_ENV(vfsqrt_v_h, 2)
3739 GEN_VEXT_V_ENV(vfsqrt_v_w, 4)
3740 GEN_VEXT_V_ENV(vfsqrt_v_d, 8)
3741
3742 /*
3743 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
3744 *
3745 * Adapted from riscv-v-spec recip.c:
3746 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3747 */
3748 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
3749 {
3750 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3751 uint64_t exp = extract64(f, frac_size, exp_size);
3752 uint64_t frac = extract64(f, 0, frac_size);
3753
3754 const uint8_t lookup_table[] = {
3755 52, 51, 50, 48, 47, 46, 44, 43,
3756 42, 41, 40, 39, 38, 36, 35, 34,
3757 33, 32, 31, 30, 30, 29, 28, 27,
3758 26, 25, 24, 23, 23, 22, 21, 20,
3759 19, 19, 18, 17, 16, 16, 15, 14,
3760 14, 13, 12, 12, 11, 10, 10, 9,
3761 9, 8, 7, 7, 6, 6, 5, 4,
3762 4, 3, 3, 2, 2, 1, 1, 0,
3763 127, 125, 123, 121, 119, 118, 116, 114,
3764 113, 111, 109, 108, 106, 105, 103, 102,
3765 100, 99, 97, 96, 95, 93, 92, 91,
3766 90, 88, 87, 86, 85, 84, 83, 82,
3767 80, 79, 78, 77, 76, 75, 74, 73,
3768 72, 71, 70, 70, 69, 68, 67, 66,
3769 65, 64, 63, 63, 62, 61, 60, 59,
3770 59, 58, 57, 56, 56, 55, 54, 53
3771 };
3772 const int precision = 7;
3773
3774 if (exp == 0 && frac != 0) { /* subnormal */
3775 /* Normalize the subnormal. */
3776 while (extract64(frac, frac_size - 1, 1) == 0) {
3777 exp--;
3778 frac <<= 1;
3779 }
3780
3781 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3782 }
3783
3784 int idx = ((exp & 1) << (precision - 1)) |
3785 (frac >> (frac_size - precision + 1));
3786 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3787 (frac_size - precision);
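/*
 * Exponent of the estimate: (3 * B - 1 - exp) / 2, with B the exponent bias
 * (2^(exp_size - 1) - 1).  ~exp stands in for (-exp - 1) via two's-complement
 * wraparound, which also covers the negative exponents produced by the
 * subnormal normalization above.
 */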
3788 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;
3789
3790 uint64_t val = 0;
3791 val = deposit64(val, 0, frac_size, out_frac);
3792 val = deposit64(val, frac_size, exp_size, out_exp);
3793 val = deposit64(val, frac_size + exp_size, 1, sign);
3794 return val;
3795 }
3796
3797 static float16 frsqrt7_h(float16 f, float_status *s)
3798 {
3799 int exp_size = 5, frac_size = 10;
3800 bool sign = float16_is_neg(f);
3801
3802 /*
3803 * frsqrt7(sNaN) = canonical NaN
3804 * frsqrt7(-inf) = canonical NaN
3805 * frsqrt7(-normal) = canonical NaN
3806 * frsqrt7(-subnormal) = canonical NaN
3807 */
3808 if (float16_is_signaling_nan(f, s) ||
3809 (float16_is_infinity(f) && sign) ||
3810 (float16_is_normal(f) && sign) ||
3811 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
3812 s->float_exception_flags |= float_flag_invalid;
3813 return float16_default_nan(s);
3814 }
3815
3816 /* frsqrt7(qNaN) = canonical NaN */
3817 if (float16_is_quiet_nan(f, s)) {
3818 return float16_default_nan(s);
3819 }
3820
3821 /* frsqrt7(+-0) = +-inf */
3822 if (float16_is_zero(f)) {
3823 s->float_exception_flags |= float_flag_divbyzero;
3824 return float16_set_sign(float16_infinity, sign);
3825 }
3826
3827 /* frsqrt7(+inf) = +0 */
3828 if (float16_is_infinity(f) && !sign) {
3829 return float16_set_sign(float16_zero, sign);
3830 }
3831
3832 /* +normal, +subnormal */
3833 uint64_t val = frsqrt7(f, exp_size, frac_size);
3834 return make_float16(val);
3835 }
3836
3837 static float32 frsqrt7_s(float32 f, float_status *s)
3838 {
3839 int exp_size = 8, frac_size = 23;
3840 bool sign = float32_is_neg(f);
3841
3842 /*
3843 * frsqrt7(sNaN) = canonical NaN
3844 * frsqrt7(-inf) = canonical NaN
3845 * frsqrt7(-normal) = canonical NaN
3846 * frsqrt7(-subnormal) = canonical NaN
3847 */
3848 if (float32_is_signaling_nan(f, s) ||
3849 (float32_is_infinity(f) && sign) ||
3850 (float32_is_normal(f) && sign) ||
3851 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
3852 s->float_exception_flags |= float_flag_invalid;
3853 return float32_default_nan(s);
3854 }
3855
3856 /* frsqrt7(qNaN) = canonical NaN */
3857 if (float32_is_quiet_nan(f, s)) {
3858 return float32_default_nan(s);
3859 }
3860
3861 /* frsqrt7(+-0) = +-inf */
3862 if (float32_is_zero(f)) {
3863 s->float_exception_flags |= float_flag_divbyzero;
3864 return float32_set_sign(float32_infinity, sign);
3865 }
3866
3867 /* frsqrt7(+inf) = +0 */
3868 if (float32_is_infinity(f) && !sign) {
3869 return float32_set_sign(float32_zero, sign);
3870 }
3871
3872 /* +normal, +subnormal */
3873 uint64_t val = frsqrt7(f, exp_size, frac_size);
3874 return make_float32(val);
3875 }
3876
3877 static float64 frsqrt7_d(float64 f, float_status *s)
3878 {
3879 int exp_size = 11, frac_size = 52;
3880 bool sign = float64_is_neg(f);
3881
3882 /*
3883 * frsqrt7(sNaN) = canonical NaN
3884 * frsqrt7(-inf) = canonical NaN
3885 * frsqrt7(-normal) = canonical NaN
3886 * frsqrt7(-subnormal) = canonical NaN
3887 */
3888 if (float64_is_signaling_nan(f, s) ||
3889 (float64_is_infinity(f) && sign) ||
3890 (float64_is_normal(f) && sign) ||
3891 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
3892 s->float_exception_flags |= float_flag_invalid;
3893 return float64_default_nan(s);
3894 }
3895
3896 /* frsqrt7(qNaN) = canonical NaN */
3897 if (float64_is_quiet_nan(f, s)) {
3898 return float64_default_nan(s);
3899 }
3900
3901 /* frsqrt7(+-0) = +-inf */
3902 if (float64_is_zero(f)) {
3903 s->float_exception_flags |= float_flag_divbyzero;
3904 return float64_set_sign(float64_infinity, sign);
3905 }
3906
3907 /* frsqrt7(+inf) = +0 */
3908 if (float64_is_infinity(f) && !sign) {
3909 return float64_set_sign(float64_zero, sign);
3910 }
3911
3912 /* +normal, +subnormal */
3913 uint64_t val = frsqrt7(f, exp_size, frac_size);
3914 return make_float64(val);
3915 }
3916
3917 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
3918 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
3919 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
3920 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2)
3921 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4)
3922 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8)
3923
3924 /*
3925 * Vector Floating-Point Reciprocal Estimate Instruction
3926 *
3927 * Adapted from riscv-v-spec recip.c:
3928 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3929 */
3930 static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
3931 float_status *s)
3932 {
3933 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3934 uint64_t exp = extract64(f, frac_size, exp_size);
3935 uint64_t frac = extract64(f, 0, frac_size);
3936
3937 const uint8_t lookup_table[] = {
3938 127, 125, 123, 121, 119, 117, 116, 114,
3939 112, 110, 109, 107, 105, 104, 102, 100,
3940 99, 97, 96, 94, 93, 91, 90, 88,
3941 87, 85, 84, 83, 81, 80, 79, 77,
3942 76, 75, 74, 72, 71, 70, 69, 68,
3943 66, 65, 64, 63, 62, 61, 60, 59,
3944 58, 57, 56, 55, 54, 53, 52, 51,
3945 50, 49, 48, 47, 46, 45, 44, 43,
3946 42, 41, 40, 40, 39, 38, 37, 36,
3947 35, 35, 34, 33, 32, 31, 31, 30,
3948 29, 28, 28, 27, 26, 25, 25, 24,
3949 23, 23, 22, 21, 21, 20, 19, 19,
3950 18, 17, 17, 16, 15, 15, 14, 14,
3951 13, 12, 12, 11, 11, 10, 9, 9,
3952 8, 8, 7, 7, 6, 5, 5, 4,
3953 4, 3, 3, 2, 2, 1, 1, 0
3954 };
3955 const int precision = 7;
3956
3957 if (exp == 0 && frac != 0) { /* subnormal */
3958 /* Normalize the subnormal. */
3959 while (extract64(frac, frac_size - 1, 1) == 0) {
3960 exp--;
3961 frac <<= 1;
3962 }
3963
3964 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3965
3966 if (exp != 0 && exp != UINT64_MAX) {
3967 /*
3968 * Overflow to inf or max value of same sign,
3969 * depending on sign and rounding mode.
3970 */
3971 s->float_exception_flags |= (float_flag_inexact |
3972 float_flag_overflow);
3973
3974 if ((s->float_rounding_mode == float_round_to_zero) ||
3975 ((s->float_rounding_mode == float_round_down) && !sign) ||
3976 ((s->float_rounding_mode == float_round_up) && sign)) {
3977 /* Return greatest/negative finite value. */
3978 return (sign << (exp_size + frac_size)) |
3979 (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
3980 } else {
3981 /* Return +-inf. */
3982 return (sign << (exp_size + frac_size)) |
3983 MAKE_64BIT_MASK(frac_size, exp_size);
3984 }
3985 }
3986 }
3987
3988 int idx = frac >> (frac_size - precision);
3989 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3990 (frac_size - precision);
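/*
 * Exponent of the estimate: 2 * B - 1 - exp (B the bias), i.e. the biased
 * exponent of 1/x given that the table entry approximates roughly
 * 2 / (1.frac); ~exp again stands in for (-exp - 1) via two's-complement
 * wraparound.
 */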
3991 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;
3992
3993 if (out_exp == 0 || out_exp == UINT64_MAX) {
3994 /*
3995 * The result is subnormal, but don't raise the underflow exception,
3996 * because there's no additional loss of precision.
3997 */
3998 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
3999 if (out_exp == UINT64_MAX) {
4000 out_frac >>= 1;
4001 out_exp = 0;
4002 }
4003 }
4004
4005 uint64_t val = 0;
4006 val = deposit64(val, 0, frac_size, out_frac);
4007 val = deposit64(val, frac_size, exp_size, out_exp);
4008 val = deposit64(val, frac_size + exp_size, 1, sign);
4009 return val;
4010 }
4011
4012 static float16 frec7_h(float16 f, float_status *s)
4013 {
4014 int exp_size = 5, frac_size = 10;
4015 bool sign = float16_is_neg(f);
4016
4017 /* frec7(+-inf) = +-0 */
4018 if (float16_is_infinity(f)) {
4019 return float16_set_sign(float16_zero, sign);
4020 }
4021
4022 /* frec7(+-0) = +-inf */
4023 if (float16_is_zero(f)) {
4024 s->float_exception_flags |= float_flag_divbyzero;
4025 return float16_set_sign(float16_infinity, sign);
4026 }
4027
4028 /* frec7(sNaN) = canonical NaN */
4029 if (float16_is_signaling_nan(f, s)) {
4030 s->float_exception_flags |= float_flag_invalid;
4031 return float16_default_nan(s);
4032 }
4033
4034 /* frec7(qNaN) = canonical NaN */
4035 if (float16_is_quiet_nan(f, s)) {
4036 return float16_default_nan(s);
4037 }
4038
4039 /* +-normal, +-subnormal */
4040 uint64_t val = frec7(f, exp_size, frac_size, s);
4041 return make_float16(val);
4042 }
4043
4044 static float32 frec7_s(float32 f, float_status *s)
4045 {
4046 int exp_size = 8, frac_size = 23;
4047 bool sign = float32_is_neg(f);
4048
4049 /* frec7(+-inf) = +-0 */
4050 if (float32_is_infinity(f)) {
4051 return float32_set_sign(float32_zero, sign);
4052 }
4053
4054 /* frec7(+-0) = +-inf */
4055 if (float32_is_zero(f)) {
4056 s->float_exception_flags |= float_flag_divbyzero;
4057 return float32_set_sign(float32_infinity, sign);
4058 }
4059
4060 /* frec7(sNaN) = canonical NaN */
4061 if (float32_is_signaling_nan(f, s)) {
4062 s->float_exception_flags |= float_flag_invalid;
4063 return float32_default_nan(s);
4064 }
4065
4066 /* frec7(qNaN) = canonical NaN */
4067 if (float32_is_quiet_nan(f, s)) {
4068 return float32_default_nan(s);
4069 }
4070
4071 /* +-normal, +-subnormal */
4072 uint64_t val = frec7(f, exp_size, frac_size, s);
4073 return make_float32(val);
4074 }
4075
4076 static float64 frec7_d(float64 f, float_status *s)
4077 {
4078 int exp_size = 11, frac_size = 52;
4079 bool sign = float64_is_neg(f);
4080
4081 /* frec7(+-inf) = +-0 */
4082 if (float64_is_infinity(f)) {
4083 return float64_set_sign(float64_zero, sign);
4084 }
4085
4086 /* frec7(+-0) = +-inf */
4087 if (float64_is_zero(f)) {
4088 s->float_exception_flags |= float_flag_divbyzero;
4089 return float64_set_sign(float64_infinity, sign);
4090 }
4091
4092 /* frec7(sNaN) = canonical NaN */
4093 if (float64_is_signaling_nan(f, s)) {
4094 s->float_exception_flags |= float_flag_invalid;
4095 return float64_default_nan(s);
4096 }
4097
4098 /* frec7(qNaN) = canonical NaN */
4099 if (float64_is_quiet_nan(f, s)) {
4100 return float64_default_nan(s);
4101 }
4102
4103 /* +-normal, +-subnormal */
4104 uint64_t val = frec7(f, exp_size, frac_size, s);
4105 return make_float64(val);
4106 }
4107
4108 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
4109 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
4110 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
4111 GEN_VEXT_V_ENV(vfrec7_v_h, 2)
4112 GEN_VEXT_V_ENV(vfrec7_v_w, 4)
4113 GEN_VEXT_V_ENV(vfrec7_v_d, 8)
4114
4115 /* Vector Floating-Point MIN/MAX Instructions */
4116 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
4117 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
4118 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
4119 GEN_VEXT_VV_ENV(vfmin_vv_h, 2)
4120 GEN_VEXT_VV_ENV(vfmin_vv_w, 4)
4121 GEN_VEXT_VV_ENV(vfmin_vv_d, 8)
4122 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
4123 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
4124 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
4125 GEN_VEXT_VF(vfmin_vf_h, 2)
4126 GEN_VEXT_VF(vfmin_vf_w, 4)
4127 GEN_VEXT_VF(vfmin_vf_d, 8)
4128
4129 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
4130 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
4131 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
4132 GEN_VEXT_VV_ENV(vfmax_vv_h, 2)
4133 GEN_VEXT_VV_ENV(vfmax_vv_w, 4)
4134 GEN_VEXT_VV_ENV(vfmax_vv_d, 8)
4135 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
4136 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
4137 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
4138 GEN_VEXT_VF(vfmax_vf_h, 2)
4139 GEN_VEXT_VF(vfmax_vf_w, 4)
4140 GEN_VEXT_VF(vfmax_vf_d, 8)
4141
4142 /* Vector Floating-Point Sign-Injection Instructions */
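/*
 * Each helper below builds the result from the magnitude (all bits except
 * the sign) of operand a and a sign taken from b, ~b or a ^ b respectively.
 * With the OP(s2, s1) calling convention, a is the vs2 element and b is the
 * vs1 element or f[rs1].
 */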
4143 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
4144 {
4145 return deposit64(b, 0, 15, a);
4146 }
4147
4148 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
4149 {
4150 return deposit64(b, 0, 31, a);
4151 }
4152
4153 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
4154 {
4155 return deposit64(b, 0, 63, a);
4156 }
4157
4158 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
4159 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
4160 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
4161 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2)
4162 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4)
4163 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8)
4164 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
4165 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
4166 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
4167 GEN_VEXT_VF(vfsgnj_vf_h, 2)
4168 GEN_VEXT_VF(vfsgnj_vf_w, 4)
4169 GEN_VEXT_VF(vfsgnj_vf_d, 8)
4170
4171 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
4172 {
4173 return deposit64(~b, 0, 15, a);
4174 }
4175
4176 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
4177 {
4178 return deposit64(~b, 0, 31, a);
4179 }
4180
4181 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
4182 {
4183 return deposit64(~b, 0, 63, a);
4184 }
4185
4186 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
4187 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
4188 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
4189 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2)
4190 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4)
4191 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8)
4192 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
4193 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
4194 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
4195 GEN_VEXT_VF(vfsgnjn_vf_h, 2)
4196 GEN_VEXT_VF(vfsgnjn_vf_w, 4)
4197 GEN_VEXT_VF(vfsgnjn_vf_d, 8)
4198
4199 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
4200 {
4201 return deposit64(b ^ a, 0, 15, a);
4202 }
4203
4204 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
4205 {
4206 return deposit64(b ^ a, 0, 31, a);
4207 }
4208
4209 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
4210 {
4211 return deposit64(b ^ a, 0, 63, a);
4212 }
4213
4214 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
4215 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
4216 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
4217 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2)
4218 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4)
4219 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8)
4220 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
4221 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
4222 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
4223 GEN_VEXT_VF(vfsgnjx_vf_h, 2)
4224 GEN_VEXT_VF(vfsgnjx_vf_w, 4)
4225 GEN_VEXT_VF(vfsgnjx_vf_d, 8)
4226
4227 /* Vector Floating-Point Compare Instructions */
4228 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \
4229 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
4230 CPURISCVState *env, uint32_t desc) \
4231 { \
4232 uint32_t vm = vext_vm(desc); \
4233 uint32_t vl = env->vl; \
4234 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
4235 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
4236 uint32_t vma = vext_vma(desc); \
4237 uint32_t i; \
4238 \
4239 VSTART_CHECK_EARLY_EXIT(env); \
4240 \
4241 for (i = env->vstart; i < vl; i++) { \
4242 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
4243 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
4244 if (!vm && !vext_elem_mask(v0, i)) { \
4245 /* set masked-off elements to 1s */ \
4246 if (vma) { \
4247 vext_set_elem_mask(vd, i, 1); \
4248 } \
4249 continue; \
4250 } \
4251 vext_set_elem_mask(vd, i, \
4252 DO_OP(s2, s1, &env->fp_status)); \
4253 } \
4254 env->vstart = 0; \
4255 /*
4256 * the mask destination register is always tail-agnostic
4257 * set tail elements to 1s
4258 */ \
4259 if (vta_all_1s) { \
4260 for (; i < total_elems; i++) { \
4261 vext_set_elem_mask(vd, i, 1); \
4262 } \
4263 } \
4264 }
4265
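/*
 * The compare helpers produce one mask bit per element via
 * vext_set_elem_mask(); masked-off elements are written as 1s when vma is
 * set, and tail elements are filled with 1s when vta_all_1s is set, since
 * mask-destination instructions are always tail-agnostic.  vmfeq/vmfne use
 * quiet comparisons, while the ordered compares (vmflt, vmfle, vmfgt, vmfge)
 * use signaling ones.
 */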
4266 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
4267 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
4268 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
4269
4270 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \
4271 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4272 CPURISCVState *env, uint32_t desc) \
4273 { \
4274 uint32_t vm = vext_vm(desc); \
4275 uint32_t vl = env->vl; \
4276 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
4277 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
4278 uint32_t vma = vext_vma(desc); \
4279 uint32_t i; \
4280 \
4281 VSTART_CHECK_EARLY_EXIT(env); \
4282 \
4283 for (i = env->vstart; i < vl; i++) { \
4284 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
4285 if (!vm && !vext_elem_mask(v0, i)) { \
4286 /* set masked-off elements to 1s */ \
4287 if (vma) { \
4288 vext_set_elem_mask(vd, i, 1); \
4289 } \
4290 continue; \
4291 } \
4292 vext_set_elem_mask(vd, i, \
4293 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \
4294 } \
4295 env->vstart = 0; \
4296 /*
4297 * the mask destination register is always tail-agnostic
4298 * set tail elements to 1s
4299 */ \
4300 if (vta_all_1s) { \
4301 for (; i < total_elems; i++) { \
4302 vext_set_elem_mask(vd, i, 1); \
4303 } \
4304 } \
4305 }
4306
4307 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
4308 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
4309 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
4310
4311 static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
4312 {
4313 FloatRelation compare = float16_compare_quiet(a, b, s);
4314 return compare != float_relation_equal;
4315 }
4316
4317 static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
4318 {
4319 FloatRelation compare = float32_compare_quiet(a, b, s);
4320 return compare != float_relation_equal;
4321 }
4322
4323 static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
4324 {
4325 FloatRelation compare = float64_compare_quiet(a, b, s);
4326 return compare != float_relation_equal;
4327 }
4328
4329 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
4330 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
4331 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
4332 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
4333 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
4334 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
4335
4336 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
4337 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
4338 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
4339 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
4340 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
4341 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
4342
4343 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
4344 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
4345 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
4346 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
4347 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
4348 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
4349
4350 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
4351 {
4352 FloatRelation compare = float16_compare(a, b, s);
4353 return compare == float_relation_greater;
4354 }
4355
4356 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
4357 {
4358 FloatRelation compare = float32_compare(a, b, s);
4359 return compare == float_relation_greater;
4360 }
4361
4362 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
4363 {
4364 FloatRelation compare = float64_compare(a, b, s);
4365 return compare == float_relation_greater;
4366 }
4367
4368 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
4369 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
4370 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4371
4372 static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4373 {
4374 FloatRelation compare = float16_compare(a, b, s);
4375 return compare == float_relation_greater ||
4376 compare == float_relation_equal;
4377 }
4378
4379 static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
4380 {
4381 FloatRelation compare = float32_compare(a, b, s);
4382 return compare == float_relation_greater ||
4383 compare == float_relation_equal;
4384 }
4385
4386 static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
4387 {
4388 FloatRelation compare = float64_compare(a, b, s);
4389 return compare == float_relation_greater ||
4390 compare == float_relation_equal;
4391 }
4392
4393 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
4394 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
4395 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
4396
4397 /* Vector Floating-Point Classify Instruction */
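/*
 * The classify helpers return the standard 10-bit fclass mask: bit 0 = -inf,
 * 1 = negative normal, 2 = negative subnormal, 3 = -0, 4 = +0,
 * 5 = positive subnormal, 6 = positive normal, 7 = +inf, 8 = signaling NaN,
 * 9 = quiet NaN.
 */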
4398 target_ulong fclass_h(uint64_t frs1)
4399 {
4400 float16 f = frs1;
4401 bool sign = float16_is_neg(f);
4402
4403 if (float16_is_infinity(f)) {
4404 return sign ? 1 << 0 : 1 << 7;
4405 } else if (float16_is_zero(f)) {
4406 return sign ? 1 << 3 : 1 << 4;
4407 } else if (float16_is_zero_or_denormal(f)) {
4408 return sign ? 1 << 2 : 1 << 5;
4409 } else if (float16_is_any_nan(f)) {
4410 float_status s = { }; /* for snan_bit_is_one */
4411 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4412 } else {
4413 return sign ? 1 << 1 : 1 << 6;
4414 }
4415 }
4416
4417 target_ulong fclass_s(uint64_t frs1)
4418 {
4419 float32 f = frs1;
4420 bool sign = float32_is_neg(f);
4421
4422 if (float32_is_infinity(f)) {
4423 return sign ? 1 << 0 : 1 << 7;
4424 } else if (float32_is_zero(f)) {
4425 return sign ? 1 << 3 : 1 << 4;
4426 } else if (float32_is_zero_or_denormal(f)) {
4427 return sign ? 1 << 2 : 1 << 5;
4428 } else if (float32_is_any_nan(f)) {
4429 float_status s = { }; /* for snan_bit_is_one */
4430 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4431 } else {
4432 return sign ? 1 << 1 : 1 << 6;
4433 }
4434 }
4435
4436 target_ulong fclass_d(uint64_t frs1)
4437 {
4438 float64 f = frs1;
4439 bool sign = float64_is_neg(f);
4440
4441 if (float64_is_infinity(f)) {
4442 return sign ? 1 << 0 : 1 << 7;
4443 } else if (float64_is_zero(f)) {
4444 return sign ? 1 << 3 : 1 << 4;
4445 } else if (float64_is_zero_or_denormal(f)) {
4446 return sign ? 1 << 2 : 1 << 5;
4447 } else if (float64_is_any_nan(f)) {
4448 float_status s = { }; /* for snan_bit_is_one */
4449 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4450 } else {
4451 return sign ? 1 << 1 : 1 << 6;
4452 }
4453 }
4454
4455 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
4456 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
4457 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
4458 GEN_VEXT_V(vfclass_v_h, 2)
4459 GEN_VEXT_V(vfclass_v_w, 4)
4460 GEN_VEXT_V(vfclass_v_d, 8)
4461
4462 /* Vector Floating-Point Merge Instruction */
4463
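/*
 * Per-element select: element i receives the scalar s1 when the instruction
 * is unmasked or its mask bit is set, and takes the vs2 element otherwise;
 * with vm set this effectively splats f[rs1] (the vfmv.v.f behaviour).
 */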
4464 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \
4465 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4466 CPURISCVState *env, uint32_t desc) \
4467 { \
4468 uint32_t vm = vext_vm(desc); \
4469 uint32_t vl = env->vl; \
4470 uint32_t esz = sizeof(ETYPE); \
4471 uint32_t total_elems = \
4472 vext_get_total_elems(env, desc, esz); \
4473 uint32_t vta = vext_vta(desc); \
4474 uint32_t i; \
4475 \
4476 VSTART_CHECK_EARLY_EXIT(env); \
4477 \
4478 for (i = env->vstart; i < vl; i++) { \
4479 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
4480 *((ETYPE *)vd + H(i)) = \
4481 (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \
4482 } \
4483 env->vstart = 0; \
4484 /* set tail elements to 1s */ \
4485 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
4486 }
4487
4488 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
4489 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
4490 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
4491
4492 /* Single-Width Floating-Point/Integer Type-Convert Instructions */
4493 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
4494 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
4495 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
4496 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
4497 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2)
4498 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4)
4499 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8)
4500
4501 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
4502 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
4503 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
4504 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
4505 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2)
4506 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4)
4507 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8)
4508
4509 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
4510 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
4511 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
4512 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
4513 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2)
4514 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4)
4515 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8)
4516
4517 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
4518 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
4519 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
4520 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
4521 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2)
4522 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4)
4523 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8)
4524
4525 /* Widening Floating-Point/Integer Type-Convert Instructions */
4526 /* (TD, T2, TX2) */
4527 #define WOP_UU_B uint16_t, uint8_t, uint8_t
4528 #define WOP_UU_H uint32_t, uint16_t, uint16_t
4529 #define WOP_UU_W uint64_t, uint32_t, uint32_t
4530 /*
4531 * vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.
4532 */
4533 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
4534 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
4535 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4)
4536 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8)
4537
4538 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
4539 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
4540 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
4541 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4)
4542 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8)
4543
4544 /*
4545 * vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float.
4546 */
4547 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
4548 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
4549 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
4550 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2)
4551 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4)
4552 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8)
4553
4554 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
4555 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
4556 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
4557 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
4558 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2)
4559 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4)
4560 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8)
4561
4562 /*
4563 * vfwcvt.f.f.v vd, vs2, vm # Convert single-width float to double-width float.
4564 */
4565 static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4566 {
4567 return float16_to_float32(a, true, s);
4568 }
4569
4570 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
4571 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
4572 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4)
4573 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8)
4574
4575 RVVCALL(OPFVV1, vfwcvtbf16_f_f_v, WOP_UU_H, H4, H2, bfloat16_to_float32)
4576 GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v, 4)
4577
4578 /* Narrowing Floating-Point/Integer Type-Convert Instructions */
4579 /* (TD, T2, TX2) */
4580 #define NOP_UU_B uint8_t, uint16_t, uint32_t
4581 #define NOP_UU_H uint16_t, uint32_t, uint32_t
4582 #define NOP_UU_W uint32_t, uint64_t, uint64_t
4583 /* vfncvt.xu.f.w vd, vs2, vm # Convert double-width float to unsigned integer. */
4584 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
4585 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
4586 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
4587 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1)
4588 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2)
4589 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4)
4590
4591 /* vfncvt.x.f.w vd, vs2, vm # Convert double-width float to signed integer. */
4592 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
4593 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
4594 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
4595 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1)
4596 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2)
4597 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4)
4598
4599 /*
4600 * vfncvt.f.xu.w vd, vs2, vm # Convert double-width unsigned integer to float.
4601 */
4602 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
4603 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
4604 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2)
4605 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4)
4606
4607 /* vfncvt.f.x.w vd, vs2, vm # Convert double-width integer to float. */
4608 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
4609 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
4610 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2)
4611 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4)
4612
4613 /* vfncvt.f.f.w vd, vs2, vm # Convert double-width float to single-width float. */
4614 static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4615 {
4616 return float32_to_float16(a, true, s);
4617 }
4618
4619 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
4620 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
4621 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2)
4622 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4)
4623
4624 RVVCALL(OPFVV1, vfncvtbf16_f_f_w, NOP_UU_H, H2, H4, float32_to_bfloat16)
4625 GEN_VEXT_V_ENV(vfncvtbf16_f_f_w, 2)
4626
4627 /*
4628 * Vector Reduction Operations
4629 */
4630 /* Vector Single-Width Integer Reduction Instructions */
4631 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \
4632 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4633 void *vs2, CPURISCVState *env, \
4634 uint32_t desc) \
4635 { \
4636 uint32_t vm = vext_vm(desc); \
4637 uint32_t vl = env->vl; \
4638 uint32_t esz = sizeof(TD); \
4639 uint32_t vlenb = simd_maxsz(desc); \
4640 uint32_t vta = vext_vta(desc); \
4641 uint32_t i; \
4642 TD s1 = *((TD *)vs1 + HD(0)); \
4643 \
4644 for (i = env->vstart; i < vl; i++) { \
4645 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
4646 if (!vm && !vext_elem_mask(v0, i)) { \
4647 continue; \
4648 } \
4649 s1 = OP(s1, (TD)s2); \
4650 } \
4651 *((TD *)vd + HD(0)) = s1; \
4652 env->vstart = 0; \
4653 /* set tail elements to 1s */ \
4654 vext_set_elems_1s(vd, vta, esz, vlenb); \
4655 }
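/*
 * Each GEN_VEXT_RED instance produces a helper that reduces the active
 * elements of vs2 into a scalar: the accumulator starts as vs1[0], every
 * unmasked element i in [vstart, vl) is folded in with OP, and the result
 * is written to vd[0].  Everything past element 0 of vd is tail and is
 * set to all ones under the tail-agnostic policy.  For example,
 * GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) roughly
 * expands to a helper computing vd[0] = vs1[0] + sum of active vs2[i].
 */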
4656
4657 /* vd[0] = sum(vs1[0], vs2[*]) */
4658 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD)
4659 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
4660 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
4661 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)
4662
4663 /* vd[0] = maxu(vs1[0], vs2[*]) */
4664 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX)
4665 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
4666 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
4667 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)
4668
4669 /* vd[0] = max(vs1[0], vs2[*]) */
4670 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX)
4671 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
4672 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
4673 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)
4674
4675 /* vd[0] = minu(vs1[0], vs2[*]) */
4676 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN)
4677 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
4678 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
4679 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)
4680
4681 /* vd[0] = min(vs1[0], vs2[*]) */
4682 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN)
4683 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
4684 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
4685 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)
4686
4687 /* vd[0] = and(vs1[0], vs2[*]) */
4688 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND)
4689 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
4690 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
4691 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)
4692
4693 /* vd[0] = or(vs1[0], vs2[*]) */
4694 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR)
4695 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
4696 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
4697 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)
4698
4699 /* vd[0] = xor(vs1[0], vs2[*]) */
4700 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR)
4701 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
4702 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
4703 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)
4704
4705 /* Vector Widening Integer Reduction Instructions */
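/*
 * The widening forms reuse GEN_VEXT_RED: TS2 is the narrow source type and
 * TD the double-width accumulator, so the (TD)s2 cast in the macro performs
 * sign extension for vwredsum and zero extension for vwredsumu.
 */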
4706 /* Signed sum reduction into double-width accumulator */
4707 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD)
4708 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
4709 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)
4710
4711 /* Unsigned sum reduction into double-width accumulator */
4712 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD)
4713 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
4714 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
4715
4716 /* Vector Single-Width Floating-Point Reduction Instructions */
4717 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \
4718 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4719 void *vs2, CPURISCVState *env, \
4720 uint32_t desc) \
4721 { \
4722 uint32_t vm = vext_vm(desc); \
4723 uint32_t vl = env->vl; \
4724 uint32_t esz = sizeof(TD); \
4725 uint32_t vlenb = simd_maxsz(desc); \
4726 uint32_t vta = vext_vta(desc); \
4727 uint32_t i; \
4728 TD s1 = *((TD *)vs1 + HD(0)); \
4729 \
4730 for (i = env->vstart; i < vl; i++) { \
4731 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
4732 if (!vm && !vext_elem_mask(v0, i)) { \
4733 continue; \
4734 } \
4735 s1 = OP(s1, (TD)s2, &env->fp_status); \
4736 } \
4737 *((TD *)vd + HD(0)) = s1; \
4738 env->vstart = 0; \
4739 /* set tail elements to 1s */ \
4740 vext_set_elems_1s(vd, vta, esz, vlenb); \
4741 }
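/*
 * GEN_VEXT_FRED mirrors GEN_VEXT_RED but threads &env->fp_status through OP
 * so softfloat exception flags accumulate.  Both the unordered (vfredusum)
 * and ordered (vfredosum) sums below instantiate it with the same add
 * operation, i.e. the unordered sum is also computed in strict element
 * order here.
 */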
4742
4743 /* Unordered sum */
4744 GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4745 GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4746 GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
4747
4748 /* Ordered sum */
4749 GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4750 GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4751 GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
4752
4753 /* Maximum value */
4754 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2,
4755 float16_maximum_number)
4756 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4,
4757 float32_maximum_number)
4758 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8,
4759 float64_maximum_number)
4760
4761 /* Minimum value */
4762 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2,
4763 float16_minimum_number)
4764 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4,
4765 float32_minimum_number)
4766 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8,
4767 float64_minimum_number)
4768
4769 /* Vector Widening Floating-Point Add Instructions */
4770 static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s)
4771 {
4772 return float32_add(a, float16_to_float32(b, true, s), s);
4773 }
4774
4775 static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s)
4776 {
4777 return float64_add(a, float32_to_float64(b, s), s);
4778 }
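/*
 * fwadd16/fwadd32 promote one SEW-wide source element to 2*SEW before
 * adding it into the double-width accumulator, matching the widening
 * reduction definition 2*SEW = 2*SEW + promote(SEW).
 */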
4779
4780 /* Vector Widening Floating-Point Reduction Instructions */
4781 /* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
4782 GEN_VEXT_FRED(vfwredusum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
4783 GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
4784 GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
4785 GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
4786
4787 /*
4788 * Vector Mask Operations
4789 */
4790 /* Vector Mask-Register Logical Instructions */
4791 #define GEN_VEXT_MASK_VV(NAME, OP) \
4792 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4793 void *vs2, CPURISCVState *env, \
4794 uint32_t desc) \
4795 { \
4796 uint32_t vl = env->vl; \
4797 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;\
4798 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
4799 uint32_t i; \
4800 int a, b; \
4801 \
4802 VSTART_CHECK_EARLY_EXIT(env); \
4803 \
4804 for (i = env->vstart; i < vl; i++) { \
4805 a = vext_elem_mask(vs1, i); \
4806 b = vext_elem_mask(vs2, i); \
4807 vext_set_elem_mask(vd, i, OP(b, a)); \
4808 } \
4809 env->vstart = 0; \
4810 /*
4811 * mask destination register is always tail-agnostic
4812 * set tail elements to 1s
4813 */ \
4814 if (vta_all_1s) { \
4815 for (; i < total_elems; i++) { \
4816 vext_set_elem_mask(vd, i, 1); \
4817 } \
4818 } \
4819 }
4820
4821 #define DO_NAND(N, M) (!(N & M))
4822 #define DO_ANDNOT(N, M) (N & !M)
4823 #define DO_NOR(N, M) (!(N | M))
4824 #define DO_ORNOT(N, M) (N | !M)
4825 #define DO_XNOR(N, M) (!(N ^ M))
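/*
 * a and b are single mask bits read via vext_elem_mask(), so they are
 * always 0 or 1; logical '!' therefore acts as a one-bit complement,
 * which is why the *NOT variants above use '!' rather than '~'.
 */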
4826
4827 GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
4828 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
4829 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
4830 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
4831 GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
4832 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
4833 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
4834 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
4835
4836 /* Vector count population in mask: vcpop.m */
4837 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
4838 uint32_t desc)
4839 {
4840 target_ulong cnt = 0;
4841 uint32_t vm = vext_vm(desc);
4842 uint32_t vl = env->vl;
4843 int i;
4844
4845 for (i = env->vstart; i < vl; i++) {
4846 if (vm || vext_elem_mask(v0, i)) {
4847 if (vext_elem_mask(vs2, i)) {
4848 cnt++;
4849 }
4850 }
4851 }
4852 env->vstart = 0;
4853 return cnt;
4854 }
4855
4856 /* vfirst find-first-set mask bit */
4857 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4858 uint32_t desc)
4859 {
4860 uint32_t vm = vext_vm(desc);
4861 uint32_t vl = env->vl;
4862 int i;
4863
4864 for (i = env->vstart; i < vl; i++) {
4865 if (vm || vext_elem_mask(v0, i)) {
4866 if (vext_elem_mask(vs2, i)) {
4867 return i;
4868 }
4869 }
4870 }
4871 env->vstart = 0;
4872 return -1LL;
4873 }
4874
4875 enum set_mask_type {
4876 ONLY_FIRST = 1,
4877 INCLUDE_FIRST,
4878 BEFORE_FIRST,
4879 };
4880
4881 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
4882 uint32_t desc, enum set_mask_type type)
4883 {
4884 uint32_t vm = vext_vm(desc);
4885 uint32_t vl = env->vl;
4886 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;
4887 uint32_t vta_all_1s = vext_vta_all_1s(desc);
4888 uint32_t vma = vext_vma(desc);
4889 int i;
4890 bool first_mask_bit = false;
4891
4892 for (i = env->vstart; i < vl; i++) {
4893 if (!vm && !vext_elem_mask(v0, i)) {
4894 /* set masked-off elements to 1s */
4895 if (vma) {
4896 vext_set_elem_mask(vd, i, 1);
4897 }
4898 continue;
4899 }
4900 /* write a zero to all following active elements */
4901 if (first_mask_bit) {
4902 vext_set_elem_mask(vd, i, 0);
4903 continue;
4904 }
4905 if (vext_elem_mask(vs2, i)) {
4906 first_mask_bit = true;
4907 if (type == BEFORE_FIRST) {
4908 vext_set_elem_mask(vd, i, 0);
4909 } else {
4910 vext_set_elem_mask(vd, i, 1);
4911 }
4912 } else {
4913 if (type == ONLY_FIRST) {
4914 vext_set_elem_mask(vd, i, 0);
4915 } else {
4916 vext_set_elem_mask(vd, i, 1);
4917 }
4918 }
4919 }
4920 env->vstart = 0;
4921 /*
4922 * mask destination register is always tail-agnostic
4923 * set tail elements to 1s
4924 */
4925 if (vta_all_1s) {
4926 for (; i < total_elems; i++) {
4927 vext_set_elem_mask(vd, i, 1);
4928 }
4929 }
4930 }
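/*
 * The three helpers below select the set-mask variant:
 *   vmsbf.m (BEFORE_FIRST)  - 1s strictly before the first set bit of vs2,
 *   vmsif.m (INCLUDE_FIRST) - 1s up to and including the first set bit,
 *   vmsof.m (ONLY_FIRST)    - a single 1 at the first set bit only,
 * with all later active elements written as 0.
 */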
4931
4932 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4933 uint32_t desc)
4934 {
4935 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
4936 }
4937
4938 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4939 uint32_t desc)
4940 {
4941 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
4942 }
4943
4944 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4945 uint32_t desc)
4946 {
4947 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
4948 }
4949
4950 /* Vector Iota Instruction */
4951 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \
4952 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \
4953 uint32_t desc) \
4954 { \
4955 uint32_t vm = vext_vm(desc); \
4956 uint32_t vl = env->vl; \
4957 uint32_t esz = sizeof(ETYPE); \
4958 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4959 uint32_t vta = vext_vta(desc); \
4960 uint32_t vma = vext_vma(desc); \
4961 uint32_t sum = 0; \
4962 int i; \
4963 \
4964 for (i = env->vstart; i < vl; i++) { \
4965 if (!vm && !vext_elem_mask(v0, i)) { \
4966 /* set masked-off elements to 1s */ \
4967 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
4968 continue; \
4969 } \
4970 *((ETYPE *)vd + H(i)) = sum; \
4971 if (vext_elem_mask(vs2, i)) { \
4972 sum++; \
4973 } \
4974 } \
4975 env->vstart = 0; \
4976 /* set tail elements to 1s */ \
4977 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
4978 }
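/*
 * viota.m writes, into each active destination element, the number of set
 * mask bits of vs2 at indices below it (an exclusive prefix sum): 'sum' is
 * stored before being incremented for the current element.
 */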
4979
4980 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
4981 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
4982 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
4983 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
4984
4985 /* Vector Element Index Instruction */
4986 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \
4987 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \
4988 { \
4989 uint32_t vm = vext_vm(desc); \
4990 uint32_t vl = env->vl; \
4991 uint32_t esz = sizeof(ETYPE); \
4992 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4993 uint32_t vta = vext_vta(desc); \
4994 uint32_t vma = vext_vma(desc); \
4995 int i; \
4996 \
4997 VSTART_CHECK_EARLY_EXIT(env); \
4998 \
4999 for (i = env->vstart; i < vl; i++) { \
5000 if (!vm && !vext_elem_mask(v0, i)) { \
5001 /* set masked-off elements to 1s */ \
5002 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5003 continue; \
5004 } \
5005 *((ETYPE *)vd + H(i)) = i; \
5006 } \
5007 env->vstart = 0; \
5008 /* set tail elements to 1s */ \
5009 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5010 }
5011
5012 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
5013 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
5014 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
5015 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
5016
5017 /*
5018 * Vector Permutation Instructions
5019 */
5020
5021 /* Vector Slide Instructions */
5022 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \
5023 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5024 CPURISCVState *env, uint32_t desc) \
5025 { \
5026 uint32_t vm = vext_vm(desc); \
5027 uint32_t vl = env->vl; \
5028 uint32_t esz = sizeof(ETYPE); \
5029 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5030 uint32_t vta = vext_vta(desc); \
5031 uint32_t vma = vext_vma(desc); \
5032 target_ulong offset = s1, i_min, i; \
5033 \
5034 VSTART_CHECK_EARLY_EXIT(env); \
5035 \
5036 i_min = MAX(env->vstart, offset); \
5037 for (i = i_min; i < vl; i++) { \
5038 if (!vm && !vext_elem_mask(v0, i)) { \
5039 /* set masked-off elements to 1s */ \
5040 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5041 continue; \
5042 } \
5043 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \
5044 } \
5045 env->vstart = 0; \
5046 /* set tail elements to 1s */ \
5047 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5048 }
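/*
 * Destination elements below the slide amount are not written: the loop
 * starts at MAX(vstart, offset), so vd[0..offset-1] keep their previous
 * values and only vd[i] = vs2[i - offset] is updated for active i >= offset.
 */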
5049
5050 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
5051 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
5052 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
5053 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
5054 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
5055
5056 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \
5057 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5058 CPURISCVState *env, uint32_t desc) \
5059 { \
5060 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
5061 uint32_t vm = vext_vm(desc); \
5062 uint32_t vl = env->vl; \
5063 uint32_t esz = sizeof(ETYPE); \
5064 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5065 uint32_t vta = vext_vta(desc); \
5066 uint32_t vma = vext_vma(desc); \
5067 target_ulong i_max, i_min, i; \
5068 \
5069 VSTART_CHECK_EARLY_EXIT(env); \
5070 \
5071 i_min = MIN(s1 < vlmax ? vlmax - s1 : 0, vl); \
5072 i_max = MAX(i_min, env->vstart); \
5073 for (i = env->vstart; i < i_max; ++i) { \
5074 if (!vm && !vext_elem_mask(v0, i)) { \
5075 /* set masked-off elements to 1s */ \
5076 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5077 continue; \
5078 } \
5079 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \
5080 } \
5081 \
5082 for (i = i_max; i < vl; ++i) { \
5083 if (vm || vext_elem_mask(v0, i)) { \
5084 *((ETYPE *)vd + H(i)) = 0; \
5085 } \
5086 } \
5087 \
5088 env->vstart = 0; \
5089 /* set tail elements to 1s */ \
5090 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5091 }
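/*
 * For vslidedown, active elements whose source index i + s1 still falls
 * inside the register group (i < vlmax - s1) copy vs2[i + s1]; active
 * elements from i_max up to vl are written with zero because their source
 * lies at or beyond VLMAX.
 */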
5092
5093 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
5094 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
5095 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
5096 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
5097 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
5098
5099 #define GEN_VEXT_VSLIDE1UP(BITWIDTH, H)                                   \
5100 static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1, \
5101 void *vs2, CPURISCVState *env, \
5102 uint32_t desc) \
5103 { \
5104 typedef uint##BITWIDTH##_t ETYPE; \
5105 uint32_t vm = vext_vm(desc); \
5106 uint32_t vl = env->vl; \
5107 uint32_t esz = sizeof(ETYPE); \
5108 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5109 uint32_t vta = vext_vta(desc); \
5110 uint32_t vma = vext_vma(desc); \
5111 uint32_t i; \
5112 \
5113 VSTART_CHECK_EARLY_EXIT(env); \
5114 \
5115 for (i = env->vstart; i < vl; i++) { \
5116 if (!vm && !vext_elem_mask(v0, i)) { \
5117 /* set masked-off elements to 1s */ \
5118 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5119 continue; \
5120 } \
5121 if (i == 0) { \
5122 *((ETYPE *)vd + H(i)) = s1; \
5123 } else { \
5124 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \
5125 } \
5126 } \
5127 env->vstart = 0; \
5128 /* set tail elements to 1s */ \
5129 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5130 }
5131
5132 GEN_VEXT_VSLIDE1UP(8, H1)
5133 GEN_VEXT_VSLIDE1UP(16, H2)
5134 GEN_VEXT_VSLIDE1UP(32, H4)
5135 GEN_VEXT_VSLIDE1UP(64, H8)
5136
5137 #define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \
5138 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5139 CPURISCVState *env, uint32_t desc) \
5140 { \
5141 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
5142 }
5143
5144 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
5145 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
5146 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
5147 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
5148 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
5149
5150 #define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \
5151 static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1, \
5152 void *vs2, CPURISCVState *env, \
5153 uint32_t desc) \
5154 { \
5155 typedef uint##BITWIDTH##_t ETYPE; \
5156 uint32_t vm = vext_vm(desc); \
5157 uint32_t vl = env->vl; \
5158 uint32_t esz = sizeof(ETYPE); \
5159 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5160 uint32_t vta = vext_vta(desc); \
5161 uint32_t vma = vext_vma(desc); \
5162 uint32_t i; \
5163 \
5164 VSTART_CHECK_EARLY_EXIT(env); \
5165 \
5166 for (i = env->vstart; i < vl; i++) { \
5167 if (!vm && !vext_elem_mask(v0, i)) { \
5168 /* set masked-off elements to 1s */ \
5169 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5170 continue; \
5171 } \
5172 if (i == vl - 1) { \
5173 *((ETYPE *)vd + H(i)) = s1; \
5174 } else { \
5175 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \
5176 } \
5177 } \
5178 env->vstart = 0; \
5179 /* set tail elements to 1s */ \
5180 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5181 }
5182
5183 GEN_VEXT_VSLIDE1DOWN(8, H1)
5184 GEN_VEXT_VSLIDE1DOWN(16, H2)
5185 GEN_VEXT_VSLIDE1DOWN(32, H4)
5186 GEN_VEXT_VSLIDE1DOWN(64, H8)
5187
5188 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \
5189 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5190 CPURISCVState *env, uint32_t desc) \
5191 { \
5192 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
5193 }
5194
5195 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
5196 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
5197 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
5198 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
5199 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
5200
5201 /* Vector Floating-Point Slide Instructions */
5202 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \
5203 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
5204 CPURISCVState *env, uint32_t desc) \
5205 { \
5206 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
5207 }
5208
5209 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
5210 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
5211 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
5212 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)
5213
5214 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \
5215 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
5216 CPURISCVState *env, uint32_t desc) \
5217 { \
5218 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
5219 }
5220
5221 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
5222 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
5223 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
5224 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
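/*
 * The floating-point slide1 variants simply reuse the integer helpers: the
 * scalar f[rs1] arrives as a raw uint64_t bit pattern, so no floating-point
 * interpretation is needed at this level.
 */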
5225
5226 /* Vector Register Gather Instruction */
5227 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \
5228 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
5229 CPURISCVState *env, uint32_t desc) \
5230 { \
5231 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \
5232 uint32_t vm = vext_vm(desc); \
5233 uint32_t vl = env->vl; \
5234 uint32_t esz = sizeof(TS2); \
5235 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5236 uint32_t vta = vext_vta(desc); \
5237 uint32_t vma = vext_vma(desc); \
5238 uint64_t index; \
5239 uint32_t i; \
5240 \
5241 VSTART_CHECK_EARLY_EXIT(env); \
5242 \
5243 for (i = env->vstart; i < vl; i++) { \
5244 if (!vm && !vext_elem_mask(v0, i)) { \
5245 /* set masked-off elements to 1s */ \
5246 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5247 continue; \
5248 } \
5249 index = *((TS1 *)vs1 + HS1(i)); \
5250 if (index >= vlmax) { \
5251 *((TS2 *)vd + HS2(i)) = 0; \
5252 } else { \
5253 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \
5254 } \
5255 } \
5256 env->vstart = 0; \
5257 /* set tail elements to 1s */ \
5258 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5259 }
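/*
 * The index element type TS1 is independent of the data type TS2: the plain
 * vrgather.vv forms use SEW-wide indices, while the vrgatherei16
 * instantiations below fix TS1 to uint16_t regardless of SEW.  Indices at or
 * above vlmax select a zero result.
 */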
5260
5261 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
5262 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
5263 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
5264 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
5265 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)
5266
5267 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
5268 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
5269 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
5270 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
5271
5272 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \
5273 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5274 CPURISCVState *env, uint32_t desc) \
5275 { \
5276 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
5277 uint32_t vm = vext_vm(desc); \
5278 uint32_t vl = env->vl; \
5279 uint32_t esz = sizeof(ETYPE); \
5280 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5281 uint32_t vta = vext_vta(desc); \
5282 uint32_t vma = vext_vma(desc); \
5283 uint64_t index = s1; \
5284 uint32_t i; \
5285 \
5286 VSTART_CHECK_EARLY_EXIT(env); \
5287 \
5288 for (i = env->vstart; i < vl; i++) { \
5289 if (!vm && !vext_elem_mask(v0, i)) { \
5290 /* set masked-off elements to 1s */ \
5291 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5292 continue; \
5293 } \
5294 if (index >= vlmax) { \
5295 *((ETYPE *)vd + H(i)) = 0; \
5296 } else { \
5297 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \
5298 } \
5299 } \
5300 env->vstart = 0; \
5301 /* set tail elements to 1s */ \
5302 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5303 }
5304
5305 /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
5306 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
5307 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
5308 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
5309 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
5310
5311 /* Vector Compress Instruction */
5312 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \
5313 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
5314 CPURISCVState *env, uint32_t desc) \
5315 { \
5316 uint32_t vl = env->vl; \
5317 uint32_t esz = sizeof(ETYPE); \
5318 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5319 uint32_t vta = vext_vta(desc); \
5320 uint32_t num = 0, i; \
5321 \
5322 for (i = env->vstart; i < vl; i++) { \
5323 if (!vext_elem_mask(vs1, i)) { \
5324 continue; \
5325 } \
5326 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \
5327 num++; \
5328 } \
5329 env->vstart = 0; \
5330 /* set tail elements to 1s */ \
5331 vext_set_elems_1s(vd, vta, num * esz, total_elems * esz); \
5332 }
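/*
 * vcompress.vm packs the elements of vs2 whose corresponding bit in vs1 is
 * set into the lowest-numbered elements of vd; 'num' counts how many were
 * written, and everything from element 'num' upward is treated as tail.
 */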
5333
5334 /* Compress into vd elements of vs2 where vs1 is enabled */
5335 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
5336 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
5337 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
5338 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
5339
5340 /* Vector Whole Register Move */
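/*
 * A non-zero vstart is interpreted in elements of the current SEW, hence
 * the byte offset startb = vstart * sewb.  On big-endian hosts, a start
 * offset in the middle of an 8-byte host word is copied separately because
 * guest byte order within each word is swizzled by H1().
 */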
5341 void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
5342 {
5343 /* EEW = SEW */
5344 uint32_t maxsz = simd_maxsz(desc);
5345 uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
5346 uint32_t startb = env->vstart * sewb;
5347 uint32_t i = startb;
5348
5349 if (startb >= maxsz) {
5350 env->vstart = 0;
5351 return;
5352 }
5353
5354 if (HOST_BIG_ENDIAN && i % 8 != 0) {
5355 uint32_t j = ROUND_UP(i, 8);
5356 memcpy((uint8_t *)vd + H1(j - 1),
5357 (uint8_t *)vs2 + H1(j - 1),
5358 j - i);
5359 i = j;
5360 }
5361
5362 memcpy((uint8_t *)vd + H1(i),
5363 (uint8_t *)vs2 + H1(i),
5364 maxsz - i);
5365
5366 env->vstart = 0;
5367 }
5368
5369 /* Vector Integer Extension */
5370 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \
5371 void HELPER(NAME)(void *vd, void *v0, void *vs2, \
5372 CPURISCVState *env, uint32_t desc) \
5373 { \
5374 uint32_t vl = env->vl; \
5375 uint32_t vm = vext_vm(desc); \
5376 uint32_t esz = sizeof(ETYPE); \
5377 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5378 uint32_t vta = vext_vta(desc); \
5379 uint32_t vma = vext_vma(desc); \
5380 uint32_t i; \
5381 \
5382 VSTART_CHECK_EARLY_EXIT(env); \
5383 \
5384 for (i = env->vstart; i < vl; i++) { \
5385 if (!vm && !vext_elem_mask(v0, i)) { \
5386 /* set masked-off elements to 1s */ \
5387 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5388 continue; \
5389 } \
5390 *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \
5391 } \
5392 env->vstart = 0; \
5393 /* set tail elements to 1s */ \
5394 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5395 }
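/*
 * DTYPE is the narrower source type and ETYPE the destination; the ordinary
 * C assignment performs the widening, so the signedness of the type pair
 * chosen below selects zero extension (vzext) or sign extension (vsext).
 * The vf2/vf4/vf8 suffixes name the destination:source width ratio.
 */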
5396
5397 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
5398 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
5399 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
5400 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
5401 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
5402 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)
5403
5404 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
5405 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
5406 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
5407 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
5408 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
5409 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
5410