1 /*
2 * RISC-V Vector Extension Helpers for QEMU.
3 *
4 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2 or later, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19 #include "qemu/osdep.h"
20 #include "qemu/host-utils.h"
21 #include "qemu/bitops.h"
22 #include "cpu.h"
23 #include "exec/memop.h"
24 #include "exec/exec-all.h"
25 #include "exec/cpu_ldst.h"
26 #include "exec/page-protection.h"
27 #include "exec/helper-proto.h"
28 #include "fpu/softfloat.h"
29 #include "tcg/tcg-gvec-desc.h"
30 #include "internals.h"
31 #include "vector_internals.h"
32 #include <math.h>
33
HELPER(vsetvl)34 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
35 target_ulong s2)
36 {
37 int vlmax, vl;
38 RISCVCPU *cpu = env_archcpu(env);
39 uint64_t vlmul = FIELD_EX64(s2, VTYPE, VLMUL);
40 uint8_t vsew = FIELD_EX64(s2, VTYPE, VSEW);
41 uint16_t sew = 8 << vsew;
42 uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
43 int xlen = riscv_cpu_xlen(env);
44 bool vill = (s2 >> (xlen - 1)) & 0x1;
45 target_ulong reserved = s2 &
46 MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
47 xlen - 1 - R_VTYPE_RESERVED_SHIFT);
48 uint16_t vlen = cpu->cfg.vlenb << 3;
49 int8_t lmul;
50
51 if (vlmul & 4) {
52 /*
53 * Fractional LMUL, check:
54 *
55 * VLEN * LMUL >= SEW
56 * VLEN >> (8 - lmul) >= sew
57 * (vlenb << 3) >> (8 - lmul) >= sew
58 */
59 if (vlmul == 4 || (vlen >> (8 - vlmul)) < sew) {
60 vill = true;
61 }
62 }
63
64 if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
65 /* only set vill bit. */
66 env->vill = 1;
67 env->vtype = 0;
68 env->vl = 0;
69 env->vstart = 0;
70 return 0;
71 }
72
73 /* lmul encoded as in DisasContext::lmul */
74 lmul = sextract32(FIELD_EX64(s2, VTYPE, VLMUL), 0, 3);
75 vlmax = vext_get_vlmax(cpu->cfg.vlenb, vsew, lmul);
76 if (s1 <= vlmax) {
77 vl = s1;
78 } else {
79 vl = vlmax;
80 }
81 env->vl = vl;
82 env->vtype = s2;
83 env->vstart = 0;
84 env->vill = 0;
85 return vl;
86 }
87
/*
 * Return the maximum number of elements that can be operated on (VLMAX).
 *
 * desc:     simd descriptor; its maxsz field carries vlenb.
 * log2_esz: log2 of the element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
{
    /*
     * simd_desc supports at most 2048 bytes, so the max vlen is 1024 bits
     * and vlen in bytes (vlenb) is what maxsz encodes.
     */
    uint32_t vlenb = simd_maxsz(desc);
    int scale = vext_lmul(desc) - log2_esz;

    /* A negative scale divides vlenb, a non-negative one multiplies it. */
    if (scale < 0) {
        return vlenb >> -scale;
    }
    return vlenb << scale;
}
105
/* Clear the bits selected by env->cur_pmmask in addr, then OR in cur_pmbase. */
static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
{
    target_ulong masked = addr & ~env->cur_pmmask;

    return masked | env->cur_pmbase;
}
110
111 /*
112 * This function checks watchpoint before real load operation.
113 *
114 * In system mode, the TLB API probe_access is enough for watchpoint check.
115 * In user mode, there is no watchpoint support now.
116 *
117 * It will trigger an exception if there is no mapping in TLB
118 * and page table walk can't fill the TLB entry. Then the guest
119 * software can return here after process the exception or never return.
120 */
probe_pages(CPURISCVState * env,target_ulong addr,target_ulong len,uintptr_t ra,MMUAccessType access_type)121 static void probe_pages(CPURISCVState *env, target_ulong addr,
122 target_ulong len, uintptr_t ra,
123 MMUAccessType access_type)
124 {
125 target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
126 target_ulong curlen = MIN(pagelen, len);
127 int mmu_index = riscv_env_mmu_index(env, false);
128
129 probe_access(env, adjust_addr(env, addr), curlen, access_type,
130 mmu_index, ra);
131 if (len > curlen) {
132 addr += curlen;
133 curlen = len - curlen;
134 probe_access(env, adjust_addr(env, addr), curlen, access_type,
135 mmu_index, ra);
136 }
137 }
138
/*
 * Write the low bit of value into mask bit `index` of v0, where v0 is
 * accessed as an array of 64-bit words.
 */
static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    uint64_t *words = v0;
    int word = index / 64;
    int bit = index % 64;

    words[word] = deposit64(words[word], bit, 1, value);
}
147
148 /* elements operations for load and store */
149 typedef void vext_ldst_elem_fn(CPURISCVState *env, abi_ptr addr,
150 uint32_t idx, void *vd, uintptr_t retaddr);
151
152 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \
153 static void NAME(CPURISCVState *env, abi_ptr addr, \
154 uint32_t idx, void *vd, uintptr_t retaddr)\
155 { \
156 ETYPE *cur = ((ETYPE *)vd + H(idx)); \
157 *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \
158 } \
159
GEN_VEXT_LD_ELEM(lde_b,int8_t,H1,ldsb)160 GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb)
161 GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
162 GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
163 GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)
164
165 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \
166 static void NAME(CPURISCVState *env, abi_ptr addr, \
167 uint32_t idx, void *vd, uintptr_t retaddr)\
168 { \
169 ETYPE data = *((ETYPE *)vd + H(idx)); \
170 cpu_##STSUF##_data_ra(env, addr, data, retaddr); \
171 }
172
173 GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb)
174 GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
175 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
176 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
177
178 static void vext_set_tail_elems_1s(target_ulong vl, void *vd,
179 uint32_t desc, uint32_t nf,
180 uint32_t esz, uint32_t max_elems)
181 {
182 uint32_t vta = vext_vta(desc);
183 int k;
184
185 if (vta == 0) {
186 return;
187 }
188
189 for (k = 0; k < nf; ++k) {
190 vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz,
191 (k * max_elems + max_elems) * esz);
192 }
193 }
194
195 /*
196 * stride: access vector element from strided memory
197 */
198 static void
vext_ldst_stride(void * vd,void * v0,target_ulong base,target_ulong stride,CPURISCVState * env,uint32_t desc,uint32_t vm,vext_ldst_elem_fn * ldst_elem,uint32_t log2_esz,uintptr_t ra)199 vext_ldst_stride(void *vd, void *v0, target_ulong base,
200 target_ulong stride, CPURISCVState *env,
201 uint32_t desc, uint32_t vm,
202 vext_ldst_elem_fn *ldst_elem,
203 uint32_t log2_esz, uintptr_t ra)
204 {
205 uint32_t i, k;
206 uint32_t nf = vext_nf(desc);
207 uint32_t max_elems = vext_max_elems(desc, log2_esz);
208 uint32_t esz = 1 << log2_esz;
209 uint32_t vma = vext_vma(desc);
210
211 VSTART_CHECK_EARLY_EXIT(env);
212
213 for (i = env->vstart; i < env->vl; env->vstart = ++i) {
214 k = 0;
215 while (k < nf) {
216 if (!vm && !vext_elem_mask(v0, i)) {
217 /* set masked-off elements to 1s */
218 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
219 (i + k * max_elems + 1) * esz);
220 k++;
221 continue;
222 }
223 target_ulong addr = base + stride * i + (k << log2_esz);
224 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
225 k++;
226 }
227 }
228 env->vstart = 0;
229
230 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
231 }
232
233 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \
234 void HELPER(NAME)(void *vd, void * v0, target_ulong base, \
235 target_ulong stride, CPURISCVState *env, \
236 uint32_t desc) \
237 { \
238 uint32_t vm = vext_vm(desc); \
239 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \
240 ctzl(sizeof(ETYPE)), GETPC()); \
241 }
242
GEN_VEXT_LD_STRIDE(vlse8_v,int8_t,lde_b)243 GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b)
244 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
245 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
246 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)
247
248 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \
249 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
250 target_ulong stride, CPURISCVState *env, \
251 uint32_t desc) \
252 { \
253 uint32_t vm = vext_vm(desc); \
254 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \
255 ctzl(sizeof(ETYPE)), GETPC()); \
256 }
257
258 GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b)
259 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
260 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
261 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
262
263 /*
264 * unit-stride: access elements stored contiguously in memory
265 */
266
267 /* unmasked unit-stride load and store operation */
268 static void
269 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
270 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl,
271 uintptr_t ra)
272 {
273 uint32_t i, k;
274 uint32_t nf = vext_nf(desc);
275 uint32_t max_elems = vext_max_elems(desc, log2_esz);
276 uint32_t esz = 1 << log2_esz;
277
278 VSTART_CHECK_EARLY_EXIT(env);
279
280 /* load bytes from guest memory */
281 for (i = env->vstart; i < evl; env->vstart = ++i) {
282 k = 0;
283 while (k < nf) {
284 target_ulong addr = base + ((i * nf + k) << log2_esz);
285 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
286 k++;
287 }
288 }
289 env->vstart = 0;
290
291 vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems);
292 }
293
294 /*
295 * masked unit-stride load and store operation will be a special case of
296 * stride, stride = NF * sizeof (ETYPE)
297 */
298
299 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \
300 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
301 CPURISCVState *env, uint32_t desc) \
302 { \
303 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
304 vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \
305 ctzl(sizeof(ETYPE)), GETPC()); \
306 } \
307 \
308 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
309 CPURISCVState *env, uint32_t desc) \
310 { \
311 vext_ldst_us(vd, base, env, desc, LOAD_FN, \
312 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \
313 }
314
GEN_VEXT_LD_US(vle8_v,int8_t,lde_b)315 GEN_VEXT_LD_US(vle8_v, int8_t, lde_b)
316 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
317 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
318 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)
319
320 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \
321 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
322 CPURISCVState *env, uint32_t desc) \
323 { \
324 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
325 vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \
326 ctzl(sizeof(ETYPE)), GETPC()); \
327 } \
328 \
329 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
330 CPURISCVState *env, uint32_t desc) \
331 { \
332 vext_ldst_us(vd, base, env, desc, STORE_FN, \
333 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \
334 }
335
336 GEN_VEXT_ST_US(vse8_v, int8_t, ste_b)
337 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
338 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
339 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)
340
341 /*
342 * unit stride mask load and store, EEW = 1
343 */
344 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
345 CPURISCVState *env, uint32_t desc)
346 {
347 /* evl = ceil(vl/8) */
348 uint8_t evl = (env->vl + 7) >> 3;
349 vext_ldst_us(vd, base, env, desc, lde_b,
350 0, evl, GETPC());
351 }
352
HELPER(vsm_v)353 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
354 CPURISCVState *env, uint32_t desc)
355 {
356 /* evl = ceil(vl/8) */
357 uint8_t evl = (env->vl + 7) >> 3;
358 vext_ldst_us(vd, base, env, desc, ste_b,
359 0, evl, GETPC());
360 }
361
362 /*
363 * index: access vector element from indexed memory
364 */
365 typedef target_ulong vext_get_index_addr(target_ulong base,
366 uint32_t idx, void *vs2);
367
368 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \
369 static target_ulong NAME(target_ulong base, \
370 uint32_t idx, void *vs2) \
371 { \
372 return (base + *((ETYPE *)vs2 + H(idx))); \
373 }
374
GEN_VEXT_GET_INDEX_ADDR(idx_b,uint8_t,H1)375 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1)
376 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
377 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
378 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
379
380 static inline void
381 vext_ldst_index(void *vd, void *v0, target_ulong base,
382 void *vs2, CPURISCVState *env, uint32_t desc,
383 vext_get_index_addr get_index_addr,
384 vext_ldst_elem_fn *ldst_elem,
385 uint32_t log2_esz, uintptr_t ra)
386 {
387 uint32_t i, k;
388 uint32_t nf = vext_nf(desc);
389 uint32_t vm = vext_vm(desc);
390 uint32_t max_elems = vext_max_elems(desc, log2_esz);
391 uint32_t esz = 1 << log2_esz;
392 uint32_t vma = vext_vma(desc);
393
394 VSTART_CHECK_EARLY_EXIT(env);
395
396 /* load bytes from guest memory */
397 for (i = env->vstart; i < env->vl; env->vstart = ++i) {
398 k = 0;
399 while (k < nf) {
400 if (!vm && !vext_elem_mask(v0, i)) {
401 /* set masked-off elements to 1s */
402 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
403 (i + k * max_elems + 1) * esz);
404 k++;
405 continue;
406 }
407 abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
408 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
409 k++;
410 }
411 }
412 env->vstart = 0;
413
414 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
415 }
416
417 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \
418 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
419 void *vs2, CPURISCVState *env, uint32_t desc) \
420 { \
421 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
422 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \
423 }
424
GEN_VEXT_LD_INDEX(vlxei8_8_v,int8_t,idx_b,lde_b)425 GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b)
426 GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h)
427 GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w)
428 GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d)
429 GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b)
430 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
431 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
432 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
433 GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b)
434 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
435 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
436 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
437 GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b)
438 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
439 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
440 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)
441
442 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \
443 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
444 void *vs2, CPURISCVState *env, uint32_t desc) \
445 { \
446 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
447 STORE_FN, ctzl(sizeof(ETYPE)), \
448 GETPC()); \
449 }
450
451 GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b)
452 GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h)
453 GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w)
454 GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d)
455 GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b)
456 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
457 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
458 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
459 GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b)
460 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
461 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
462 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
463 GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b)
464 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
465 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
466 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)
467
468 /*
469 * unit-stride fault-only-fisrt load instructions
470 */
471 static inline void
472 vext_ldff(void *vd, void *v0, target_ulong base,
473 CPURISCVState *env, uint32_t desc,
474 vext_ldst_elem_fn *ldst_elem,
475 uint32_t log2_esz, uintptr_t ra)
476 {
477 uint32_t i, k, vl = 0;
478 uint32_t nf = vext_nf(desc);
479 uint32_t vm = vext_vm(desc);
480 uint32_t max_elems = vext_max_elems(desc, log2_esz);
481 uint32_t esz = 1 << log2_esz;
482 uint32_t vma = vext_vma(desc);
483 target_ulong addr, offset, remain;
484 int mmu_index = riscv_env_mmu_index(env, false);
485
486 VSTART_CHECK_EARLY_EXIT(env);
487
488 /* probe every access */
489 for (i = env->vstart; i < env->vl; i++) {
490 if (!vm && !vext_elem_mask(v0, i)) {
491 continue;
492 }
493 addr = adjust_addr(env, base + i * (nf << log2_esz));
494 if (i == 0) {
495 /* Allow fault on first element. */
496 probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
497 } else {
498 remain = nf << log2_esz;
499 while (remain > 0) {
500 void *host;
501 int flags;
502
503 offset = -(addr | TARGET_PAGE_MASK);
504
505 /* Probe nonfault on subsequent elements. */
506 flags = probe_access_flags(env, addr, offset, MMU_DATA_LOAD,
507 mmu_index, true, &host, 0);
508
509 /*
510 * Stop if invalid (unmapped) or mmio (transaction may fail).
511 * Do not stop if watchpoint, as the spec says that
512 * first-fault should continue to access the same
513 * elements regardless of any watchpoint.
514 */
515 if (flags & ~TLB_WATCHPOINT) {
516 vl = i;
517 goto ProbeSuccess;
518 }
519 if (remain <= offset) {
520 break;
521 }
522 remain -= offset;
523 addr = adjust_addr(env, addr + offset);
524 }
525 }
526 }
527 ProbeSuccess:
528 /* load bytes from guest memory */
529 if (vl != 0) {
530 env->vl = vl;
531 }
532 for (i = env->vstart; i < env->vl; i++) {
533 k = 0;
534 while (k < nf) {
535 if (!vm && !vext_elem_mask(v0, i)) {
536 /* set masked-off elements to 1s */
537 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
538 (i + k * max_elems + 1) * esz);
539 k++;
540 continue;
541 }
542 addr = base + ((i * nf + k) << log2_esz);
543 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
544 k++;
545 }
546 }
547 env->vstart = 0;
548
549 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
550 }
551
552 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \
553 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
554 CPURISCVState *env, uint32_t desc) \
555 { \
556 vext_ldff(vd, v0, base, env, desc, LOAD_FN, \
557 ctzl(sizeof(ETYPE)), GETPC()); \
558 }
559
GEN_VEXT_LDFF(vle8ff_v,int8_t,lde_b)560 GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b)
561 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
562 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
563 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
564
/*
 * Basic two-operand operations, used as the OP argument of the generator
 * macros.  Every argument is parenthesized so that a compound expression
 * passed as N or M is not regrouped by operator precedence.
 */
#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  ((N) & (M))
#define DO_XOR(N, M)  ((N) ^ (M))
#define DO_OR(N, M)   ((N) | (M))
#define DO_ADD(N, M)  ((N) + (M))

/* Signed min/max (N wins ties in DO_MAX, M wins ties in DO_MIN) */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))
574
575 /*
576 * load and store whole register instructions
577 */
578 static void
579 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
580 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra)
581 {
582 uint32_t i, k, off, pos;
583 uint32_t nf = vext_nf(desc);
584 uint32_t vlenb = riscv_cpu_cfg(env)->vlenb;
585 uint32_t max_elems = vlenb >> log2_esz;
586
587 if (env->vstart >= ((vlenb * nf) >> log2_esz)) {
588 env->vstart = 0;
589 return;
590 }
591
592 k = env->vstart / max_elems;
593 off = env->vstart % max_elems;
594
595 if (off) {
596 /* load/store rest of elements of current segment pointed by vstart */
597 for (pos = off; pos < max_elems; pos++, env->vstart++) {
598 target_ulong addr = base + ((pos + k * max_elems) << log2_esz);
599 ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd,
600 ra);
601 }
602 k++;
603 }
604
605 /* load/store elements for rest of segments */
606 for (; k < nf; k++) {
607 for (i = 0; i < max_elems; i++, env->vstart++) {
608 target_ulong addr = base + ((i + k * max_elems) << log2_esz);
609 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
610 }
611 }
612
613 env->vstart = 0;
614 }
615
616 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \
617 void HELPER(NAME)(void *vd, target_ulong base, \
618 CPURISCVState *env, uint32_t desc) \
619 { \
620 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \
621 ctzl(sizeof(ETYPE)), GETPC()); \
622 }
623
GEN_VEXT_LD_WHOLE(vl1re8_v,int8_t,lde_b)624 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b)
625 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
626 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
627 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
628 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b)
629 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
630 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
631 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
632 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b)
633 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
634 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
635 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
636 GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b)
637 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
638 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
639 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)
640
641 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \
642 void HELPER(NAME)(void *vd, target_ulong base, \
643 CPURISCVState *env, uint32_t desc) \
644 { \
645 vext_ldst_whole(vd, base, env, desc, STORE_FN, \
646 ctzl(sizeof(ETYPE)), GETPC()); \
647 }
648
649 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
650 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
651 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
652 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
653
/*
 * Vector Integer Arithmetic Instructions
 */

/*
 * Operand type tuples, each expanding to five types in the order
 * (TD, T1, T2, TX1, TX2).
 */

/* same-width, all operands signed */
#define OP_SSS_B  int8_t,  int8_t,  int8_t,  int8_t,  int8_t
#define OP_SSS_H  int16_t, int16_t, int16_t, int16_t, int16_t
#define OP_SSS_W  int32_t, int32_t, int32_t, int32_t, int32_t
#define OP_SSS_D  int64_t, int64_t, int64_t, int64_t, int64_t

/* same-width, signed result, unsigned T1/TX1, signed T2/TX2 */
#define OP_SUS_B  int8_t,  uint8_t,  int8_t,  uint8_t,  int8_t
#define OP_SUS_H  int16_t, uint16_t, int16_t, uint16_t, int16_t
#define OP_SUS_W  int32_t, uint32_t, int32_t, uint32_t, int32_t
#define OP_SUS_D  int64_t, uint64_t, int64_t, uint64_t, int64_t

/* double-width result and TX1/TX2, all signed */
#define WOP_SSS_B int16_t, int8_t,  int8_t,  int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t

/* double-width result, unsigned T1/TX1, signed T2/TX2 */
#define WOP_SUS_B int16_t, uint8_t,  int8_t,  uint16_t, int16_t
#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t

/* double-width result, signed T1/TX1, unsigned T2/TX2 */
#define WOP_SSU_B int16_t, int8_t,  uint8_t,  int16_t, uint16_t
#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t

/* single-width result, double-width T2/TX2, all signed */
#define NOP_SSS_B int8_t,  int8_t,  int16_t, int8_t,  int16_t
#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t

/* single-width result, double-width T2/TX2, all unsigned */
#define NOP_UUU_B uint8_t,  uint8_t,  uint16_t, uint8_t,  uint16_t
#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
682
/*
 * Subtraction and reverse subtraction.  Arguments are parenthesized so a
 * compound expression passed as N or M keeps its own grouping.
 */
#define DO_SUB(N, M)  ((N) - (M))
#define DO_RSUB(N, M) ((M) - (N))
685
686 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
687 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
688 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
689 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
690 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
691 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
692 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
693 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
694
695 GEN_VEXT_VV(vadd_vv_b, 1)
696 GEN_VEXT_VV(vadd_vv_h, 2)
697 GEN_VEXT_VV(vadd_vv_w, 4)
698 GEN_VEXT_VV(vadd_vv_d, 8)
699 GEN_VEXT_VV(vsub_vv_b, 1)
700 GEN_VEXT_VV(vsub_vv_h, 2)
701 GEN_VEXT_VV(vsub_vv_w, 4)
702 GEN_VEXT_VV(vsub_vv_d, 8)
703
704
705 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
706 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
707 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
708 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
709 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
710 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
711 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
712 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
713 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
714 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
715 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
716 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
717
718 GEN_VEXT_VX(vadd_vx_b, 1)
719 GEN_VEXT_VX(vadd_vx_h, 2)
720 GEN_VEXT_VX(vadd_vx_w, 4)
721 GEN_VEXT_VX(vadd_vx_d, 8)
722 GEN_VEXT_VX(vsub_vx_b, 1)
723 GEN_VEXT_VX(vsub_vx_h, 2)
724 GEN_VEXT_VX(vsub_vx_w, 4)
725 GEN_VEXT_VX(vsub_vx_d, 8)
726 GEN_VEXT_VX(vrsub_vx_b, 1)
727 GEN_VEXT_VX(vrsub_vx_h, 2)
728 GEN_VEXT_VX(vrsub_vx_w, 4)
729 GEN_VEXT_VX(vrsub_vx_d, 8)
730
731 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
732 {
733 intptr_t oprsz = simd_oprsz(desc);
734 intptr_t i;
735
736 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
737 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
738 }
739 }
740
HELPER(vec_rsubs16)741 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
742 {
743 intptr_t oprsz = simd_oprsz(desc);
744 intptr_t i;
745
746 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
747 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
748 }
749 }
750
HELPER(vec_rsubs32)751 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
752 {
753 intptr_t oprsz = simd_oprsz(desc);
754 intptr_t i;
755
756 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
757 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
758 }
759 }
760
HELPER(vec_rsubs64)761 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
762 {
763 intptr_t oprsz = simd_oprsz(desc);
764 intptr_t i;
765
766 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
767 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
768 }
769 }
770
/* Vector Widening Integer Add/Subtract */

/*
 * Operand type tuples (TD, T1, T2, TX1, TX2) for the widening forms.
 */

/* double-width result, single-width unsigned sources */
#define WOP_UUU_B  uint16_t, uint8_t,  uint8_t,  uint16_t, uint16_t
#define WOP_UUU_H  uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W  uint64_t, uint32_t, uint32_t, uint64_t, uint64_t

/*
 * double-width result, single-width signed sources
 * (NOTE: textually identical to the WOP_SSS_* definitions earlier in
 * this file.)
 */
#define WOP_SSS_B  int16_t, int8_t,  int8_t,  int16_t, int16_t
#define WOP_SSS_H  int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W  int64_t, int32_t, int32_t, int64_t, int64_t

/* double-width result and T2, single-width T1, unsigned */
#define WOP_WUUU_B uint16_t, uint8_t,  uint16_t, uint16_t, uint16_t
#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t

/* double-width result and T2, single-width T1, signed */
#define WOP_WSSS_B int16_t, int8_t,  int16_t, int16_t, int16_t
#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
RVVCALL(OPIVV2,vwaddu_vv_b,WOP_UUU_B,H2,H1,H1,DO_ADD)784 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
785 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
786 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
787 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
788 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
789 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
790 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
791 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
792 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
793 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
794 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
795 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
796 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
797 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
798 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
799 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
800 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
801 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
802 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
803 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
804 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
805 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
806 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
807 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
808 GEN_VEXT_VV(vwaddu_vv_b, 2)
809 GEN_VEXT_VV(vwaddu_vv_h, 4)
810 GEN_VEXT_VV(vwaddu_vv_w, 8)
811 GEN_VEXT_VV(vwsubu_vv_b, 2)
812 GEN_VEXT_VV(vwsubu_vv_h, 4)
813 GEN_VEXT_VV(vwsubu_vv_w, 8)
814 GEN_VEXT_VV(vwadd_vv_b, 2)
815 GEN_VEXT_VV(vwadd_vv_h, 4)
816 GEN_VEXT_VV(vwadd_vv_w, 8)
817 GEN_VEXT_VV(vwsub_vv_b, 2)
818 GEN_VEXT_VV(vwsub_vv_h, 4)
819 GEN_VEXT_VV(vwsub_vv_w, 8)
820 GEN_VEXT_VV(vwaddu_wv_b, 2)
821 GEN_VEXT_VV(vwaddu_wv_h, 4)
822 GEN_VEXT_VV(vwaddu_wv_w, 8)
823 GEN_VEXT_VV(vwsubu_wv_b, 2)
824 GEN_VEXT_VV(vwsubu_wv_h, 4)
825 GEN_VEXT_VV(vwsubu_wv_w, 8)
826 GEN_VEXT_VV(vwadd_wv_b, 2)
827 GEN_VEXT_VV(vwadd_wv_h, 4)
828 GEN_VEXT_VV(vwadd_wv_w, 8)
829 GEN_VEXT_VV(vwsub_wv_b, 2)
830 GEN_VEXT_VV(vwsub_wv_h, 4)
831 GEN_VEXT_VV(vwsub_wv_w, 8)
832
833 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
834 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
835 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
836 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
837 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
838 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
839 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
840 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
841 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
842 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
843 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
844 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
845 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
846 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
847 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
848 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
849 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
850 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
851 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
852 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
853 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
854 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
855 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
856 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
857 GEN_VEXT_VX(vwaddu_vx_b, 2)
858 GEN_VEXT_VX(vwaddu_vx_h, 4)
859 GEN_VEXT_VX(vwaddu_vx_w, 8)
860 GEN_VEXT_VX(vwsubu_vx_b, 2)
861 GEN_VEXT_VX(vwsubu_vx_h, 4)
862 GEN_VEXT_VX(vwsubu_vx_w, 8)
863 GEN_VEXT_VX(vwadd_vx_b, 2)
864 GEN_VEXT_VX(vwadd_vx_h, 4)
865 GEN_VEXT_VX(vwadd_vx_w, 8)
866 GEN_VEXT_VX(vwsub_vx_b, 2)
867 GEN_VEXT_VX(vwsub_vx_h, 4)
868 GEN_VEXT_VX(vwsub_vx_w, 8)
869 GEN_VEXT_VX(vwaddu_wx_b, 2)
870 GEN_VEXT_VX(vwaddu_wx_h, 4)
871 GEN_VEXT_VX(vwaddu_wx_w, 8)
872 GEN_VEXT_VX(vwsubu_wx_b, 2)
873 GEN_VEXT_VX(vwsubu_wx_h, 4)
874 GEN_VEXT_VX(vwsubu_wx_w, 8)
875 GEN_VEXT_VX(vwadd_wx_b, 2)
876 GEN_VEXT_VX(vwadd_wx_h, 4)
877 GEN_VEXT_VX(vwadd_wx_w, 8)
878 GEN_VEXT_VX(vwsub_wx_b, 2)
879 GEN_VEXT_VX(vwsub_wx_h, 4)
880 GEN_VEXT_VX(vwsub_wx_w, 8)
881
/*
 * Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
 *
 * N and M are the operands and C is the carry/borrow input.  Arguments are
 * parenthesized so a compound expression passed for any of them keeps its
 * own grouping.
 */
#define DO_VADC(N, M, C) ((N) + (M) + (C))
#define DO_VSBC(N, M, C) ((N) - (M) - (C))
885
/*
 * Generate the helper for a vector-vector add-with-carry or
 * subtract-with-borrow instruction.
 *
 *   NAME   helper name, wrapped by HELPER()
 *   ETYPE  element type of vs1, vs2 and vd
 *   H      maps element index i to the index used to address the registers
 *   DO_OP  invoked as DO_OP(vs2[i], vs1[i], carry)
 *
 * Every element in [env->vstart, env->vl) is computed; the bit read from
 * v0 with vext_elem_mask() is the carry/borrow-in, not an element enable.
 * env->vstart is reset to 0 afterwards and the bytes from vl * esz up to
 * total_elems * esz are passed to vext_set_elems_1s() with the vta flag.
 */
#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP)              \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t esz = sizeof(ETYPE);                             \
    uint32_t total_elems =                                    \
        vext_get_total_elems(env, desc, esz);                 \
    uint32_t vta = vext_vta(desc);                            \
    uint32_t i;                                               \
                                                              \
    VSTART_CHECK_EARLY_EXIT(env);                             \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = vext_elem_mask(v0, i);                  \
                                                              \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry);         \
    }                                                         \
    env->vstart = 0;                                          \
    /* set tail elements to 1s */                             \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);  \
}

/* vadc.vvm: one helper per element width (8/16/32/64 bits) */
GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)

/* vsbc.vvm: one helper per element width (8/16/32/64 bits) */
GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
920
/*
 * Generate the helper for a vector-scalar add-with-carry or
 * subtract-with-borrow instruction.
 *
 *   NAME   helper name, wrapped by HELPER()
 *   ETYPE  element type of vs2 and vd
 *   H      maps element index i to the index used to address the registers
 *   DO_OP  invoked as DO_OP(vs2[i], scalar, carry)
 *
 * The scalar s1 is converted to target_long and then to ETYPE before it is
 * used, so only its low sizeof(ETYPE) bytes take part.  As in the
 * vector-vector form, the v0 bit is the carry/borrow-in and every element
 * in [env->vstart, env->vl) is computed.
 */
#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP)                         \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
                  CPURISCVState *env, uint32_t desc)                     \
{                                                                        \
    uint32_t vl = env->vl;                                               \
    uint32_t esz = sizeof(ETYPE);                                        \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);         \
    uint32_t vta = vext_vta(desc);                                       \
    uint32_t i;                                                          \
                                                                         \
    VSTART_CHECK_EARLY_EXIT(env);                                        \
                                                                         \
    for (i = env->vstart; i < vl; i++) {                                 \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                               \
        ETYPE carry = vext_elem_mask(v0, i);                             \
                                                                         \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
    }                                                                    \
    env->vstart = 0;                                                     \
    /* set tail elements to 1s */                                        \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);             \
}

/* vadc.vxm: one helper per element width (8/16/32/64 bits) */
GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)

/* vsbc.vxm: one helper per element width (8/16/32/64 bits) */
GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
953
/*
 * Carry-out / borrow-out predicates; N and M are operands of the same
 * unsigned element type and C is the carry/borrow-in.
 *
 * DO_MADC: the sum is truncated to the type of N; a carry occurred when
 *          the truncated sum is below N (or, with a carry-in, not above N).
 * DO_MSBC: N - M - C borrows when N < M (or, with a borrow-in, N <= M).
 *
 * Parameters are parenthesized so expression arguments are treated as a
 * single operand.  N and M appear more than once in the expansion, so
 * arguments must not have side effects.
 */
#define DO_MADC(N, M, C) ((C) ? (__typeof(N))((N) + (M) + 1) <= (N) : \
                          (__typeof(N))((N) + (M)) < (N))
#define DO_MSBC(N, M, C) ((C) ? (N) <= (M) : (N) < (M))
957
/*
 * Generate the helper for a vector-vector carry-out / borrow-out
 * instruction, which writes one mask bit per element.
 *
 *   NAME   helper name, wrapped by HELPER()
 *   ETYPE  element type of vs1 and vs2
 *   H      maps element index i to the index used to address vs1/vs2
 *   DO_OP  invoked as DO_OP(vs2[i], vs1[i], carry); the result is written
 *          to mask bit i of vd with vext_set_elem_mask()
 *
 * The carry/borrow-in is read from v0 only when vext_vm(desc) is 0;
 * otherwise it is 0.  total_elems is vlenb << 3, i.e. the number of bits
 * in vlenb bytes.
 */
#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;    \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t i;                                               \
                                                              \
    VSTART_CHECK_EARLY_EXIT(env);                             \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = !vm && vext_elem_mask(v0, i);           \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry));      \
    }                                                         \
    env->vstart = 0;                                          \
    /*
     * mask destination registers are always tail-agnostic;
     * set tail elements to 1s
     */                                                       \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}

/* vmadc.vvm: one helper per element width (8/16/32/64 bits) */
GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)

/* vmsbc.vvm: one helper per element width (8/16/32/64 bits) */
GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
997
/*
 * Generate the helper for a vector-scalar carry-out / borrow-out
 * instruction, which writes one mask bit per element.
 *
 *   NAME   helper name, wrapped by HELPER()
 *   ETYPE  element type of vs2
 *   H      maps element index i to the index used to address vs2
 *   DO_OP  invoked as DO_OP(vs2[i], scalar, carry); the result is written
 *          to mask bit i of vd with vext_set_elem_mask()
 *
 * The scalar s1 is converted to target_long and then to ETYPE.  The
 * carry/borrow-in is read from v0 only when vext_vm(desc) is 0; otherwise
 * it is 0.  total_elems is vlenb << 3, i.e. the number of bits in vlenb
 * bytes.
 */
#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{                                                               \
    uint32_t vl = env->vl;                                      \
    uint32_t vm = vext_vm(desc);                                \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;      \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);                \
    uint32_t i;                                                 \
                                                                \
    VSTART_CHECK_EARLY_EXIT(env);                               \
                                                                \
    for (i = env->vstart; i < vl; i++) {                        \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                      \
        ETYPE carry = !vm && vext_elem_mask(v0, i);             \
        vext_set_elem_mask(vd, i,                               \
                DO_OP(s2, (ETYPE)(target_long)s1, carry));      \
    }                                                           \
    env->vstart = 0;                                            \
    /*
     * mask destination registers are always tail-agnostic;
     * set tail elements to 1s
     */                                                         \
    if (vta_all_1s) {                                           \
        for (; i < total_elems; i++) {                          \
            vext_set_elem_mask(vd, i, 1);                       \
        }                                                       \
    }                                                           \
}

/* vmadc.vxm: one helper per element width (8/16/32/64 bits) */
GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)

/* vmsbc.vxm: one helper per element width (8/16/32/64 bits) */
GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
1037
/* Vector Bitwise Logical Instructions */
/*
 * vand/vor/vxor, vector-vector forms, for 8/16/32/64-bit elements.  Each
 * RVVCALL(OPIVV2, ...) line is paired with a GEN_VEXT_VV() line of the same
 * name further down; the GEN_VEXT_VV() second argument is 1/2/4/8 for
 * _b/_h/_w/_d (presumably the element size in bytes -- both macros are
 * defined outside this section).
 */
RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
GEN_VEXT_VV(vand_vv_b, 1)
GEN_VEXT_VV(vand_vv_h, 2)
GEN_VEXT_VV(vand_vv_w, 4)
GEN_VEXT_VV(vand_vv_d, 8)
GEN_VEXT_VV(vor_vv_b, 1)
GEN_VEXT_VV(vor_vv_h, 2)
GEN_VEXT_VV(vor_vv_w, 4)
GEN_VEXT_VV(vor_vv_d, 8)
GEN_VEXT_VV(vxor_vv_b, 1)
GEN_VEXT_VV(vxor_vv_h, 2)
GEN_VEXT_VV(vxor_vv_w, 4)
GEN_VEXT_VV(vxor_vv_d, 8)

/* vand/vor/vxor, vector-scalar forms, same layout as above */
RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
GEN_VEXT_VX(vand_vx_b, 1)
GEN_VEXT_VX(vand_vx_h, 2)
GEN_VEXT_VX(vand_vx_w, 4)
GEN_VEXT_VX(vand_vx_d, 8)
GEN_VEXT_VX(vor_vx_b, 1)
GEN_VEXT_VX(vor_vx_h, 2)
GEN_VEXT_VX(vor_vx_w, 4)
GEN_VEXT_VX(vor_vx_d, 8)
GEN_VEXT_VX(vxor_vx_b, 1)
GEN_VEXT_VX(vxor_vx_h, 2)
GEN_VEXT_VX(vxor_vx_w, 4)
GEN_VEXT_VX(vxor_vx_d, 8)
1088
/* Vector Single-Width Bit Shift Instructions */
/*
 * N is the value to shift, M the (already masked) shift amount.  Both are
 * parenthesized so an expression argument is shifted as a whole.  DO_SRL
 * is also used for arithmetic right shifts by instantiating it with a
 * signed operand type.
 */
#define DO_SLL(N, M) ((N) << (M))
#define DO_SRL(N, M) ((N) >> (M))
1092
/*
 * generate the helpers for shift instructions with two vector operands
 *
 *   NAME      helper name, wrapped by HELPER()
 *   TS1       type of the shift-amount elements in vs1 and of the
 *             destination elements in vd (esz is sizeof(TS1))
 *   TS2       type of the shifted elements in vs2
 *   HS1, HS2  index mappings for vs1/vd and for vs2
 *   OP        invoked as OP(vs2[i], vs1[i] & MASK)
 *   MASK      limits the shift amount
 *
 * When vext_vm(desc) is 0 and the v0 bit for an element is clear, the
 * element is not computed; its bytes are passed to vext_set_elems_1s()
 * with the vma flag instead.
 */
#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK)             \
void HELPER(NAME)(void *vd, void *v0, void *vs1,                          \
                  void *vs2, CPURISCVState *env, uint32_t desc)           \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(TS1);                                           \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env);                                         \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        TS1 s1 = *((TS1 *)vs1 + HS1(i));                                  \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                                  \
        *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);                        \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vsll.vv: logical left shift, shift amount masked to the element width */
GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t,  uint8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)

/* vsrl.vv: right shift of unsigned elements */
GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

/*
 * vsra.vv: same DO_SRL operator, but TS2 is signed so ">>" is applied to a
 * signed value.  NOTE(review): right-shifting a negative value is
 * implementation-defined in ISO C; this relies on the compiler performing
 * an arithmetic shift.
 */
GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t,  int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1137
/*
 * generate the helpers for shift instructions with one vector and one scalar
 *
 *   NAME     helper name, wrapped by HELPER()
 *   TD       destination element type (esz is sizeof(TD))
 *   TS2      type of the shifted elements in vs2
 *   HD, HS2  index mappings for vd and vs2
 *   OP       invoked as OP(vs2[i], s1 & MASK)
 *   MASK     limits the scalar shift amount s1
 *
 * When vext_vm(desc) is 0 and the v0 bit for an element is clear, the
 * element is not computed; its bytes are passed to vext_set_elems_1s()
 * with the vma flag instead.
 */
#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,      \
                  void *vs2, CPURISCVState *env,            \
                  uint32_t desc)                            \
{                                                           \
    uint32_t vm = vext_vm(desc);                            \
    uint32_t vl = env->vl;                                  \
    uint32_t esz = sizeof(TD);                              \
    uint32_t total_elems =                                  \
        vext_get_total_elems(env, desc, esz);               \
    uint32_t vta = vext_vta(desc);                          \
    uint32_t vma = vext_vma(desc);                          \
    uint32_t i;                                             \
                                                            \
    VSTART_CHECK_EARLY_EXIT(env);                           \
                                                            \
    for (i = env->vstart; i < vl; i++) {                    \
        if (!vm && !vext_elem_mask(v0, i)) {                \
            /* set masked-off elements to 1s */             \
            vext_set_elems_1s(vd, vma, i * esz,             \
                              (i + 1) * esz);               \
            continue;                                       \
        }                                                   \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                    \
        *((TD *)vd + HD(i)) = OP(s2, s1 & MASK);            \
    }                                                       \
    env->vstart = 0;                                        \
    /* set tail elements to 1s */                           \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\
}
1171
1172 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
1173 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
1174 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
1175 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)
1176
/* vsrl.vx: right shift of unsigned elements by a scalar */
GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

/* vsra.vx: DO_SRL applied to signed elements (see NOTE on vsra_vv_*) */
GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/* Vector Narrowing Integer Right Shift Instructions */
/*
 * The shifted source type is twice as wide as the destination type, and
 * the shift-amount mask covers the source width (0xf/0x1f/0x3f for
 * 16/32/64-bit sources).  The assignment to the narrower destination
 * keeps the low bits of the shifted value.
 */
GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t,  uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t,  int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1200
/* Vector Integer Comparison Instructions */
/*
 * Element comparison predicates; each yields 0 or 1.  Signed versus
 * unsigned comparison is selected by the element type the generator macro
 * is instantiated with.  Parameters are parenthesized so that expression
 * arguments (for instance "a & b") are compared as a whole.
 */
#define DO_MSEQ(N, M) ((N) == (M))
#define DO_MSNE(N, M) ((N) != (M))
#define DO_MSLT(N, M) ((N) < (M))
#define DO_MSLE(N, M) ((N) <= (M))
#define DO_MSGT(N, M) ((N) > (M))
1207
/*
 * Generate the helper for a vector-vector integer compare, which writes
 * one mask bit per element.
 *
 *   NAME   helper name, wrapped by HELPER()
 *   ETYPE  element type of vs1 and vs2 (its signedness decides whether
 *          the comparison is signed or unsigned)
 *   H      maps element index i to the index used to address vs1/vs2
 *   DO_OP  invoked as DO_OP(vs2[i], vs1[i])
 *
 * When vext_vm(desc) is 0 and the v0 bit for an element is clear, the
 * comparison is skipped: the destination bit is set to 1 if vma is set and
 * left untouched otherwise.  total_elems is vlenb << 3, i.e. the number of
 * bits in vlenb bytes.
 */
#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t vl = env->vl;                                    \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;    \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t vma = vext_vma(desc);                            \
    uint32_t i;                                               \
                                                              \
    VSTART_CHECK_EARLY_EXIT(env);                             \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        if (!vm && !vext_elem_mask(v0, i)) {                  \
            /* set masked-off elements to 1s */               \
            if (vma) {                                        \
                vext_set_elem_mask(vd, i, 1);                 \
            }                                                 \
            continue;                                         \
        }                                                     \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1));             \
    }                                                         \
    env->vstart = 0;                                          \
    /*
     * mask destination registers are always tail-agnostic;
     * set tail elements to 1s
     */                                                       \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}

/* vmseq.vv / vmsne.vv: equality, instantiated with unsigned types */
GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t,  H1, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)

/* vmsltu.vv (unsigned types) and vmslt.vv (signed types) */
GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)

/* vmsleu.vv (unsigned types) and vmsle.vv (signed types) */
GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1274
/*
 * Generate the helper for a vector-scalar integer compare, which writes
 * one mask bit per element.
 *
 *   NAME   helper name, wrapped by HELPER()
 *   ETYPE  element type of vs2; the scalar s1 is converted to target_long
 *          and then to ETYPE before the comparison
 *   H      maps element index i to the index used to address vs2
 *   DO_OP  invoked as DO_OP(vs2[i], scalar)
 *
 * When vext_vm(desc) is 0 and the v0 bit for an element is clear, the
 * comparison is skipped: the destination bit is set to 1 if vma is set and
 * left untouched otherwise.  total_elems is vlenb << 3, i.e. the number of
 * bits in vlenb bytes.
 */
#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP)                      \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)                \
{                                                                   \
    uint32_t vm = vext_vm(desc);                                    \
    uint32_t vl = env->vl;                                          \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;          \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);                    \
    uint32_t vma = vext_vma(desc);                                  \
    uint32_t i;                                                     \
                                                                    \
    VSTART_CHECK_EARLY_EXIT(env);                                   \
                                                                    \
    for (i = env->vstart; i < vl; i++) {                            \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
        if (!vm && !vext_elem_mask(v0, i)) {                        \
            /* set masked-off elements to 1s */                     \
            if (vma) {                                              \
                vext_set_elem_mask(vd, i, 1);                       \
            }                                                       \
            continue;                                               \
        }                                                           \
        vext_set_elem_mask(vd, i,                                   \
                DO_OP(s2, (ETYPE)(target_long)s1));                 \
    }                                                               \
    env->vstart = 0;                                                \
    /*
     * mask destination registers are always tail-agnostic;
     * set tail elements to 1s
     */                                                             \
    if (vta_all_1s) {                                               \
        for (; i < total_elems; i++) {                              \
            vext_set_elem_mask(vd, i, 1);                           \
        }                                                           \
    }                                                               \
}

/* vmseq.vx / vmsne.vx: equality, instantiated with unsigned types */
GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t,  H1, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t,  H1, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)

/* vmsltu.vx (unsigned types) and vmslt.vx (signed types) */
GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)

/* vmsleu.vx (unsigned types) and vmsle.vx (signed types) */
GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)

/* vmsgtu.vx (unsigned types) and vmsgt.vx (signed types) */
GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t,  H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)

GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t,  H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
1351
/* Vector Integer Min/Max Instructions */
/*
 * Vector-vector forms.  The unsigned variants (vminu/vmaxu) use the
 * OP_UUU_* type lists and the signed ones (vmin/vmax) use OP_SSS_*;
 * presumably these select unsigned and signed element types -- they are
 * defined outside this section, as are DO_MIN and DO_MAX.
 */
RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
GEN_VEXT_VV(vminu_vv_b, 1)
GEN_VEXT_VV(vminu_vv_h, 2)
GEN_VEXT_VV(vminu_vv_w, 4)
GEN_VEXT_VV(vminu_vv_d, 8)
GEN_VEXT_VV(vmin_vv_b, 1)
GEN_VEXT_VV(vmin_vv_h, 2)
GEN_VEXT_VV(vmin_vv_w, 4)
GEN_VEXT_VV(vmin_vv_d, 8)
GEN_VEXT_VV(vmaxu_vv_b, 1)
GEN_VEXT_VV(vmaxu_vv_h, 2)
GEN_VEXT_VV(vmaxu_vv_w, 4)
GEN_VEXT_VV(vmaxu_vv_d, 8)
GEN_VEXT_VV(vmax_vv_b, 1)
GEN_VEXT_VV(vmax_vv_h, 2)
GEN_VEXT_VV(vmax_vv_w, 4)
GEN_VEXT_VV(vmax_vv_d, 8)

/* Vector-scalar forms, same layout as above */
RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
GEN_VEXT_VX(vminu_vx_b, 1)
GEN_VEXT_VX(vminu_vx_h, 2)
GEN_VEXT_VX(vminu_vx_w, 4)
GEN_VEXT_VX(vminu_vx_d, 8)
GEN_VEXT_VX(vmin_vx_b, 1)
GEN_VEXT_VX(vmin_vx_h, 2)
GEN_VEXT_VX(vmin_vx_w, 4)
GEN_VEXT_VX(vmin_vx_d, 8)
GEN_VEXT_VX(vmaxu_vx_b, 1)
GEN_VEXT_VX(vmaxu_vx_h, 2)
GEN_VEXT_VX(vmaxu_vx_w, 4)
GEN_VEXT_VX(vmaxu_vx_d, 8)
GEN_VEXT_VX(vmax_vx_b, 1)
GEN_VEXT_VX(vmax_vx_h, 2)
GEN_VEXT_VX(vmax_vx_w, 4)
GEN_VEXT_VX(vmax_vx_d, 8)
1418
/* Vector Single-Width Integer Multiply Instructions */
/*
 * Plain product of the two operands.  Parameters are parenthesized so an
 * expression argument such as "a + b" is multiplied as a whole.
 */
#define DO_MUL(N, M) ((N) * (M))
/* vmul.vv for 8/16/32/64-bit elements (product via DO_MUL) */
RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
GEN_VEXT_VV(vmul_vv_b, 1)
GEN_VEXT_VV(vmul_vv_h, 2)
GEN_VEXT_VV(vmul_vv_w, 4)
GEN_VEXT_VV(vmul_vv_d, 8)
1429
/*
 * Signed 8 x 8 multiply: return bits [15:8] of the product.
 */
static int8_t do_mulh_b(int8_t s2, int8_t s1)
{
    /* Both operands are promoted to int, so the product is exact. */
    int product = (int16_t)s2 * (int16_t)s1;

    /* Relies on ">>" of a negative int being an arithmetic shift. */
    return product >> 8;
}
1434
/*
 * Signed 16 x 16 multiply: return bits [31:16] of the product.
 */
static int16_t do_mulh_h(int16_t s2, int16_t s1)
{
    /* The product of two 16-bit values always fits in 32 bits. */
    int32_t product = (int32_t)s2 * (int32_t)s1;

    /* Relies on ">>" of a negative value being an arithmetic shift. */
    return product >> 16;
}
1439
/*
 * Signed 32 x 32 multiply: return bits [63:32] of the product.
 */
static int32_t do_mulh_w(int32_t s2, int32_t s1)
{
    /* The product of two 32-bit values always fits in 64 bits. */
    int64_t product = (int64_t)s2 * (int64_t)s1;

    /* Relies on ">>" of a negative value being an arithmetic shift. */
    return product >> 32;
}
1444
/*
 * Signed 64 x 64 multiply: return the high result word produced by
 * muls64().  muls64() is declared outside this section; it is given
 * pointers for the low and the high result word, in that order.
 */
static int64_t do_mulh_d(int64_t s2, int64_t s1)
{
    uint64_t high, low;

    muls64(&low, &high, s1, s2);
    return high;
}
1452
/*
 * Unsigned 8 x 8 multiply: return bits [15:8] of the product.
 */
static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
{
    /* Operands are promoted to int; 255 * 255 cannot overflow it. */
    int product = (uint16_t)s2 * (uint16_t)s1;

    return product >> 8;
}
1457
/*
 * Unsigned 16 x 16 multiply: return bits [31:16] of the product.
 */
static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
{
    /* Widen first so the multiplication is done in unsigned 32-bit. */
    uint32_t product = (uint32_t)s2 * (uint32_t)s1;

    return product >> 16;
}
1462
/*
 * Unsigned 32 x 32 multiply: return bits [63:32] of the product.
 */
static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
{
    /* Widen first so the multiplication is done in unsigned 64-bit. */
    uint64_t product = (uint64_t)s2 * (uint64_t)s1;

    return product >> 32;
}
1467
/*
 * Unsigned 64 x 64 multiply: return the high result word produced by
 * mulu64().  mulu64() is declared outside this section; it is given
 * pointers for the low and the high result word, in that order.
 */
static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
{
    uint64_t high, low;

    mulu64(&low, &high, s2, s1);
    return high;
}
1475
/*
 * Signed (s2) x unsigned (s1) 8-bit multiply: return bits [15:8] of the
 * product.
 */
static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
{
    /* Both operands are promoted to (signed) int; the product is exact. */
    int product = (int16_t)s2 * (uint16_t)s1;

    /* Relies on ">>" of a negative int being an arithmetic shift. */
    return product >> 8;
}
1480
/*
 * Signed (s2) x unsigned (s1) 16-bit multiply: return bits [31:16] of the
 * product.
 */
static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
{
    /*
     * Mixing int32_t with uint32_t makes the multiplication unsigned
     * (modulo 2 ** 32), which still yields the two's-complement bit
     * pattern of the signed product.
     */
    uint32_t product = (int32_t)s2 * (uint32_t)s1;

    return product >> 16;
}
1485
/*
 * Signed (s2) x unsigned (s1) 32-bit multiply: return bits [63:32] of the
 * product.
 */
static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
{
    /*
     * Mixing int64_t with uint64_t makes the multiplication unsigned
     * (modulo 2 ** 64), which still yields the two's-complement bit
     * pattern of the signed product.
     */
    uint64_t product = (int64_t)s2 * (uint64_t)s1;

    return product >> 32;
}
1490
/*
 * High 64 bits of signed s2 times unsigned s1.
 *
 * Let A = signed operand (s2),
 *     B = unsigned operand (s1),
 *     P = mulu64(A, B), the unsigned product.
 *
 * mulu64() sees A as an unsigned value.  When A < 0 that value is
 * A + 2 ** 64, so
 *
 *     P  = (A + 2 ** 64) * B
 *        = A * B + 2 ** 64 * B
 *     SP = A * B               (the signed product)
 *        = P - 2 ** 64 * B
 *
 * and when A >= 0, SP = P.  Subtracting 2 ** 64 * B only changes the high
 * word, hence
 *
 *     HI_P -= (A < 0 ? B : 0)
 */

static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
{
    uint64_t high, low;

    mulu64(&low, &high, s2, s1);

    if (s2 < 0) {
        high -= s1;
    }
    return high;
}
1519
/*
 * High-half multiplies, vector-vector forms: vmulh (signed x signed),
 * vmulhu (unsigned x unsigned) and vmulhsu (signed vs2 x unsigned vs1, per
 * the do_mulhsu_* parameter types), each using the matching do_mulh*
 * function above as the operator.
 */
RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
GEN_VEXT_VV(vmulh_vv_b, 1)
GEN_VEXT_VV(vmulh_vv_h, 2)
GEN_VEXT_VV(vmulh_vv_w, 4)
GEN_VEXT_VV(vmulh_vv_d, 8)
GEN_VEXT_VV(vmulhu_vv_b, 1)
GEN_VEXT_VV(vmulhu_vv_h, 2)
GEN_VEXT_VV(vmulhu_vv_w, 4)
GEN_VEXT_VV(vmulhu_vv_d, 8)
GEN_VEXT_VV(vmulhsu_vv_b, 1)
GEN_VEXT_VV(vmulhsu_vv_h, 2)
GEN_VEXT_VV(vmulhsu_vv_w, 4)
GEN_VEXT_VV(vmulhsu_vv_d, 8)

/* vmul and the high-half multiplies, vector-scalar forms */
RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
GEN_VEXT_VX(vmul_vx_b, 1)
GEN_VEXT_VX(vmul_vx_h, 2)
GEN_VEXT_VX(vmul_vx_w, 4)
GEN_VEXT_VX(vmul_vx_d, 8)
GEN_VEXT_VX(vmulh_vx_b, 1)
GEN_VEXT_VX(vmulh_vx_h, 2)
GEN_VEXT_VX(vmulh_vx_w, 4)
GEN_VEXT_VX(vmulh_vx_d, 8)
GEN_VEXT_VX(vmulhu_vx_b, 1)
GEN_VEXT_VX(vmulhu_vx_h, 2)
GEN_VEXT_VX(vmulhu_vx_w, 4)
GEN_VEXT_VX(vmulhu_vx_d, 8)
GEN_VEXT_VX(vmulhsu_vx_b, 1)
GEN_VEXT_VX(vmulhsu_vx_h, 2)
GEN_VEXT_VX(vmulhsu_vx_w, 4)
GEN_VEXT_VX(vmulhsu_vx_d, 8)
1577
/* Vector Integer Divide Instructions */
/*
 * N is the dividend, M the divisor.
 *
 * Division by zero does not trap: the quotient is all ones (-1 converted
 * to the type of N) and the remainder is the dividend.
 *
 * For the signed forms, "(N) == -(N)" is true only for zero and for a
 * value whose negation wraps back to itself (the most negative value of a
 * type at least as wide as int); combined with M == -1 it selects the
 * overflowing division, for which the quotient is N and the remainder 0.
 * NOTE(review): the negation relies on wrapping signed arithmetic --
 * confirm the build flags guarantee it.
 *
 * Parameters are parenthesized so expression arguments are used as a
 * single operand.  N and M appear several times in the expansions, so
 * arguments must not have side effects.
 */
#define DO_DIVU(N, M) (unlikely((M) == 0) ? (__typeof(N))(-1) : (N) / (M))
#define DO_REMU(N, M) (unlikely((M) == 0) ? (N) : (N) % (M))
#define DO_DIV(N, M)  (unlikely((M) == 0) ? (__typeof(N))(-1) : \
        unlikely(((N) == -(N)) && ((M) == (__typeof(N))(-1))) ? \
        (N) : (N) / (M))
#define DO_REM(N, M)  (unlikely((M) == 0) ? (N) : \
        unlikely(((N) == -(N)) && ((M) == (__typeof(N))(-1))) ? \
        0 : (N) % (M))
1585
/*
 * Divide/remainder, vector-vector forms.  The unsigned variants pair the
 * OP_UUU_* type lists with DO_DIVU/DO_REMU, the signed ones pair OP_SSS_*
 * with DO_DIV/DO_REM.
 */
RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
GEN_VEXT_VV(vdivu_vv_b, 1)
GEN_VEXT_VV(vdivu_vv_h, 2)
GEN_VEXT_VV(vdivu_vv_w, 4)
GEN_VEXT_VV(vdivu_vv_d, 8)
GEN_VEXT_VV(vdiv_vv_b, 1)
GEN_VEXT_VV(vdiv_vv_h, 2)
GEN_VEXT_VV(vdiv_vv_w, 4)
GEN_VEXT_VV(vdiv_vv_d, 8)
GEN_VEXT_VV(vremu_vv_b, 1)
GEN_VEXT_VV(vremu_vv_h, 2)
GEN_VEXT_VV(vremu_vv_w, 4)
GEN_VEXT_VV(vremu_vv_d, 8)
GEN_VEXT_VV(vrem_vv_b, 1)
GEN_VEXT_VV(vrem_vv_h, 2)
GEN_VEXT_VV(vrem_vv_w, 4)
GEN_VEXT_VV(vrem_vv_d, 8)

/* Divide/remainder, vector-scalar forms, same layout as above */
RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
GEN_VEXT_VX(vdivu_vx_b, 1)
GEN_VEXT_VX(vdivu_vx_h, 2)
GEN_VEXT_VX(vdivu_vx_w, 4)
GEN_VEXT_VX(vdivu_vx_d, 8)
GEN_VEXT_VX(vdiv_vx_b, 1)
GEN_VEXT_VX(vdiv_vx_h, 2)
GEN_VEXT_VX(vdiv_vx_w, 4)
GEN_VEXT_VX(vdiv_vx_d, 8)
GEN_VEXT_VX(vremu_vx_b, 1)
GEN_VEXT_VX(vremu_vx_h, 2)
GEN_VEXT_VX(vremu_vx_w, 4)
GEN_VEXT_VX(vremu_vx_d, 8)
GEN_VEXT_VX(vrem_vx_b, 1)
GEN_VEXT_VX(vrem_vx_h, 2)
GEN_VEXT_VX(vrem_vx_w, 4)
GEN_VEXT_VX(vrem_vx_d, 8)
1651
/* Vector Widening Integer Multiply Instructions */
/*
 * Vector-vector forms: vwmul (WOP_SSS_*), vwmulu (WOP_UUU_*) and vwmulsu
 * (WOP_SUS_*).  The first H accessor is one step wider than the other two
 * (H2 vs. H1, H4 vs. H2, H8 vs. H4), and the GEN_VEXT_VV() size argument
 * is 2/4/8 for the _b/_h/_w variants accordingly.  The WOP_* type lists
 * are defined outside this section.
 */
RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
GEN_VEXT_VV(vwmul_vv_b, 2)
GEN_VEXT_VV(vwmul_vv_h, 4)
GEN_VEXT_VV(vwmul_vv_w, 8)
GEN_VEXT_VV(vwmulu_vv_b, 2)
GEN_VEXT_VV(vwmulu_vv_h, 4)
GEN_VEXT_VV(vwmulu_vv_w, 8)
GEN_VEXT_VV(vwmulsu_vv_b, 2)
GEN_VEXT_VV(vwmulsu_vv_h, 4)
GEN_VEXT_VV(vwmulsu_vv_w, 8)

/* Vector-scalar forms, same layout as above */
RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
GEN_VEXT_VX(vwmul_vx_b, 2)
GEN_VEXT_VX(vwmul_vx_h, 4)
GEN_VEXT_VX(vwmul_vx_w, 8)
GEN_VEXT_VX(vwmulu_vx_b, 2)
GEN_VEXT_VX(vwmulu_vx_h, 4)
GEN_VEXT_VX(vwmulu_vx_w, 8)
GEN_VEXT_VX(vwmulsu_vx_b, 2)
GEN_VEXT_VX(vwmulsu_vx_h, 4)
GEN_VEXT_VX(vwmulsu_vx_w, 8)
1690
/* Vector Single-Width Integer Multiply-Add Instructions */
/*
 * Define do_<NAME>(vd, vs1, vs2, i): a per-element function with three
 * inputs, the third being the current destination element.
 *
 *   TD            destination element type
 *   T1, T2        types used to load vs1[i] and vs2[i]
 *   TX1, TX2      types of the locals the loaded values are converted to
 *   HD, HS1, HS2  index mappings for vd, vs1 and vs2
 *   OP            invoked as OP(vs2[i], vs1[i], vd[i]); the result is
 *                 stored back to vd[i]
 */
#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i)   \
{                                                              \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                            \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
    TD d = *((TD *)vd + HD(i));                                \
    *((TD *)vd + HD(i)) = OP(s2, s1, d);                       \
}
1700
/*
 * Multiply-add element operations.  As used by OPIVV3/OPIVX3 the
 * arguments are: N = vs2 element, M = vs1 element (or scalar),
 * D = current destination element.
 *
 *   DO_MACC:   (M * N) + D
 *   DO_NMSAC: -(M * N) + D
 *   DO_MADD:   (M * D) + N
 *   DO_NMSUB: -(M * D) + N
 *
 * Every argument is parenthesised so that an argument which is itself an
 * expression (a cast, a sum, ...) keeps its meaning after expansion.
 */
#define DO_MACC(N, M, D) ((M) * (N) + (D))
#define DO_NMSAC(N, M, D) (-((M) * (N)) + (D))
#define DO_MADD(N, M, D) ((M) * (D) + (N))
#define DO_NMSUB(N, M, D) (-((M) * (D)) + (N))
1705 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1706 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1707 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1708 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1709 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1710 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1711 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1712 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1713 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1714 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1715 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1716 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1717 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1718 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1719 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1720 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
1721 GEN_VEXT_VV(vmacc_vv_b, 1)
1722 GEN_VEXT_VV(vmacc_vv_h, 2)
1723 GEN_VEXT_VV(vmacc_vv_w, 4)
1724 GEN_VEXT_VV(vmacc_vv_d, 8)
1725 GEN_VEXT_VV(vnmsac_vv_b, 1)
1726 GEN_VEXT_VV(vnmsac_vv_h, 2)
1727 GEN_VEXT_VV(vnmsac_vv_w, 4)
1728 GEN_VEXT_VV(vnmsac_vv_d, 8)
1729 GEN_VEXT_VV(vmadd_vv_b, 1)
1730 GEN_VEXT_VV(vmadd_vv_h, 2)
1731 GEN_VEXT_VV(vmadd_vv_w, 4)
1732 GEN_VEXT_VV(vmadd_vv_d, 8)
1733 GEN_VEXT_VV(vnmsub_vv_b, 1)
1734 GEN_VEXT_VV(vnmsub_vv_h, 2)
1735 GEN_VEXT_VV(vnmsub_vv_w, 4)
1736 GEN_VEXT_VV(vnmsub_vv_d, 8)
1737
/*
 * OPIVX3: define do_<NAME>(), the per-element body of a three-operand
 * vector-scalar operation.  The scalar s1 is narrowed to T1 and then
 * converted to TX1; it is combined with element i of vs2 and the current
 * value of element i of vd as OP(vs2[i], scalar, vd[i]), and the result
 * is written back to element i of vd.
 */
#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)              \
static void do_##NAME(void *vd, target_long s1, void *vs2, int i)    \
{                                                                    \
    TD *dest = (TD *)vd + HD(i);                                     \
    TX2 s2 = ((T2 *)vs2)[HS2(i)];                                    \
    TD acc = *dest;                                                  \
                                                                     \
    *dest = OP(s2, (TX1)(T1)s1, acc);                                \
}
1745
1746 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1747 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1748 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1749 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1750 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1751 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1752 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1753 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1754 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1755 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1756 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1757 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1758 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1759 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1760 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1761 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
1762 GEN_VEXT_VX(vmacc_vx_b, 1)
1763 GEN_VEXT_VX(vmacc_vx_h, 2)
1764 GEN_VEXT_VX(vmacc_vx_w, 4)
1765 GEN_VEXT_VX(vmacc_vx_d, 8)
1766 GEN_VEXT_VX(vnmsac_vx_b, 1)
1767 GEN_VEXT_VX(vnmsac_vx_h, 2)
1768 GEN_VEXT_VX(vnmsac_vx_w, 4)
1769 GEN_VEXT_VX(vnmsac_vx_d, 8)
1770 GEN_VEXT_VX(vmadd_vx_b, 1)
1771 GEN_VEXT_VX(vmadd_vx_h, 2)
1772 GEN_VEXT_VX(vmadd_vx_w, 4)
1773 GEN_VEXT_VX(vmadd_vx_d, 8)
1774 GEN_VEXT_VX(vnmsub_vx_b, 1)
1775 GEN_VEXT_VX(vnmsub_vx_h, 2)
1776 GEN_VEXT_VX(vnmsub_vx_w, 4)
1777 GEN_VEXT_VX(vnmsub_vx_d, 8)
1778
1779 /* Vector Widening Integer Multiply-Add Instructions */
1780 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1781 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1782 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1783 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1784 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1785 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1786 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1787 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1788 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
1789 GEN_VEXT_VV(vwmaccu_vv_b, 2)
1790 GEN_VEXT_VV(vwmaccu_vv_h, 4)
1791 GEN_VEXT_VV(vwmaccu_vv_w, 8)
1792 GEN_VEXT_VV(vwmacc_vv_b, 2)
1793 GEN_VEXT_VV(vwmacc_vv_h, 4)
1794 GEN_VEXT_VV(vwmacc_vv_w, 8)
1795 GEN_VEXT_VV(vwmaccsu_vv_b, 2)
1796 GEN_VEXT_VV(vwmaccsu_vv_h, 4)
1797 GEN_VEXT_VV(vwmaccsu_vv_w, 8)
1798
1799 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
1800 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
1801 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
1802 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
1803 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
1804 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
1805 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
1806 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
1807 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
1808 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
1809 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
1810 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
1811 GEN_VEXT_VX(vwmaccu_vx_b, 2)
1812 GEN_VEXT_VX(vwmaccu_vx_h, 4)
1813 GEN_VEXT_VX(vwmaccu_vx_w, 8)
1814 GEN_VEXT_VX(vwmacc_vx_b, 2)
1815 GEN_VEXT_VX(vwmacc_vx_h, 4)
1816 GEN_VEXT_VX(vwmacc_vx_w, 8)
1817 GEN_VEXT_VX(vwmaccsu_vx_b, 2)
1818 GEN_VEXT_VX(vwmaccsu_vx_h, 4)
1819 GEN_VEXT_VX(vwmaccsu_vx_w, 8)
1820 GEN_VEXT_VX(vwmaccus_vx_b, 2)
1821 GEN_VEXT_VX(vwmaccus_vx_h, 4)
1822 GEN_VEXT_VX(vwmaccus_vx_w, 8)
1823
1824 /* Vector Integer Merge and Move Instructions */
1825 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \
1826 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \
1827 uint32_t desc) \
1828 { \
1829 uint32_t vl = env->vl; \
1830 uint32_t esz = sizeof(ETYPE); \
1831 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1832 uint32_t vta = vext_vta(desc); \
1833 uint32_t i; \
1834 \
1835 VSTART_CHECK_EARLY_EXIT(env); \
1836 \
1837 for (i = env->vstart; i < vl; i++) { \
1838 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1839 *((ETYPE *)vd + H(i)) = s1; \
1840 } \
1841 env->vstart = 0; \
1842 /* set tail elements to 1s */ \
1843 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1844 }
1845
1846 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1)
1847 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
1848 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
1849 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
1850
1851 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \
1852 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \
1853 uint32_t desc) \
1854 { \
1855 uint32_t vl = env->vl; \
1856 uint32_t esz = sizeof(ETYPE); \
1857 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1858 uint32_t vta = vext_vta(desc); \
1859 uint32_t i; \
1860 \
1861 VSTART_CHECK_EARLY_EXIT(env); \
1862 \
1863 for (i = env->vstart; i < vl; i++) { \
1864 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \
1865 } \
1866 env->vstart = 0; \
1867 /* set tail elements to 1s */ \
1868 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1869 }
1870
1871 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1)
1872 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
1873 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
1874 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
1875
1876 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \
1877 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1878 CPURISCVState *env, uint32_t desc) \
1879 { \
1880 uint32_t vl = env->vl; \
1881 uint32_t esz = sizeof(ETYPE); \
1882 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1883 uint32_t vta = vext_vta(desc); \
1884 uint32_t i; \
1885 \
1886 VSTART_CHECK_EARLY_EXIT(env); \
1887 \
1888 for (i = env->vstart; i < vl; i++) { \
1889 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \
1890 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \
1891 } \
1892 env->vstart = 0; \
1893 /* set tail elements to 1s */ \
1894 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1895 }
1896
1897 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1)
1898 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
1899 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
1900 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
1901
1902 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \
1903 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1904 void *vs2, CPURISCVState *env, uint32_t desc) \
1905 { \
1906 uint32_t vl = env->vl; \
1907 uint32_t esz = sizeof(ETYPE); \
1908 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1909 uint32_t vta = vext_vta(desc); \
1910 uint32_t i; \
1911 \
1912 VSTART_CHECK_EARLY_EXIT(env); \
1913 \
1914 for (i = env->vstart; i < vl; i++) { \
1915 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1916 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \
1917 (ETYPE)(target_long)s1); \
1918 *((ETYPE *)vd + H(i)) = d; \
1919 } \
1920 env->vstart = 0; \
1921 /* set tail elements to 1s */ \
1922 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1923 }
1924
1925 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1)
1926 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
1927 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
1928 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
1929
1930 /*
1931 * Vector Fixed-Point Arithmetic Instructions
1932 */
1933
1934 /* Vector Single-Width Saturating Add and Subtract */
1935
1936 /*
1937 * As fixed point instructions probably have round mode and saturation,
1938 * define common macros for fixed point here.
1939 */
1940 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
1941 CPURISCVState *env, int vxrm);
1942
1943 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
1944 static inline void \
1945 do_##NAME(void *vd, void *vs1, void *vs2, int i, \
1946 CPURISCVState *env, int vxrm) \
1947 { \
1948 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
1949 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1950 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \
1951 }
1952
1953 static inline void
vext_vv_rm_1(void * vd,void * v0,void * vs1,void * vs2,CPURISCVState * env,uint32_t vl,uint32_t vm,int vxrm,opivv2_rm_fn * fn,uint32_t vma,uint32_t esz)1954 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
1955 CPURISCVState *env,
1956 uint32_t vl, uint32_t vm, int vxrm,
1957 opivv2_rm_fn *fn, uint32_t vma, uint32_t esz)
1958 {
1959 VSTART_CHECK_EARLY_EXIT(env);
1960
1961 for (uint32_t i = env->vstart; i < vl; i++) {
1962 if (!vm && !vext_elem_mask(v0, i)) {
1963 /* set masked-off elements to 1s */
1964 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
1965 continue;
1966 }
1967 fn(vd, vs1, vs2, i, env, vxrm);
1968 }
1969 env->vstart = 0;
1970 }
1971
1972 static inline void
vext_vv_rm_2(void * vd,void * v0,void * vs1,void * vs2,CPURISCVState * env,uint32_t desc,opivv2_rm_fn * fn,uint32_t esz)1973 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
1974 CPURISCVState *env,
1975 uint32_t desc,
1976 opivv2_rm_fn *fn, uint32_t esz)
1977 {
1978 uint32_t vm = vext_vm(desc);
1979 uint32_t vl = env->vl;
1980 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
1981 uint32_t vta = vext_vta(desc);
1982 uint32_t vma = vext_vma(desc);
1983
1984 switch (env->vxrm) {
1985 case 0: /* rnu */
1986 vext_vv_rm_1(vd, v0, vs1, vs2,
1987 env, vl, vm, 0, fn, vma, esz);
1988 break;
1989 case 1: /* rne */
1990 vext_vv_rm_1(vd, v0, vs1, vs2,
1991 env, vl, vm, 1, fn, vma, esz);
1992 break;
1993 case 2: /* rdn */
1994 vext_vv_rm_1(vd, v0, vs1, vs2,
1995 env, vl, vm, 2, fn, vma, esz);
1996 break;
1997 default: /* rod */
1998 vext_vv_rm_1(vd, v0, vs1, vs2,
1999 env, vl, vm, 3, fn, vma, esz);
2000 break;
2001 }
2002 /* set tail elements to 1s */
2003 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
2004 }
2005
/*
 * GEN_VEXT_VV_RM: generate the helper for a fixed-point instruction in
 * OPIVV format.  The helper forwards its arguments to vext_vv_rm_2()
 * with do_<NAME> as the element callback and ESZ as the element size.
 */
#define GEN_VEXT_VV_RM(NAME, ESZ)                               \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,     \
                  CPURISCVState *env, uint32_t desc)            \
{                                                               \
    opivv2_rm_fn *elem_op = do_##NAME;                          \
                                                                \
    vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, elem_op, ESZ);    \
}
2014
/*
 * Unsigned saturating 8-bit add.  Returns a + b, or UINT8_MAX with
 * env->vxsat set when the sum wraps.  vxrm is not used.
 */
static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a,
                             uint8_t b)
{
    uint8_t sum = a + b;

    /* a wrapped sum is smaller than the first addend */
    if (sum >= a) {
        return sum;
    }
    env->vxsat = 0x1;
    return UINT8_MAX;
}
2025
/*
 * Unsigned saturating 16-bit add.  Returns a + b, or UINT16_MAX with
 * env->vxsat set when the sum wraps.  vxrm is not used.
 */
static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
                               uint16_t b)
{
    uint16_t sum = a + b;

    /* a wrapped sum is smaller than the first addend */
    if (sum >= a) {
        return sum;
    }
    env->vxsat = 0x1;
    return UINT16_MAX;
}
2036
/*
 * Unsigned saturating 32-bit add.  Returns a + b, or UINT32_MAX with
 * env->vxsat set when the sum wraps.  vxrm is not used.
 */
static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
                               uint32_t b)
{
    uint32_t sum = a + b;

    /* a wrapped sum is smaller than the first addend */
    if (sum >= a) {
        return sum;
    }
    env->vxsat = 0x1;
    return UINT32_MAX;
}
2047
/*
 * Unsigned saturating 64-bit add.  Returns a + b, or UINT64_MAX with
 * env->vxsat set when the sum wraps.  vxrm is not used.
 */
static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
                               uint64_t b)
{
    uint64_t sum = a + b;

    /* a wrapped sum is smaller than the first addend */
    if (sum >= a) {
        return sum;
    }
    env->vxsat = 0x1;
    return UINT64_MAX;
}
2058
2059 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
2060 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
2061 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
2062 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
2063 GEN_VEXT_VV_RM(vsaddu_vv_b, 1)
2064 GEN_VEXT_VV_RM(vsaddu_vv_h, 2)
2065 GEN_VEXT_VV_RM(vsaddu_vv_w, 4)
2066 GEN_VEXT_VV_RM(vsaddu_vv_d, 8)
2067
2068 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
2069 CPURISCVState *env, int vxrm);
2070
2071 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
2072 static inline void \
2073 do_##NAME(void *vd, target_long s1, void *vs2, int i, \
2074 CPURISCVState *env, int vxrm) \
2075 { \
2076 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2077 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \
2078 }
2079
2080 static inline void
vext_vx_rm_1(void * vd,void * v0,target_long s1,void * vs2,CPURISCVState * env,uint32_t vl,uint32_t vm,int vxrm,opivx2_rm_fn * fn,uint32_t vma,uint32_t esz)2081 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
2082 CPURISCVState *env,
2083 uint32_t vl, uint32_t vm, int vxrm,
2084 opivx2_rm_fn *fn, uint32_t vma, uint32_t esz)
2085 {
2086 VSTART_CHECK_EARLY_EXIT(env);
2087
2088 for (uint32_t i = env->vstart; i < vl; i++) {
2089 if (!vm && !vext_elem_mask(v0, i)) {
2090 /* set masked-off elements to 1s */
2091 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
2092 continue;
2093 }
2094 fn(vd, s1, vs2, i, env, vxrm);
2095 }
2096 env->vstart = 0;
2097 }
2098
2099 static inline void
vext_vx_rm_2(void * vd,void * v0,target_long s1,void * vs2,CPURISCVState * env,uint32_t desc,opivx2_rm_fn * fn,uint32_t esz)2100 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2101 CPURISCVState *env,
2102 uint32_t desc,
2103 opivx2_rm_fn *fn, uint32_t esz)
2104 {
2105 uint32_t vm = vext_vm(desc);
2106 uint32_t vl = env->vl;
2107 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
2108 uint32_t vta = vext_vta(desc);
2109 uint32_t vma = vext_vma(desc);
2110
2111 switch (env->vxrm) {
2112 case 0: /* rnu */
2113 vext_vx_rm_1(vd, v0, s1, vs2,
2114 env, vl, vm, 0, fn, vma, esz);
2115 break;
2116 case 1: /* rne */
2117 vext_vx_rm_1(vd, v0, s1, vs2,
2118 env, vl, vm, 1, fn, vma, esz);
2119 break;
2120 case 2: /* rdn */
2121 vext_vx_rm_1(vd, v0, s1, vs2,
2122 env, vl, vm, 2, fn, vma, esz);
2123 break;
2124 default: /* rod */
2125 vext_vx_rm_1(vd, v0, s1, vs2,
2126 env, vl, vm, 3, fn, vma, esz);
2127 break;
2128 }
2129 /* set tail elements to 1s */
2130 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
2131 }
2132
/*
 * GEN_VEXT_VX_RM: generate the helper for a fixed-point instruction in
 * OPIVX format.  The helper forwards its arguments to vext_vx_rm_2()
 * with do_<NAME> as the element callback and ESZ as the element size.
 */
#define GEN_VEXT_VX_RM(NAME, ESZ)                               \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
                  void *vs2, CPURISCVState *env,                \
                  uint32_t desc)                                \
{                                                               \
    opivx2_rm_fn *elem_op = do_##NAME;                          \
                                                                \
    vext_vx_rm_2(vd, v0, s1, vs2, env, desc, elem_op, ESZ);     \
}
2142
RVVCALL(OPIVX2_RM,vsaddu_vx_b,OP_UUU_B,H1,H1,saddu8)2143 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2144 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2145 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2146 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
2147 GEN_VEXT_VX_RM(vsaddu_vx_b, 1)
2148 GEN_VEXT_VX_RM(vsaddu_vx_h, 2)
2149 GEN_VEXT_VX_RM(vsaddu_vx_w, 4)
2150 GEN_VEXT_VX_RM(vsaddu_vx_d, 8)
2151
2152 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2153 {
2154 int8_t res = a + b;
2155 if ((res ^ a) & (res ^ b) & INT8_MIN) {
2156 res = a > 0 ? INT8_MAX : INT8_MIN;
2157 env->vxsat = 0x1;
2158 }
2159 return res;
2160 }
2161
/*
 * Signed saturating 16-bit add.  Returns a + b; on overflow returns
 * INT16_MAX (a positive) or INT16_MIN and sets env->vxsat.  vxrm is not
 * used.
 */
static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a,
                             int16_t b)
{
    int16_t sum = a + b;

    /* overflow iff the sign of the sum differs from both operands */
    if (((sum ^ a) & (sum ^ b) & INT16_MIN) == 0) {
        return sum;
    }
    env->vxsat = 0x1;
    return a > 0 ? INT16_MAX : INT16_MIN;
}
2172
/*
 * Signed saturating 32-bit add.  Returns a + b; on overflow returns
 * INT32_MAX (a positive) or INT32_MIN and sets env->vxsat.  vxrm is not
 * used.
 */
static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a,
                             int32_t b)
{
    int32_t sum = a + b;

    /* overflow iff the sign of the sum differs from both operands */
    if (((sum ^ a) & (sum ^ b) & INT32_MIN) == 0) {
        return sum;
    }
    env->vxsat = 0x1;
    return a > 0 ? INT32_MAX : INT32_MIN;
}
2183
/*
 * Signed saturating 64-bit add.  Returns a + b; on overflow returns
 * INT64_MAX (a positive) or INT64_MIN and sets env->vxsat.  vxrm is not
 * used.
 */
static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a,
                             int64_t b)
{
    int64_t sum = a + b;

    /* overflow iff the sign of the sum differs from both operands */
    if (((sum ^ a) & (sum ^ b) & INT64_MIN) == 0) {
        return sum;
    }
    env->vxsat = 0x1;
    return a > 0 ? INT64_MAX : INT64_MIN;
}
2194
RVVCALL(OPIVV2_RM,vsadd_vv_b,OP_SSS_B,H1,H1,H1,sadd8)2195 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2196 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2197 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2198 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
2199 GEN_VEXT_VV_RM(vsadd_vv_b, 1)
2200 GEN_VEXT_VV_RM(vsadd_vv_h, 2)
2201 GEN_VEXT_VV_RM(vsadd_vv_w, 4)
2202 GEN_VEXT_VV_RM(vsadd_vv_d, 8)
2203
2204 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2205 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2206 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2207 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
2208 GEN_VEXT_VX_RM(vsadd_vx_b, 1)
2209 GEN_VEXT_VX_RM(vsadd_vx_h, 2)
2210 GEN_VEXT_VX_RM(vsadd_vx_w, 4)
2211 GEN_VEXT_VX_RM(vsadd_vx_d, 8)
2212
2213 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a,
2214 uint8_t b)
2215 {
2216 uint8_t res = a - b;
2217 if (res > a) {
2218 res = 0;
2219 env->vxsat = 0x1;
2220 }
2221 return res;
2222 }
2223
/*
 * Unsigned saturating 16-bit subtract.  Returns a - b, or 0 with
 * env->vxsat set when the difference wraps.  vxrm is not used.
 */
static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
                               uint16_t b)
{
    uint16_t diff = a - b;

    /* a wrapped difference is larger than the minuend */
    if (diff <= a) {
        return diff;
    }
    env->vxsat = 0x1;
    return 0;
}
2234
/*
 * Unsigned saturating 32-bit subtract.  Returns a - b, or 0 with
 * env->vxsat set when the difference wraps.  vxrm is not used.
 */
static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
                               uint32_t b)
{
    uint32_t diff = a - b;

    /* a wrapped difference is larger than the minuend */
    if (diff <= a) {
        return diff;
    }
    env->vxsat = 0x1;
    return 0;
}
2245
/*
 * Unsigned saturating 64-bit subtract.  Returns a - b, or 0 with
 * env->vxsat set when the difference wraps.  vxrm is not used.
 */
static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
                               uint64_t b)
{
    uint64_t diff = a - b;

    /* a wrapped difference is larger than the minuend */
    if (diff <= a) {
        return diff;
    }
    env->vxsat = 0x1;
    return 0;
}
2256
RVVCALL(OPIVV2_RM,vssubu_vv_b,OP_UUU_B,H1,H1,H1,ssubu8)2257 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2258 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2259 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2260 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
2261 GEN_VEXT_VV_RM(vssubu_vv_b, 1)
2262 GEN_VEXT_VV_RM(vssubu_vv_h, 2)
2263 GEN_VEXT_VV_RM(vssubu_vv_w, 4)
2264 GEN_VEXT_VV_RM(vssubu_vv_d, 8)
2265
2266 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2267 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2268 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2269 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
2270 GEN_VEXT_VX_RM(vssubu_vx_b, 1)
2271 GEN_VEXT_VX_RM(vssubu_vx_h, 2)
2272 GEN_VEXT_VX_RM(vssubu_vx_w, 4)
2273 GEN_VEXT_VX_RM(vssubu_vx_d, 8)
2274
2275 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2276 {
2277 int8_t res = a - b;
2278 if ((res ^ a) & (a ^ b) & INT8_MIN) {
2279 res = a >= 0 ? INT8_MAX : INT8_MIN;
2280 env->vxsat = 0x1;
2281 }
2282 return res;
2283 }
2284
/*
 * Signed saturating 16-bit subtract.  Returns a - b; on overflow returns
 * INT16_MAX (a non-negative) or INT16_MIN and sets env->vxsat.  vxrm is
 * not used.
 */
static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a,
                             int16_t b)
{
    int16_t diff = a - b;

    /* overflow iff the operands differ in sign and so do diff and a */
    if (((diff ^ a) & (a ^ b) & INT16_MIN) == 0) {
        return diff;
    }
    env->vxsat = 0x1;
    return a >= 0 ? INT16_MAX : INT16_MIN;
}
2295
/*
 * Signed saturating 32-bit subtract.  Returns a - b; on overflow returns
 * INT32_MAX (a non-negative) or INT32_MIN and sets env->vxsat.  vxrm is
 * not used.
 */
static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a,
                             int32_t b)
{
    int32_t diff = a - b;

    /* overflow iff the operands differ in sign and so do diff and a */
    if (((diff ^ a) & (a ^ b) & INT32_MIN) == 0) {
        return diff;
    }
    env->vxsat = 0x1;
    return a >= 0 ? INT32_MAX : INT32_MIN;
}
2306
/*
 * Signed saturating 64-bit subtract.  Returns a - b; on overflow returns
 * INT64_MAX (a non-negative) or INT64_MIN and sets env->vxsat.  vxrm is
 * not used.
 */
static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a,
                             int64_t b)
{
    int64_t diff = a - b;

    /* overflow iff the operands differ in sign and so do diff and a */
    if (((diff ^ a) & (a ^ b) & INT64_MIN) == 0) {
        return diff;
    }
    env->vxsat = 0x1;
    return a >= 0 ? INT64_MAX : INT64_MIN;
}
2317
RVVCALL(OPIVV2_RM,vssub_vv_b,OP_SSS_B,H1,H1,H1,ssub8)2318 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2319 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2320 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2321 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
2322 GEN_VEXT_VV_RM(vssub_vv_b, 1)
2323 GEN_VEXT_VV_RM(vssub_vv_h, 2)
2324 GEN_VEXT_VV_RM(vssub_vv_w, 4)
2325 GEN_VEXT_VV_RM(vssub_vv_d, 8)
2326
2327 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2328 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2329 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2330 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
2331 GEN_VEXT_VX_RM(vssub_vx_b, 1)
2332 GEN_VEXT_VX_RM(vssub_vx_h, 2)
2333 GEN_VEXT_VX_RM(vssub_vx_w, 4)
2334 GEN_VEXT_VX_RM(vssub_vx_d, 8)
2335
/* Vector Single-Width Averaging Add and Subtract */

/*
 * Compute the rounding increment (0 or 1) that has to be added to
 * (v >> shift) so that the discarded low bits are rounded according to
 * the fixed-point rounding mode.
 *
 * @vxrm:  0 = round-to-nearest-up, 1 = round-to-nearest-even,
 *         2 = round-down (truncate), 3 = round-to-odd
 * @v:     value that is about to be shifted right
 * @shift: number of low bits that the shift discards
 *
 * Returns 0 when shift is 0 or greater than 64.
 *
 * The shift range is checked before any bit of v is inspected, and the
 * bits are picked with plain shifts and masks so that every access is
 * in range: for shift == 64 there is no bit 'shift' in v, so the
 * least-significant result bit is taken to be 0.
 */
static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
{
    uint8_t d, d1;
    uint64_t D1, D2;

    if (shift == 0 || shift > 64) {
        return 0;
    }

    /* d: least-significant bit that survives the shift */
    d = shift < 64 ? (v >> shift) & 1 : 0;
    /* d1: most-significant bit that is shifted out */
    d1 = (v >> (shift - 1)) & 1;
    /* D1: all bits that are shifted out */
    D1 = shift < 64 ? v & ((UINT64_C(1) << shift) - 1) : v;

    if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
        return d1;
    } else if (vxrm == 1) { /* round-to-nearest-even */
        if (shift > 1) {
            /* D2: shifted-out bits below d1 */
            D2 = v & ((UINT64_C(1) << (shift - 1)) - 1);
            return d1 & ((D2 != 0) | d);
        } else {
            return d1 & d;
        }
    } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
        return !d & (D1 != 0);
    }
    return 0; /* round-down (truncate) */
}
2363
/*
 * Signed averaging add for elements of up to 32 bits: the sum is formed
 * in 64 bits, halved, and the get_round() increment for the dropped bit
 * is added.  env is not used.
 */
static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a,
                             int32_t b)
{
    int64_t wide_sum = (int64_t)a + b;
    int64_t halved = wide_sum >> 1;

    return halved + get_round(vxrm, wide_sum, 1);
}
2372
/*
 * Signed averaging add for 64-bit elements.  The sum is formed in 64
 * bits; when it overflowed, the top bit of the halved value is flipped
 * to recover the missing 65th bit.  The get_round() increment for the
 * dropped bit is then added.  env is not used.
 */
static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a,
                             int64_t b)
{
    int64_t sum = a + b;
    /* INT64_MIN when the addition overflowed, 0 otherwise */
    int64_t overflow_bit = (sum ^ a) & (sum ^ b) & INT64_MIN;
    int64_t halved = sum >> 1;

    /* With signed overflow, bit 64 is inverse of bit 63. */
    halved ^= overflow_bit;
    return halved + get_round(vxrm, sum, 1);
}
2383
RVVCALL(OPIVV2_RM,vaadd_vv_b,OP_SSS_B,H1,H1,H1,aadd32)2384 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2385 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2386 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2387 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
2388 GEN_VEXT_VV_RM(vaadd_vv_b, 1)
2389 GEN_VEXT_VV_RM(vaadd_vv_h, 2)
2390 GEN_VEXT_VV_RM(vaadd_vv_w, 4)
2391 GEN_VEXT_VV_RM(vaadd_vv_d, 8)
2392
2393 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2394 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2395 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2396 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
2397 GEN_VEXT_VX_RM(vaadd_vx_b, 1)
2398 GEN_VEXT_VX_RM(vaadd_vx_h, 2)
2399 GEN_VEXT_VX_RM(vaadd_vx_w, 4)
2400 GEN_VEXT_VX_RM(vaadd_vx_d, 8)
2401
2402 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2403 uint32_t a, uint32_t b)
2404 {
2405 uint64_t res = (uint64_t)a + b;
2406 uint8_t round = get_round(vxrm, res, 1);
2407
2408 return (res >> 1) + round;
2409 }
2410
/*
 * Unsigned averaging add for 64-bit elements.  The carry out of the
 * 64-bit sum is re-inserted as bit 63 of the halved value before the
 * get_round() increment for the dropped bit is added.  env is not used.
 */
static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
                               uint64_t a, uint64_t b)
{
    uint64_t sum = a + b;
    /* the sum wrapped iff it is smaller than the first addend */
    uint64_t carry = (uint64_t)(sum < a) << 63;
    uint64_t halved = (sum >> 1) | carry;

    return halved + get_round(vxrm, sum, 1);
}
2420
RVVCALL(OPIVV2_RM,vaaddu_vv_b,OP_UUU_B,H1,H1,H1,aaddu32)2421 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
2422 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
2423 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
2424 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
2425 GEN_VEXT_VV_RM(vaaddu_vv_b, 1)
2426 GEN_VEXT_VV_RM(vaaddu_vv_h, 2)
2427 GEN_VEXT_VV_RM(vaaddu_vv_w, 4)
2428 GEN_VEXT_VV_RM(vaaddu_vv_d, 8)
2429
2430 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
2431 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
2432 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
2433 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
2434 GEN_VEXT_VX_RM(vaaddu_vx_b, 1)
2435 GEN_VEXT_VX_RM(vaaddu_vx_h, 2)
2436 GEN_VEXT_VX_RM(vaaddu_vx_w, 4)
2437 GEN_VEXT_VX_RM(vaaddu_vx_d, 8)
2438
2439 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a,
2440 int32_t b)
2441 {
2442 int64_t res = (int64_t)a - b;
2443 uint8_t round = get_round(vxrm, res, 1);
2444
2445 return (res >> 1) + round;
2446 }
2447
/*
 * Signed averaging subtract for 64-bit elements.  The difference is
 * formed in 64 bits; when it overflowed, the top bit of the halved value
 * is flipped to recover the missing 65th bit.  The get_round() increment
 * for the dropped bit is then added.  env is not used.
 */
static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a,
                             int64_t b)
{
    int64_t diff = (int64_t)a - b;
    /* INT64_MIN when the subtraction overflowed, 0 otherwise */
    int64_t overflow_bit = (diff ^ a) & (a ^ b) & INT64_MIN;
    int64_t halved = diff >> 1;

    /* With signed overflow, bit 64 is inverse of bit 63. */
    halved ^= overflow_bit;
    return halved + get_round(vxrm, diff, 1);
}
2458
RVVCALL(OPIVV2_RM,vasub_vv_b,OP_SSS_B,H1,H1,H1,asub32)2459 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2460 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2461 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2462 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
2463 GEN_VEXT_VV_RM(vasub_vv_b, 1)
2464 GEN_VEXT_VV_RM(vasub_vv_h, 2)
2465 GEN_VEXT_VV_RM(vasub_vv_w, 4)
2466 GEN_VEXT_VV_RM(vasub_vv_d, 8)
2467
2468 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2469 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2470 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2471 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
2472 GEN_VEXT_VX_RM(vasub_vx_b, 1)
2473 GEN_VEXT_VX_RM(vasub_vx_h, 2)
2474 GEN_VEXT_VX_RM(vasub_vx_w, 4)
2475 GEN_VEXT_VX_RM(vasub_vx_d, 8)
2476
2477 static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2478 uint32_t a, uint32_t b)
2479 {
2480 int64_t res = (int64_t)a - b;
2481 uint8_t round = get_round(vxrm, res, 1);
2482
2483 return (res >> 1) + round;
2484 }
2485
/*
 * Unsigned averaging subtract for 64-bit elements.  The borrow out of
 * the 64-bit difference is re-inserted as bit 63 of the halved value
 * before the get_round() increment for the dropped bit is added.  env is
 * not used.
 */
static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
                               uint64_t a, uint64_t b)
{
    uint64_t diff = (uint64_t)a - b;
    /* the difference wrapped iff it is larger than the minuend */
    uint64_t borrow = (uint64_t)(diff > a) << 63;
    uint64_t halved = (diff >> 1) | borrow;

    return halved + get_round(vxrm, diff, 1);
}
2495
RVVCALL(OPIVV2_RM,vasubu_vv_b,OP_UUU_B,H1,H1,H1,asubu32)2496 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
2497 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
2498 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
2499 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
2500 GEN_VEXT_VV_RM(vasubu_vv_b, 1)
2501 GEN_VEXT_VV_RM(vasubu_vv_h, 2)
2502 GEN_VEXT_VV_RM(vasubu_vv_w, 4)
2503 GEN_VEXT_VV_RM(vasubu_vv_d, 8)
2504
2505 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
2506 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
2507 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
2508 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
2509 GEN_VEXT_VX_RM(vasubu_vx_b, 1)
2510 GEN_VEXT_VX_RM(vasubu_vx_h, 2)
2511 GEN_VEXT_VX_RM(vasubu_vx_w, 4)
2512 GEN_VEXT_VX_RM(vasubu_vx_d, 8)
2513
2514 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2515 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2516 {
2517 uint8_t round;
2518 int16_t res;
2519
2520 res = (int16_t)a * (int16_t)b;
2521 round = get_round(vxrm, res, 7);
2522 res = (res >> 7) + round;
2523
2524 if (res > INT8_MAX) {
2525 env->vxsat = 0x1;
2526 return INT8_MAX;
2527 } else if (res < INT8_MIN) {
2528 env->vxsat = 0x1;
2529 return INT8_MIN;
2530 } else {
2531 return res;
2532 }
2533 }
2534
/*
 * 16-bit fractional multiply: (a * b) >> 15 with the get_round()
 * increment added, clamped to [INT16_MIN, INT16_MAX].  env->vxsat is set
 * when the result had to be clamped.
 */
static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
{
    int32_t product = (int32_t)a * (int32_t)b;
    int32_t scaled = (product >> 15) + get_round(vxrm, product, 15);

    if (scaled > INT16_MAX) {
        env->vxsat = 0x1;
        return INT16_MAX;
    }
    if (scaled < INT16_MIN) {
        env->vxsat = 0x1;
        return INT16_MIN;
    }
    return scaled;
}
2554
/*
 * 32-bit fractional multiply: (a * b) >> 31 with the get_round()
 * increment added, clamped to [INT32_MIN, INT32_MAX].  env->vxsat is set
 * when the result had to be clamped.
 */
static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
{
    int64_t product = (int64_t)a * (int64_t)b;
    int64_t scaled = (product >> 31) + get_round(vxrm, product, 31);

    if (scaled > INT32_MAX) {
        env->vxsat = 0x1;
        return INT32_MAX;
    }
    if (scaled < INT32_MIN) {
        env->vxsat = 0x1;
        return INT32_MIN;
    }
    return scaled;
}
2574
/*
 * 64-bit fractional multiply.  The 128-bit product from muls64() is
 * shifted right by 63 and the get_round() increment is applied.
 * INT64_MIN * INT64_MIN is special-cased up front: it returns INT64_MAX
 * and sets env->vxsat.  env->vxsat is also set when the increment would
 * push INT64_MAX out of range; the result is then left at INT64_MAX.
 */
static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
{
    uint64_t hi_64, lo_64;
    int64_t scaled;

    if (a == INT64_MIN && b == INT64_MIN) {
        env->vxsat = 1;
        return INT64_MAX;
    }

    muls64(&lo_64, &hi_64, a, b);
    /*
     * Cannot overflow, as there are always
     * 2 sign bits after multiply.
     */
    scaled = (hi_64 << 1) | (lo_64 >> 63);

    if (!get_round(vxrm, lo_64, 63)) {
        return scaled;
    }
    if (scaled == INT64_MAX) {
        env->vxsat = 1;
        return scaled;
    }
    return scaled + 1;
}
2602
RVVCALL(OPIVV2_RM,vsmul_vv_b,OP_SSS_B,H1,H1,H1,vsmul8)2603 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2604 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2605 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2606 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
2607 GEN_VEXT_VV_RM(vsmul_vv_b, 1)
2608 GEN_VEXT_VV_RM(vsmul_vv_h, 2)
2609 GEN_VEXT_VV_RM(vsmul_vv_w, 4)
2610 GEN_VEXT_VV_RM(vsmul_vv_d, 8)
2611
2612 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2613 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2614 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2615 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
2616 GEN_VEXT_VX_RM(vsmul_vx_b, 1)
2617 GEN_VEXT_VX_RM(vsmul_vx_h, 2)
2618 GEN_VEXT_VX_RM(vsmul_vx_w, 4)
2619 GEN_VEXT_VX_RM(vsmul_vx_d, 8)
2620
2621 /* Vector Single-Width Scaling Shift Instructions */
2622 static inline uint8_t
2623 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2624 {
2625 uint8_t round, shift = b & 0x7;
2626 uint8_t res;
2627
2628 round = get_round(vxrm, a, shift);
2629 res = (a >> shift) + round;
2630 return res;
2631 }
2632 static inline uint16_t
vssrl16(CPURISCVState * env,int vxrm,uint16_t a,uint16_t b)2633 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2634 {
2635 uint8_t round, shift = b & 0xf;
2636
2637 round = get_round(vxrm, a, shift);
2638 return (a >> shift) + round;
2639 }
2640 static inline uint32_t
vssrl32(CPURISCVState * env,int vxrm,uint32_t a,uint32_t b)2641 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2642 {
2643 uint8_t round, shift = b & 0x1f;
2644
2645 round = get_round(vxrm, a, shift);
2646 return (a >> shift) + round;
2647 }
2648 static inline uint64_t
vssrl64(CPURISCVState * env,int vxrm,uint64_t a,uint64_t b)2649 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2650 {
2651 uint8_t round, shift = b & 0x3f;
2652
2653 round = get_round(vxrm, a, shift);
2654 return (a >> shift) + round;
2655 }
RVVCALL(OPIVV2_RM,vssrl_vv_b,OP_UUU_B,H1,H1,H1,vssrl8)2656 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2657 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2658 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2659 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
2660 GEN_VEXT_VV_RM(vssrl_vv_b, 1)
2661 GEN_VEXT_VV_RM(vssrl_vv_h, 2)
2662 GEN_VEXT_VV_RM(vssrl_vv_w, 4)
2663 GEN_VEXT_VV_RM(vssrl_vv_d, 8)
2664
2665 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2666 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2667 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2668 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
2669 GEN_VEXT_VX_RM(vssrl_vx_b, 1)
2670 GEN_VEXT_VX_RM(vssrl_vx_h, 2)
2671 GEN_VEXT_VX_RM(vssrl_vx_w, 4)
2672 GEN_VEXT_VX_RM(vssrl_vx_d, 8)
2673
2674 static inline int8_t
2675 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2676 {
2677 uint8_t round, shift = b & 0x7;
2678
2679 round = get_round(vxrm, a, shift);
2680 return (a >> shift) + round;
2681 }
2682 static inline int16_t
vssra16(CPURISCVState * env,int vxrm,int16_t a,int16_t b)2683 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2684 {
2685 uint8_t round, shift = b & 0xf;
2686
2687 round = get_round(vxrm, a, shift);
2688 return (a >> shift) + round;
2689 }
2690 static inline int32_t
vssra32(CPURISCVState * env,int vxrm,int32_t a,int32_t b)2691 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2692 {
2693 uint8_t round, shift = b & 0x1f;
2694
2695 round = get_round(vxrm, a, shift);
2696 return (a >> shift) + round;
2697 }
2698 static inline int64_t
vssra64(CPURISCVState * env,int vxrm,int64_t a,int64_t b)2699 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2700 {
2701 uint8_t round, shift = b & 0x3f;
2702
2703 round = get_round(vxrm, a, shift);
2704 return (a >> shift) + round;
2705 }
2706
RVVCALL(OPIVV2_RM,vssra_vv_b,OP_SSS_B,H1,H1,H1,vssra8)2707 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2708 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2709 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2710 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
2711 GEN_VEXT_VV_RM(vssra_vv_b, 1)
2712 GEN_VEXT_VV_RM(vssra_vv_h, 2)
2713 GEN_VEXT_VV_RM(vssra_vv_w, 4)
2714 GEN_VEXT_VV_RM(vssra_vv_d, 8)
2715
2716 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2717 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2718 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2719 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
2720 GEN_VEXT_VX_RM(vssra_vx_b, 1)
2721 GEN_VEXT_VX_RM(vssra_vx_h, 2)
2722 GEN_VEXT_VX_RM(vssra_vx_w, 4)
2723 GEN_VEXT_VX_RM(vssra_vx_d, 8)
2724
2725 /* Vector Narrowing Fixed-Point Clip Instructions */
2726 static inline int8_t
2727 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2728 {
2729 uint8_t round, shift = b & 0xf;
2730 int16_t res;
2731
2732 round = get_round(vxrm, a, shift);
2733 res = (a >> shift) + round;
2734 if (res > INT8_MAX) {
2735 env->vxsat = 0x1;
2736 return INT8_MAX;
2737 } else if (res < INT8_MIN) {
2738 env->vxsat = 0x1;
2739 return INT8_MIN;
2740 } else {
2741 return res;
2742 }
2743 }
2744
2745 static inline int16_t
vnclip16(CPURISCVState * env,int vxrm,int32_t a,int16_t b)2746 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2747 {
2748 uint8_t round, shift = b & 0x1f;
2749 int32_t res;
2750
2751 round = get_round(vxrm, a, shift);
2752 res = (a >> shift) + round;
2753 if (res > INT16_MAX) {
2754 env->vxsat = 0x1;
2755 return INT16_MAX;
2756 } else if (res < INT16_MIN) {
2757 env->vxsat = 0x1;
2758 return INT16_MIN;
2759 } else {
2760 return res;
2761 }
2762 }
2763
2764 static inline int32_t
vnclip32(CPURISCVState * env,int vxrm,int64_t a,int32_t b)2765 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2766 {
2767 uint8_t round, shift = b & 0x3f;
2768 int64_t res;
2769
2770 round = get_round(vxrm, a, shift);
2771 res = (a >> shift) + round;
2772 if (res > INT32_MAX) {
2773 env->vxsat = 0x1;
2774 return INT32_MAX;
2775 } else if (res < INT32_MIN) {
2776 env->vxsat = 0x1;
2777 return INT32_MIN;
2778 } else {
2779 return res;
2780 }
2781 }
2782
RVVCALL(OPIVV2_RM,vnclip_wv_b,NOP_SSS_B,H1,H2,H1,vnclip8)2783 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
2784 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
2785 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
2786 GEN_VEXT_VV_RM(vnclip_wv_b, 1)
2787 GEN_VEXT_VV_RM(vnclip_wv_h, 2)
2788 GEN_VEXT_VV_RM(vnclip_wv_w, 4)
2789
2790 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
2791 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
2792 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
2793 GEN_VEXT_VX_RM(vnclip_wx_b, 1)
2794 GEN_VEXT_VX_RM(vnclip_wx_h, 2)
2795 GEN_VEXT_VX_RM(vnclip_wx_w, 4)
2796
2797 static inline uint8_t
2798 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2799 {
2800 uint8_t round, shift = b & 0xf;
2801 uint16_t res;
2802
2803 round = get_round(vxrm, a, shift);
2804 res = (a >> shift) + round;
2805 if (res > UINT8_MAX) {
2806 env->vxsat = 0x1;
2807 return UINT8_MAX;
2808 } else {
2809 return res;
2810 }
2811 }
2812
2813 static inline uint16_t
vnclipu16(CPURISCVState * env,int vxrm,uint32_t a,uint16_t b)2814 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
2815 {
2816 uint8_t round, shift = b & 0x1f;
2817 uint32_t res;
2818
2819 round = get_round(vxrm, a, shift);
2820 res = (a >> shift) + round;
2821 if (res > UINT16_MAX) {
2822 env->vxsat = 0x1;
2823 return UINT16_MAX;
2824 } else {
2825 return res;
2826 }
2827 }
2828
2829 static inline uint32_t
vnclipu32(CPURISCVState * env,int vxrm,uint64_t a,uint32_t b)2830 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
2831 {
2832 uint8_t round, shift = b & 0x3f;
2833 uint64_t res;
2834
2835 round = get_round(vxrm, a, shift);
2836 res = (a >> shift) + round;
2837 if (res > UINT32_MAX) {
2838 env->vxsat = 0x1;
2839 return UINT32_MAX;
2840 } else {
2841 return res;
2842 }
2843 }
2844
RVVCALL(OPIVV2_RM,vnclipu_wv_b,NOP_UUU_B,H1,H2,H1,vnclipu8)2845 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
2846 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
2847 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
2848 GEN_VEXT_VV_RM(vnclipu_wv_b, 1)
2849 GEN_VEXT_VV_RM(vnclipu_wv_h, 2)
2850 GEN_VEXT_VV_RM(vnclipu_wv_w, 4)
2851
2852 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
2853 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
2854 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
2855 GEN_VEXT_VX_RM(vnclipu_wx_b, 1)
2856 GEN_VEXT_VX_RM(vnclipu_wx_h, 2)
2857 GEN_VEXT_VX_RM(vnclipu_wx_w, 4)
2858
/*
 * Vector Floating-Point Arithmetic Instructions
 */
2862 /* Vector Single-Width Floating-Point Add/Subtract Instructions */
/*
 * OPFVV2: define do_<NAME>(), the per-element worker of a two-operand
 * vector-vector floating-point operation.
 *
 * Element i of vs1 (type T1, index adjusted by HS1) and of vs2 (type T2,
 * index adjusted by HS2) are loaded, and OP(vs2 element, vs1 element,
 * &env->fp_status) is stored to element i of vd (type TD, index adjusted
 * by HD).
 */
#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
                      CPURISCVState *env)                      \
{                                                              \
    TX1 rhs = ((T1 *)vs1)[HS1(i)];                             \
    TX2 lhs = ((T2 *)vs2)[HS2(i)];                             \
    ((TD *)vd)[HD(i)] = OP(lhs, rhs, &env->fp_status);         \
}
2871
/*
 * GEN_VEXT_VV_ENV: define HELPER(NAME) for a vector-vector operation
 * whose per-element worker do_<NAME>() takes env.  ESZ is the
 * destination element size in bytes.
 *
 * Elements from env->vstart up to env->vl are processed: an element that
 * is masked off (vm clear and its v0 mask bit clear) is handed to
 * vext_set_elems_1s() with the vma setting; every other element goes to
 * do_<NAME>().  Afterwards vstart is cleared and the tail, from vl to
 * the total element count, is handed to vext_set_elems_1s() with the
 * vta setting.
 */
#define GEN_VEXT_VV_ENV(NAME, ESZ)                        \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t unmasked = vext_vm(desc);                    \
    uint32_t active_len = env->vl;                        \
    uint32_t elem_cap =                                   \
        vext_get_total_elems(env, desc, ESZ);             \
    uint32_t tail_flag = vext_vta(desc);                  \
    uint32_t mask_flag = vext_vma(desc);                  \
    uint32_t idx;                                         \
                                                          \
    VSTART_CHECK_EARLY_EXIT(env);                         \
                                                          \
    for (idx = env->vstart; idx < active_len; idx++) {    \
        if (unmasked || vext_elem_mask(v0, idx)) {        \
            do_##NAME(vd, vs1, vs2, idx, env);            \
        } else {                                          \
            /* set masked-off elements to 1s */           \
            vext_set_elems_1s(vd, mask_flag, idx * ESZ,   \
                              (idx + 1) * ESZ);           \
        }                                                 \
    }                                                     \
    env->vstart = 0;                                      \
    /* set tail elements to 1s */                         \
    vext_set_elems_1s(vd, tail_flag, active_len * ESZ,    \
                      elem_cap * ESZ);                    \
}
2901
2902 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
2903 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
2904 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
2905 GEN_VEXT_VV_ENV(vfadd_vv_h, 2)
2906 GEN_VEXT_VV_ENV(vfadd_vv_w, 4)
2907 GEN_VEXT_VV_ENV(vfadd_vv_d, 8)
2908
/*
 * OPFVF2: define do_<NAME>(), the per-element worker of a two-operand
 * vector-scalar floating-point operation.
 *
 * Element i of vs2 (type T2, index adjusted by HS2) is loaded, the
 * 64-bit scalar s1 is converted to T1 and then TX1, and
 * OP(vs2 element, scalar, &env->fp_status) is stored to element i of vd
 * (type TD, index adjusted by HD).
 */
#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)        \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
                      CPURISCVState *env)                      \
{                                                              \
    TX2 elem = ((T2 *)vs2)[HS2(i)];                            \
    TX1 scalar = (TX1)(T1)s1;                                  \
    ((TD *)vd)[HD(i)] = OP(elem, scalar, &env->fp_status);     \
}
2916
/*
 * GEN_VEXT_VF: define HELPER(NAME) for a vector-scalar operation whose
 * per-element worker do_<NAME>() takes the scalar s1 and env.  ESZ is
 * the destination element size in bytes.
 *
 * Elements from env->vstart up to env->vl are processed: an element that
 * is masked off (vm clear and its v0 mask bit clear) is handed to
 * vext_set_elems_1s() with the vma setting; every other element goes to
 * do_<NAME>().  Afterwards vstart is cleared and the tail, from vl to
 * the total element count, is handed to vext_set_elems_1s() with the
 * vta setting.
 */
#define GEN_VEXT_VF(NAME, ESZ)                            \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1,        \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t unmasked = vext_vm(desc);                    \
    uint32_t active_len = env->vl;                        \
    uint32_t elem_cap =                                   \
        vext_get_total_elems(env, desc, ESZ);             \
    uint32_t tail_flag = vext_vta(desc);                  \
    uint32_t mask_flag = vext_vma(desc);                  \
    uint32_t idx;                                         \
                                                          \
    VSTART_CHECK_EARLY_EXIT(env);                         \
                                                          \
    for (idx = env->vstart; idx < active_len; idx++) {    \
        if (unmasked || vext_elem_mask(v0, idx)) {        \
            do_##NAME(vd, s1, vs2, idx, env);             \
        } else {                                          \
            /* set masked-off elements to 1s */           \
            vext_set_elems_1s(vd, mask_flag, idx * ESZ,   \
                              (idx + 1) * ESZ);           \
        }                                                 \
    }                                                     \
    env->vstart = 0;                                      \
    /* set tail elements to 1s */                         \
    vext_set_elems_1s(vd, tail_flag, active_len * ESZ,    \
                      elem_cap * ESZ);                    \
}
2946
2947 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
2948 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
2949 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
2950 GEN_VEXT_VF(vfadd_vf_h, 2)
2951 GEN_VEXT_VF(vfadd_vf_w, 4)
2952 GEN_VEXT_VF(vfadd_vf_d, 8)
2953
2954 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
2955 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
2956 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
2957 GEN_VEXT_VV_ENV(vfsub_vv_h, 2)
2958 GEN_VEXT_VV_ENV(vfsub_vv_w, 4)
2959 GEN_VEXT_VV_ENV(vfsub_vv_d, 8)
2960 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
2961 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
2962 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
2963 GEN_VEXT_VF(vfsub_vf_h, 2)
2964 GEN_VEXT_VF(vfsub_vf_w, 4)
2965 GEN_VEXT_VF(vfsub_vf_d, 8)
2966
2967 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
2968 {
2969 return float16_sub(b, a, s);
2970 }
2971
/* Reversed single-precision subtract: calls float32_sub() with a and b swapped. */
static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
{
    uint32_t diff = float32_sub(b, a, s);

    return diff;
}
2976
/* Reversed double-precision subtract: calls float64_sub() with a and b swapped. */
static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
{
    uint64_t diff = float64_sub(b, a, s);

    return diff;
}
2981
RVVCALL(OPFVF2,vfrsub_vf_h,OP_UUU_H,H2,H2,float16_rsub)2982 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
2983 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
2984 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
2985 GEN_VEXT_VF(vfrsub_vf_h, 2)
2986 GEN_VEXT_VF(vfrsub_vf_w, 4)
2987 GEN_VEXT_VF(vfrsub_vf_d, 8)
2988
2989 /* Vector Widening Floating-Point Add/Subtract Instructions */
2990 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
2991 {
2992 return float32_add(float16_to_float32(a, true, s),
2993 float16_to_float32(b, true, s), s);
2994 }
2995
/* Widening add: convert both float32 operands to float64, then add. */
static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
{
    uint64_t wide_a = float32_to_float64(a, s);
    uint64_t wide_b = float32_to_float64(b, s);

    return float64_add(wide_a, wide_b, s);
}
3002
RVVCALL(OPFVV2,vfwadd_vv_h,WOP_UUU_H,H4,H2,H2,vfwadd16)3003 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
3004 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
3005 GEN_VEXT_VV_ENV(vfwadd_vv_h, 4)
3006 GEN_VEXT_VV_ENV(vfwadd_vv_w, 8)
3007 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
3008 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
3009 GEN_VEXT_VF(vfwadd_vf_h, 4)
3010 GEN_VEXT_VF(vfwadd_vf_w, 8)
3011
3012 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
3013 {
3014 return float32_sub(float16_to_float32(a, true, s),
3015 float16_to_float32(b, true, s), s);
3016 }
3017
/* Widening subtract: convert both float32 operands to float64, then a - b. */
static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
{
    uint64_t wide_a = float32_to_float64(a, s);
    uint64_t wide_b = float32_to_float64(b, s);

    return float64_sub(wide_a, wide_b, s);
}
3024
RVVCALL(OPFVV2,vfwsub_vv_h,WOP_UUU_H,H4,H2,H2,vfwsub16)3025 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
3026 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
3027 GEN_VEXT_VV_ENV(vfwsub_vv_h, 4)
3028 GEN_VEXT_VV_ENV(vfwsub_vv_w, 8)
3029 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
3030 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
3031 GEN_VEXT_VF(vfwsub_vf_h, 4)
3032 GEN_VEXT_VF(vfwsub_vf_w, 8)
3033
3034 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
3035 {
3036 return float32_add(a, float16_to_float32(b, true, s), s);
3037 }
3038
/* Mixed-width add: a is already float64; b is float32 and is widened first. */
static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
{
    uint64_t wide_b = float32_to_float64(b, s);

    return float64_add(a, wide_b, s);
}
3043
RVVCALL(OPFVV2,vfwadd_wv_h,WOP_WUUU_H,H4,H2,H2,vfwaddw16)3044 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
3045 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
3046 GEN_VEXT_VV_ENV(vfwadd_wv_h, 4)
3047 GEN_VEXT_VV_ENV(vfwadd_wv_w, 8)
3048 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
3049 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
3050 GEN_VEXT_VF(vfwadd_wf_h, 4)
3051 GEN_VEXT_VF(vfwadd_wf_w, 8)
3052
3053 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
3054 {
3055 return float32_sub(a, float16_to_float32(b, true, s), s);
3056 }
3057
/* Mixed-width subtract: a is float64; b is float32, widened, then a - b. */
static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
{
    uint64_t wide_b = float32_to_float64(b, s);

    return float64_sub(a, wide_b, s);
}
3062
RVVCALL(OPFVV2,vfwsub_wv_h,WOP_WUUU_H,H4,H2,H2,vfwsubw16)3063 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
3064 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
3065 GEN_VEXT_VV_ENV(vfwsub_wv_h, 4)
3066 GEN_VEXT_VV_ENV(vfwsub_wv_w, 8)
3067 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
3068 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
3069 GEN_VEXT_VF(vfwsub_wf_h, 4)
3070 GEN_VEXT_VF(vfwsub_wf_w, 8)
3071
3072 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */
3073 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
3074 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
3075 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
3076 GEN_VEXT_VV_ENV(vfmul_vv_h, 2)
3077 GEN_VEXT_VV_ENV(vfmul_vv_w, 4)
3078 GEN_VEXT_VV_ENV(vfmul_vv_d, 8)
3079 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
3080 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
3081 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
3082 GEN_VEXT_VF(vfmul_vf_h, 2)
3083 GEN_VEXT_VF(vfmul_vf_w, 4)
3084 GEN_VEXT_VF(vfmul_vf_d, 8)
3085
3086 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
3087 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
3088 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
3089 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2)
3090 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4)
3091 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8)
3092 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
3093 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
3094 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
3095 GEN_VEXT_VF(vfdiv_vf_h, 2)
3096 GEN_VEXT_VF(vfdiv_vf_w, 4)
3097 GEN_VEXT_VF(vfdiv_vf_d, 8)
3098
3099 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3100 {
3101 return float16_div(b, a, s);
3102 }
3103
/* Reversed single-precision divide: calls float32_div() with a and b swapped. */
static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
{
    uint32_t quot = float32_div(b, a, s);

    return quot;
}
3108
/* Reversed double-precision divide: calls float64_div() with a and b swapped. */
static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
{
    uint64_t quot = float64_div(b, a, s);

    return quot;
}
3113
RVVCALL(OPFVF2,vfrdiv_vf_h,OP_UUU_H,H2,H2,float16_rdiv)3114 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
3115 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
3116 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
3117 GEN_VEXT_VF(vfrdiv_vf_h, 2)
3118 GEN_VEXT_VF(vfrdiv_vf_w, 4)
3119 GEN_VEXT_VF(vfrdiv_vf_d, 8)
3120
3121 /* Vector Widening Floating-Point Multiply */
3122 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3123 {
3124 return float32_mul(float16_to_float32(a, true, s),
3125 float16_to_float32(b, true, s), s);
3126 }
3127
/* Widening multiply: convert both float32 operands to float64, then multiply. */
static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
{
    uint64_t wide_a = float32_to_float64(a, s);
    uint64_t wide_b = float32_to_float64(b, s);

    return float64_mul(wide_a, wide_b, s);
}
RVVCALL(OPFVV2,vfwmul_vv_h,WOP_UUU_H,H4,H2,H2,vfwmul16)3134 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3135 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
3136 GEN_VEXT_VV_ENV(vfwmul_vv_h, 4)
3137 GEN_VEXT_VV_ENV(vfwmul_vv_w, 8)
3138 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3139 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
3140 GEN_VEXT_VF(vfwmul_vf_h, 4)
3141 GEN_VEXT_VF(vfwmul_vf_w, 8)
3142
3143 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
/*
 * OPFVV3: define do_<NAME>(), the per-element worker of a three-operand
 * vector-vector floating-point operation.
 *
 * Element i of vs1, of vs2 and the current element i of vd are loaded,
 * and OP(vs2 element, vs1 element, vd element, &env->fp_status) is
 * written back to element i of vd.
 */
#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
                      CPURISCVState *env)                      \
{                                                              \
    TD *dest = (TD *)vd + HD(i);                               \
    TX1 rhs = ((T1 *)vs1)[HS1(i)];                             \
    TX2 lhs = ((T2 *)vs2)[HS2(i)];                             \
    TD acc = *dest;                                            \
    *dest = OP(lhs, rhs, acc, &env->fp_status);                \
}
3153
3154 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3155 {
3156 return float16_muladd(a, b, d, 0, s);
3157 }
3158
/* Single-precision fused (a * b) + d, with no muladd flags. */
static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    const int flags = 0;

    return float32_muladd(a, b, d, flags, s);
}
3163
/* Double-precision fused (a * b) + d, with no muladd flags. */
static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    const int flags = 0;

    return float64_muladd(a, b, d, flags, s);
}
3168
RVVCALL(OPFVV3,vfmacc_vv_h,OP_UUU_H,H2,H2,H2,fmacc16)3169 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3170 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3171 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
3172 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2)
3173 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4)
3174 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8)
3175
/*
 * OPFVF3: define do_<NAME>(), the per-element worker of a three-operand
 * vector-scalar floating-point operation.
 *
 * Element i of vs2 and the current element i of vd are loaded, the
 * 64-bit scalar s1 is converted to T1 and then TX1, and
 * OP(vs2 element, scalar, vd element, &env->fp_status) is written back
 * to element i of vd.
 */
#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)           \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i,    \
                      CPURISCVState *env)                         \
{                                                                 \
    TD *dest = (TD *)vd + HD(i);                                  \
    TX2 elem = ((T2 *)vs2)[HS2(i)];                               \
    TD acc = *dest;                                               \
    TX1 scalar = (TX1)(T1)s1;                                     \
    *dest = OP(elem, scalar, acc, &env->fp_status);               \
}
3184
3185 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3186 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3187 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
3188 GEN_VEXT_VF(vfmacc_vf_h, 2)
3189 GEN_VEXT_VF(vfmacc_vf_w, 4)
3190 GEN_VEXT_VF(vfmacc_vf_d, 8)
3191
3192 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3193 {
3194 return float16_muladd(a, b, d, float_muladd_negate_c |
3195 float_muladd_negate_product, s);
3196 }
3197
/* Single-precision fused muladd of (a, b, d), negating product and addend. */
static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    const int flags = float_muladd_negate_c | float_muladd_negate_product;

    return float32_muladd(a, b, d, flags, s);
}
3203
/* Double-precision fused muladd of (a, b, d), negating product and addend. */
static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    const int flags = float_muladd_negate_c | float_muladd_negate_product;

    return float64_muladd(a, b, d, flags, s);
}
3209
RVVCALL(OPFVV3,vfnmacc_vv_h,OP_UUU_H,H2,H2,H2,fnmacc16)3210 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3211 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3212 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
3213 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2)
3214 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4)
3215 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8)
3216 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3217 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3218 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
3219 GEN_VEXT_VF(vfnmacc_vf_h, 2)
3220 GEN_VEXT_VF(vfnmacc_vf_w, 4)
3221 GEN_VEXT_VF(vfnmacc_vf_d, 8)
3222
3223 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3224 {
3225 return float16_muladd(a, b, d, float_muladd_negate_c, s);
3226 }
3227
/* Single-precision fused muladd of (a, b, d) with the addend d negated. */
static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    const int flags = float_muladd_negate_c;

    return float32_muladd(a, b, d, flags, s);
}
3232
/* Double-precision fused muladd of (a, b, d) with the addend d negated. */
static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    const int flags = float_muladd_negate_c;

    return float64_muladd(a, b, d, flags, s);
}
3237
RVVCALL(OPFVV3,vfmsac_vv_h,OP_UUU_H,H2,H2,H2,fmsac16)3238 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3239 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3240 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
3241 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2)
3242 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4)
3243 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8)
3244 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3245 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3246 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
3247 GEN_VEXT_VF(vfmsac_vf_h, 2)
3248 GEN_VEXT_VF(vfmsac_vf_w, 4)
3249 GEN_VEXT_VF(vfmsac_vf_d, 8)
3250
3251 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3252 {
3253 return float16_muladd(a, b, d, float_muladd_negate_product, s);
3254 }
3255
/* Single-precision fused muladd of (a, b, d) with the product negated. */
static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    const int flags = float_muladd_negate_product;

    return float32_muladd(a, b, d, flags, s);
}
3260
/* Double-precision fused muladd of (a, b, d) with the product negated. */
static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    const int flags = float_muladd_negate_product;

    return float64_muladd(a, b, d, flags, s);
}
3265
RVVCALL(OPFVV3,vfnmsac_vv_h,OP_UUU_H,H2,H2,H2,fnmsac16)3266 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3267 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3268 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
3269 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2)
3270 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4)
3271 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8)
3272 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3273 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3274 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
3275 GEN_VEXT_VF(vfnmsac_vf_h, 2)
3276 GEN_VEXT_VF(vfnmsac_vf_w, 4)
3277 GEN_VEXT_VF(vfnmsac_vf_d, 8)
3278
3279 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3280 {
3281 return float16_muladd(d, b, a, 0, s);
3282 }
3283
/* Single-precision fused (d * b) + a: d is a multiplicand, a the addend. */
static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    const int flags = 0;

    return float32_muladd(d, b, a, flags, s);
}
3288
/* Double-precision fused (d * b) + a: d is a multiplicand, a the addend. */
static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    const int flags = 0;

    return float64_muladd(d, b, a, flags, s);
}
3293
RVVCALL(OPFVV3,vfmadd_vv_h,OP_UUU_H,H2,H2,H2,fmadd16)3294 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3295 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3296 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
3297 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2)
3298 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4)
3299 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8)
3300 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3301 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3302 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
3303 GEN_VEXT_VF(vfmadd_vf_h, 2)
3304 GEN_VEXT_VF(vfmadd_vf_w, 4)
3305 GEN_VEXT_VF(vfmadd_vf_d, 8)
3306
3307 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3308 {
3309 return float16_muladd(d, b, a, float_muladd_negate_c |
3310 float_muladd_negate_product, s);
3311 }
3312
/* Single-precision fused muladd of (d, b, a), negating product and addend. */
static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    const int flags = float_muladd_negate_c | float_muladd_negate_product;

    return float32_muladd(d, b, a, flags, s);
}
3318
/* Double-precision fused muladd of (d, b, a), negating product and addend. */
static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    const int flags = float_muladd_negate_c | float_muladd_negate_product;

    return float64_muladd(d, b, a, flags, s);
}
3324
RVVCALL(OPFVV3,vfnmadd_vv_h,OP_UUU_H,H2,H2,H2,fnmadd16)3325 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3326 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3327 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
3328 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2)
3329 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4)
3330 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8)
3331 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3332 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3333 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
3334 GEN_VEXT_VF(vfnmadd_vf_h, 2)
3335 GEN_VEXT_VF(vfnmadd_vf_w, 4)
3336 GEN_VEXT_VF(vfnmadd_vf_d, 8)
3337
3338 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3339 {
3340 return float16_muladd(d, b, a, float_muladd_negate_c, s);
3341 }
3342
/* Single-precision fused muladd of (d, b, a) with the addend a negated. */
static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    const int flags = float_muladd_negate_c;

    return float32_muladd(d, b, a, flags, s);
}
3347
/* Double-precision fused muladd of (d, b, a) with the addend a negated. */
static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    const int flags = float_muladd_negate_c;

    return float64_muladd(d, b, a, flags, s);
}
3352
RVVCALL(OPFVV3,vfmsub_vv_h,OP_UUU_H,H2,H2,H2,fmsub16)3353 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3354 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3355 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
3356 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2)
3357 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4)
3358 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8)
3359 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3360 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3361 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
3362 GEN_VEXT_VF(vfmsub_vf_h, 2)
3363 GEN_VEXT_VF(vfmsub_vf_w, 4)
3364 GEN_VEXT_VF(vfmsub_vf_d, 8)
3365
3366 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3367 {
3368 return float16_muladd(d, b, a, float_muladd_negate_product, s);
3369 }
3370
/* Single-precision fused muladd of (d, b, a) with the product negated. */
static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    const int flags = float_muladd_negate_product;

    return float32_muladd(d, b, a, flags, s);
}
3375
/* Double-precision fused muladd of (d, b, a) with the product negated. */
static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    const int flags = float_muladd_negate_product;

    return float64_muladd(d, b, a, flags, s);
}
3380
RVVCALL(OPFVV3,vfnmsub_vv_h,OP_UUU_H,H2,H2,H2,fnmsub16)3381 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3382 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3383 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
3384 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2)
3385 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4)
3386 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8)
3387 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3388 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3389 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
3390 GEN_VEXT_VF(vfnmsub_vf_h, 2)
3391 GEN_VEXT_VF(vfnmsub_vf_w, 4)
3392 GEN_VEXT_VF(vfnmsub_vf_d, 8)
3393
3394 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3395 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3396 {
3397 return float32_muladd(float16_to_float32(a, true, s),
3398 float16_to_float32(b, true, s), d, 0, s);
3399 }
3400
/*
 * Widening fused multiply-add: a and b are converted from float32 to
 * float64 and fed to float64_muladd() with the float64 addend d.
 */
static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
    uint64_t wide_a = float32_to_float64(a, s);
    uint64_t wide_b = float32_to_float64(b, s);

    return float64_muladd(wide_a, wide_b, d, 0, s);
}
3406
RVVCALL(OPFVV3,vfwmacc_vv_h,WOP_UUU_H,H4,H2,H2,fwmacc16)3407 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3408 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
3409 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4)
3410 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8)
3411 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3412 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
3413 GEN_VEXT_VF(vfwmacc_vf_h, 4)
3414 GEN_VEXT_VF(vfwmacc_vf_w, 8)
3415
3416 static uint32_t fwmaccbf16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3417 {
3418 return float32_muladd(bfloat16_to_float32(a, s),
3419 bfloat16_to_float32(b, s), d, 0, s);
3420 }
3421
/*
 * Instantiate the vfwmaccbf16 helpers (vector-vector and vector-scalar):
 * 16-bit sources, 32-bit (4-byte) destination elements.
 */
RVVCALL(OPFVV3, vfwmaccbf16_vv, WOP_UUU_H, H4, H2, H2, fwmaccbf16)
GEN_VEXT_VV_ENV(vfwmaccbf16_vv, 4)
RVVCALL(OPFVF3, vfwmaccbf16_vf, WOP_UUU_H, H4, H2, fwmaccbf16)
GEN_VEXT_VF(vfwmaccbf16_vf, 4)
3426
3427 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3428 {
3429 return float32_muladd(float16_to_float32(a, true, s),
3430 float16_to_float32(b, true, s), d,
3431 float_muladd_negate_c | float_muladd_negate_product,
3432 s);
3433 }
3434
/*
 * Widening negated multiply-accumulate on single-precision inputs: after
 * converting @a and @b to float64, computes -(a * b) - d in one fused
 * float64 operation.
 */
static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
    const int flags = float_muladd_negate_c | float_muladd_negate_product;
    float64 wide_a = float32_to_float64(a, s);
    float64 wide_b = float32_to_float64(b, s);

    return float64_muladd(wide_a, wide_b, d, flags, s);
}
3441
/*
 * Instantiate the vfwnmacc helpers (vector-vector and vector-scalar) for
 * 16-bit and 32-bit sources; destination elements are 4 and 8 bytes.
 */
RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4)
GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8)
RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
GEN_VEXT_VF(vfwnmacc_vf_h, 4)
GEN_VEXT_VF(vfwnmacc_vf_w, 8)
3450
3451 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3452 {
3453 return float32_muladd(float16_to_float32(a, true, s),
3454 float16_to_float32(b, true, s), d,
3455 float_muladd_negate_c, s);
3456 }
3457
/*
 * Widening multiply-subtract on single-precision inputs: after converting
 * @a and @b to float64, computes (a * b) - d in one fused float64
 * operation.
 */
static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
    const int flags = float_muladd_negate_c;
    float64 wide_a = float32_to_float64(a, s);
    float64 wide_b = float32_to_float64(b, s);

    return float64_muladd(wide_a, wide_b, d, flags, s);
}
3464
/*
 * Instantiate the vfwmsac helpers (vector-vector and vector-scalar) for
 * 16-bit and 32-bit sources; destination elements are 4 and 8 bytes.
 */
RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4)
GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8)
RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
GEN_VEXT_VF(vfwmsac_vf_h, 4)
GEN_VEXT_VF(vfwmsac_vf_w, 8)
3473
3474 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3475 {
3476 return float32_muladd(float16_to_float32(a, true, s),
3477 float16_to_float32(b, true, s), d,
3478 float_muladd_negate_product, s);
3479 }
3480
/*
 * Widening negated multiply-subtract on single-precision inputs: after
 * converting @a and @b to float64, computes -(a * b) + d in one fused
 * float64 operation.
 */
static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
    const int flags = float_muladd_negate_product;
    float64 wide_a = float32_to_float64(a, s);
    float64 wide_b = float32_to_float64(b, s);

    return float64_muladd(wide_a, wide_b, d, flags, s);
}
3487
/*
 * Instantiate the vfwnmsac helpers (vector-vector and vector-scalar) for
 * 16-bit and 32-bit sources; destination elements are 4 and 8 bytes.
 */
RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4)
GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8)
RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
GEN_VEXT_VF(vfwnmsac_vf_h, 4)
GEN_VEXT_VF(vfwnmsac_vf_w, 8)
3496
3497 /* Vector Floating-Point Square-Root Instruction */
/*
 * Define do_<NAME>(), the per-element body of a unary floating-point op.
 *
 * Element HS2(i) of vs2 is read as T2 and held as TX2, passed to OP together
 * with env->fp_status, and the result is stored as TD at element HD(i) of vd.
 */
#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP)          \
static void do_##NAME(void *vd, void *vs2, int i,       \
                      CPURISCVState *env)               \
{                                                       \
    T2 *src = vs2;                                      \
    TD *dst = vd;                                       \
    TX2 s2 = src[HS2(i)];                               \
                                                        \
    dst[HD(i)] = OP(s2, &env->fp_status);               \
}
3505
/*
 * Define HELPER(NAME), the vector-level driver for a unary op whose
 * per-element body is do_<NAME>().  ESZ is the destination element size in
 * bytes.
 *
 * Elements from env->vstart up to vl are processed; an element is active
 * when the op is unmasked (vm set) or its bit in v0 is set.  Inactive
 * elements and the tail [vl, total_elems) are handed to vext_set_elems_1s()
 * together with the vma / vta policy bits.  env->vstart is cleared on
 * completion.
 */
#define GEN_VEXT_V_ENV(NAME, ESZ)                       \
void HELPER(NAME)(void *vd, void *v0, void *vs2,        \
                  CPURISCVState *env, uint32_t desc)    \
{                                                       \
    uint32_t vm = vext_vm(desc);                        \
    uint32_t vma = vext_vma(desc);                      \
    uint32_t vta = vext_vta(desc);                      \
    uint32_t vl = env->vl;                              \
    uint32_t total_elems =                              \
        vext_get_total_elems(env, desc, ESZ);           \
    uint32_t i;                                         \
                                                        \
    VSTART_CHECK_EARLY_EXIT(env);                       \
                                                        \
    if (vl == 0) {                                      \
        return;                                         \
    }                                                   \
    for (i = env->vstart; i < vl; i++) {                \
        if (vm || vext_elem_mask(v0, i)) {              \
            do_##NAME(vd, vs2, i, env);                 \
        } else {                                        \
            /* set masked-off elements to 1s */         \
            vext_set_elems_1s(vd, vma, i * ESZ,         \
                              (i + 1) * ESZ);           \
        }                                               \
    }                                                   \
    env->vstart = 0;                                    \
    /* set tail elements to 1s */                       \
    vext_set_elems_1s(vd, vta, vl * ESZ,                \
                      total_elems * ESZ);               \
}
3536
/*
 * Instantiate the vfsqrt.v helpers for 16-, 32- and 64-bit elements on top
 * of float16_sqrt / float32_sqrt / float64_sqrt.
 */
RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
GEN_VEXT_V_ENV(vfsqrt_v_h, 2)
GEN_VEXT_V_ENV(vfsqrt_v_w, 4)
GEN_VEXT_V_ENV(vfsqrt_v_d, 8)
3543
/*
 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
 *
 * Adapted from riscv-v-spec recip.c:
 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
 *
 * frsqrt7() is the format-independent core of the 7-bit estimate.
 *
 * @f:         raw bits of the operand; the callers in this file only pass
 *             positive normal or positive subnormal values
 * @exp_size:  width of the exponent field, in bits
 * @frac_size: width of the fraction field, in bits
 *
 * Returns the raw bits of the estimate, laid out like @f.
 */
static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
{
    uint64_t sign = extract64(f, frac_size + exp_size, 1);
    uint64_t exp = extract64(f, frac_size, exp_size);
    uint64_t frac = extract64(f, 0, frac_size);

    /*
     * Indexed by the exponent's low bit followed by the top six fraction
     * bits.  Declared static so the table is a single read-only object
     * rather than an automatic array that may be rebuilt on every call.
     */
    static const uint8_t lookup_table[] = {
        52, 51, 50, 48, 47, 46, 44, 43,
        42, 41, 40, 39, 38, 36, 35, 34,
        33, 32, 31, 30, 30, 29, 28, 27,
        26, 25, 24, 23, 23, 22, 21, 20,
        19, 19, 18, 17, 16, 16, 15, 14,
        14, 13, 12, 12, 11, 10, 10, 9,
        9, 8, 7, 7, 6, 6, 5, 4,
        4, 3, 3, 2, 2, 1, 1, 0,
        127, 125, 123, 121, 119, 118, 116, 114,
        113, 111, 109, 108, 106, 105, 103, 102,
        100, 99, 97, 96, 95, 93, 92, 91,
        90, 88, 87, 86, 85, 84, 83, 82,
        80, 79, 78, 77, 76, 75, 74, 73,
        72, 71, 70, 70, 69, 68, 67, 66,
        65, 64, 63, 63, 62, 61, 60, 59,
        59, 58, 57, 56, 56, 55, 54, 53
    };
    const int precision = 7;

    if (exp == 0 && frac != 0) { /* subnormal */
        /*
         * Normalize the subnormal.  exp is unsigned, so decrementing it
         * below zero wraps; the wrapped value is only consumed through
         * its low bit and through ~exp below.
         */
        while (extract64(frac, frac_size - 1, 1) == 0) {
            exp--;
            frac <<= 1;
        }

        /* Drop the now-explicit leading one. */
        frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
    }

    int idx = ((exp & 1) << (precision - 1)) |
              (frac >> (frac_size - precision + 1));
    uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
                        (frac_size - precision);
    /* With ~exp == -exp - 1 this is (3 * B - 1 - exp) / 2, B the mask value. */
    uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;

    uint64_t val = 0;
    val = deposit64(val, 0, frac_size, out_frac);
    val = deposit64(val, frac_size, exp_size, out_exp);
    val = deposit64(val, frac_size + exp_size, 1, sign);
    return val;
}
3598
frsqrt7_h(float16 f,float_status * s)3599 static float16 frsqrt7_h(float16 f, float_status *s)
3600 {
3601 int exp_size = 5, frac_size = 10;
3602 bool sign = float16_is_neg(f);
3603
3604 /*
3605 * frsqrt7(sNaN) = canonical NaN
3606 * frsqrt7(-inf) = canonical NaN
3607 * frsqrt7(-normal) = canonical NaN
3608 * frsqrt7(-subnormal) = canonical NaN
3609 */
3610 if (float16_is_signaling_nan(f, s) ||
3611 (float16_is_infinity(f) && sign) ||
3612 (float16_is_normal(f) && sign) ||
3613 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
3614 s->float_exception_flags |= float_flag_invalid;
3615 return float16_default_nan(s);
3616 }
3617
3618 /* frsqrt7(qNaN) = canonical NaN */
3619 if (float16_is_quiet_nan(f, s)) {
3620 return float16_default_nan(s);
3621 }
3622
3623 /* frsqrt7(+-0) = +-inf */
3624 if (float16_is_zero(f)) {
3625 s->float_exception_flags |= float_flag_divbyzero;
3626 return float16_set_sign(float16_infinity, sign);
3627 }
3628
3629 /* frsqrt7(+inf) = +0 */
3630 if (float16_is_infinity(f) && !sign) {
3631 return float16_set_sign(float16_zero, sign);
3632 }
3633
3634 /* +normal, +subnormal */
3635 uint64_t val = frsqrt7(f, exp_size, frac_size);
3636 return make_float16(val);
3637 }
3638
frsqrt7_s(float32 f,float_status * s)3639 static float32 frsqrt7_s(float32 f, float_status *s)
3640 {
3641 int exp_size = 8, frac_size = 23;
3642 bool sign = float32_is_neg(f);
3643
3644 /*
3645 * frsqrt7(sNaN) = canonical NaN
3646 * frsqrt7(-inf) = canonical NaN
3647 * frsqrt7(-normal) = canonical NaN
3648 * frsqrt7(-subnormal) = canonical NaN
3649 */
3650 if (float32_is_signaling_nan(f, s) ||
3651 (float32_is_infinity(f) && sign) ||
3652 (float32_is_normal(f) && sign) ||
3653 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
3654 s->float_exception_flags |= float_flag_invalid;
3655 return float32_default_nan(s);
3656 }
3657
3658 /* frsqrt7(qNaN) = canonical NaN */
3659 if (float32_is_quiet_nan(f, s)) {
3660 return float32_default_nan(s);
3661 }
3662
3663 /* frsqrt7(+-0) = +-inf */
3664 if (float32_is_zero(f)) {
3665 s->float_exception_flags |= float_flag_divbyzero;
3666 return float32_set_sign(float32_infinity, sign);
3667 }
3668
3669 /* frsqrt7(+inf) = +0 */
3670 if (float32_is_infinity(f) && !sign) {
3671 return float32_set_sign(float32_zero, sign);
3672 }
3673
3674 /* +normal, +subnormal */
3675 uint64_t val = frsqrt7(f, exp_size, frac_size);
3676 return make_float32(val);
3677 }
3678
frsqrt7_d(float64 f,float_status * s)3679 static float64 frsqrt7_d(float64 f, float_status *s)
3680 {
3681 int exp_size = 11, frac_size = 52;
3682 bool sign = float64_is_neg(f);
3683
3684 /*
3685 * frsqrt7(sNaN) = canonical NaN
3686 * frsqrt7(-inf) = canonical NaN
3687 * frsqrt7(-normal) = canonical NaN
3688 * frsqrt7(-subnormal) = canonical NaN
3689 */
3690 if (float64_is_signaling_nan(f, s) ||
3691 (float64_is_infinity(f) && sign) ||
3692 (float64_is_normal(f) && sign) ||
3693 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
3694 s->float_exception_flags |= float_flag_invalid;
3695 return float64_default_nan(s);
3696 }
3697
3698 /* frsqrt7(qNaN) = canonical NaN */
3699 if (float64_is_quiet_nan(f, s)) {
3700 return float64_default_nan(s);
3701 }
3702
3703 /* frsqrt7(+-0) = +-inf */
3704 if (float64_is_zero(f)) {
3705 s->float_exception_flags |= float_flag_divbyzero;
3706 return float64_set_sign(float64_infinity, sign);
3707 }
3708
3709 /* frsqrt7(+inf) = +0 */
3710 if (float64_is_infinity(f) && !sign) {
3711 return float64_set_sign(float64_zero, sign);
3712 }
3713
3714 /* +normal, +subnormal */
3715 uint64_t val = frsqrt7(f, exp_size, frac_size);
3716 return make_float64(val);
3717 }
3718
/*
 * Instantiate the vfrsqrt7.v helpers for 16-, 32- and 64-bit elements on
 * top of frsqrt7_h / frsqrt7_s / frsqrt7_d.
 */
RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2)
GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4)
GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8)
3725
3726 /*
3727 * Vector Floating-Point Reciprocal Estimate Instruction
3728 *
3729 * Adapted from riscv-v-spec recip.c:
3730 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3731 */
3732 static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
3733 float_status *s)
3734 {
3735 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3736 uint64_t exp = extract64(f, frac_size, exp_size);
3737 uint64_t frac = extract64(f, 0, frac_size);
3738
3739 const uint8_t lookup_table[] = {
3740 127, 125, 123, 121, 119, 117, 116, 114,
3741 112, 110, 109, 107, 105, 104, 102, 100,
3742 99, 97, 96, 94, 93, 91, 90, 88,
3743 87, 85, 84, 83, 81, 80, 79, 77,
3744 76, 75, 74, 72, 71, 70, 69, 68,
3745 66, 65, 64, 63, 62, 61, 60, 59,
3746 58, 57, 56, 55, 54, 53, 52, 51,
3747 50, 49, 48, 47, 46, 45, 44, 43,
3748 42, 41, 40, 40, 39, 38, 37, 36,
3749 35, 35, 34, 33, 32, 31, 31, 30,
3750 29, 28, 28, 27, 26, 25, 25, 24,
3751 23, 23, 22, 21, 21, 20, 19, 19,
3752 18, 17, 17, 16, 15, 15, 14, 14,
3753 13, 12, 12, 11, 11, 10, 9, 9,
3754 8, 8, 7, 7, 6, 5, 5, 4,
3755 4, 3, 3, 2, 2, 1, 1, 0
3756 };
3757 const int precision = 7;
3758
3759 if (exp == 0 && frac != 0) { /* subnormal */
3760 /* Normalize the subnormal. */
3761 while (extract64(frac, frac_size - 1, 1) == 0) {
3762 exp--;
3763 frac <<= 1;
3764 }
3765
3766 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3767
3768 if (exp != 0 && exp != UINT64_MAX) {
3769 /*
3770 * Overflow to inf or max value of same sign,
3771 * depending on sign and rounding mode.
3772 */
3773 s->float_exception_flags |= (float_flag_inexact |
3774 float_flag_overflow);
3775
3776 if ((s->float_rounding_mode == float_round_to_zero) ||
3777 ((s->float_rounding_mode == float_round_down) && !sign) ||
3778 ((s->float_rounding_mode == float_round_up) && sign)) {
3779 /* Return greatest/negative finite value. */
3780 return (sign << (exp_size + frac_size)) |
3781 (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
3782 } else {
3783 /* Return +-inf. */
3784 return (sign << (exp_size + frac_size)) |
3785 MAKE_64BIT_MASK(frac_size, exp_size);
3786 }
3787 }
3788 }
3789
3790 int idx = frac >> (frac_size - precision);
3791 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3792 (frac_size - precision);
3793 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;
3794
3795 if (out_exp == 0 || out_exp == UINT64_MAX) {
3796 /*
3797 * The result is subnormal, but don't raise the underflow exception,
3798 * because there's no additional loss of precision.
3799 */
3800 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
3801 if (out_exp == UINT64_MAX) {
3802 out_frac >>= 1;
3803 out_exp = 0;
3804 }
3805 }
3806
3807 uint64_t val = 0;
3808 val = deposit64(val, 0, frac_size, out_frac);
3809 val = deposit64(val, frac_size, exp_size, out_exp);
3810 val = deposit64(val, frac_size + exp_size, 1, sign);
3811 return val;
3812 }
3813
frec7_h(float16 f,float_status * s)3814 static float16 frec7_h(float16 f, float_status *s)
3815 {
3816 int exp_size = 5, frac_size = 10;
3817 bool sign = float16_is_neg(f);
3818
3819 /* frec7(+-inf) = +-0 */
3820 if (float16_is_infinity(f)) {
3821 return float16_set_sign(float16_zero, sign);
3822 }
3823
3824 /* frec7(+-0) = +-inf */
3825 if (float16_is_zero(f)) {
3826 s->float_exception_flags |= float_flag_divbyzero;
3827 return float16_set_sign(float16_infinity, sign);
3828 }
3829
3830 /* frec7(sNaN) = canonical NaN */
3831 if (float16_is_signaling_nan(f, s)) {
3832 s->float_exception_flags |= float_flag_invalid;
3833 return float16_default_nan(s);
3834 }
3835
3836 /* frec7(qNaN) = canonical NaN */
3837 if (float16_is_quiet_nan(f, s)) {
3838 return float16_default_nan(s);
3839 }
3840
3841 /* +-normal, +-subnormal */
3842 uint64_t val = frec7(f, exp_size, frac_size, s);
3843 return make_float16(val);
3844 }
3845
frec7_s(float32 f,float_status * s)3846 static float32 frec7_s(float32 f, float_status *s)
3847 {
3848 int exp_size = 8, frac_size = 23;
3849 bool sign = float32_is_neg(f);
3850
3851 /* frec7(+-inf) = +-0 */
3852 if (float32_is_infinity(f)) {
3853 return float32_set_sign(float32_zero, sign);
3854 }
3855
3856 /* frec7(+-0) = +-inf */
3857 if (float32_is_zero(f)) {
3858 s->float_exception_flags |= float_flag_divbyzero;
3859 return float32_set_sign(float32_infinity, sign);
3860 }
3861
3862 /* frec7(sNaN) = canonical NaN */
3863 if (float32_is_signaling_nan(f, s)) {
3864 s->float_exception_flags |= float_flag_invalid;
3865 return float32_default_nan(s);
3866 }
3867
3868 /* frec7(qNaN) = canonical NaN */
3869 if (float32_is_quiet_nan(f, s)) {
3870 return float32_default_nan(s);
3871 }
3872
3873 /* +-normal, +-subnormal */
3874 uint64_t val = frec7(f, exp_size, frac_size, s);
3875 return make_float32(val);
3876 }
3877
frec7_d(float64 f,float_status * s)3878 static float64 frec7_d(float64 f, float_status *s)
3879 {
3880 int exp_size = 11, frac_size = 52;
3881 bool sign = float64_is_neg(f);
3882
3883 /* frec7(+-inf) = +-0 */
3884 if (float64_is_infinity(f)) {
3885 return float64_set_sign(float64_zero, sign);
3886 }
3887
3888 /* frec7(+-0) = +-inf */
3889 if (float64_is_zero(f)) {
3890 s->float_exception_flags |= float_flag_divbyzero;
3891 return float64_set_sign(float64_infinity, sign);
3892 }
3893
3894 /* frec7(sNaN) = canonical NaN */
3895 if (float64_is_signaling_nan(f, s)) {
3896 s->float_exception_flags |= float_flag_invalid;
3897 return float64_default_nan(s);
3898 }
3899
3900 /* frec7(qNaN) = canonical NaN */
3901 if (float64_is_quiet_nan(f, s)) {
3902 return float64_default_nan(s);
3903 }
3904
3905 /* +-normal, +-subnormal */
3906 uint64_t val = frec7(f, exp_size, frac_size, s);
3907 return make_float64(val);
3908 }
3909
/*
 * Instantiate the vfrec7.v helpers for 16-, 32- and 64-bit elements on top
 * of frec7_h / frec7_s / frec7_d.
 */
RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
GEN_VEXT_V_ENV(vfrec7_v_h, 2)
GEN_VEXT_V_ENV(vfrec7_v_w, 4)
GEN_VEXT_V_ENV(vfrec7_v_d, 8)
3916
/*
 * Vector Floating-Point MIN/MAX Instructions
 *
 * vfmin and vfmax map directly onto the softfloat *_minimum_number and
 * *_maximum_number routines; each gets vector-vector and vector-scalar
 * helpers for 16-, 32- and 64-bit elements.
 */
RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
GEN_VEXT_VV_ENV(vfmin_vv_h, 2)
GEN_VEXT_VV_ENV(vfmin_vv_w, 4)
GEN_VEXT_VV_ENV(vfmin_vv_d, 8)
RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
GEN_VEXT_VF(vfmin_vf_h, 2)
GEN_VEXT_VF(vfmin_vf_w, 4)
GEN_VEXT_VF(vfmin_vf_d, 8)

/* vfmax: same layout as vfmin above, using the *_maximum_number routines. */
RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
GEN_VEXT_VV_ENV(vfmax_vv_h, 2)
GEN_VEXT_VV_ENV(vfmax_vv_w, 4)
GEN_VEXT_VV_ENV(vfmax_vv_d, 8)
RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
GEN_VEXT_VF(vfmax_vf_h, 2)
GEN_VEXT_VF(vfmax_vf_w, 4)
GEN_VEXT_VF(vfmax_vf_d, 8)
3943
3944 /* Vector Floating-Point Sign-Injection Instructions */
3945 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
3946 {
3947 return deposit64(b, 0, 15, a);
3948 }
3949
/*
 * Sign injection, 32-bit: bits [30:0] come from @a and bit 31 (the sign)
 * comes from @b.  @s is unused.
 */
static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
{
    uint64_t merged = deposit64(b, 0, 31, a);

    return merged;
}
3954
/*
 * Sign injection, 64-bit: bits [62:0] come from @a and bit 63 (the sign)
 * comes from @b.  @s is unused.
 */
static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
{
    uint64_t merged = deposit64(b, 0, 63, a);

    return merged;
}
3959
/*
 * Instantiate the vfsgnj helpers (vector-vector and vector-scalar) for
 * 16-, 32- and 64-bit elements on top of fsgnj16/32/64.
 */
RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2)
GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4)
GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8)
RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
GEN_VEXT_VF(vfsgnj_vf_h, 2)
GEN_VEXT_VF(vfsgnj_vf_w, 4)
GEN_VEXT_VF(vfsgnj_vf_d, 8)
3972
3973 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
3974 {
3975 return deposit64(~b, 0, 15, a);
3976 }
3977
/*
 * Negated sign injection, 32-bit: bits [30:0] come from @a and bit 31 is
 * the inverted sign bit of @b.  @s is unused.
 */
static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
{
    uint64_t merged = deposit64(~b, 0, 31, a);

    return merged;
}
3982
/*
 * Negated sign injection, 64-bit: bits [62:0] come from @a and bit 63 is
 * the inverted sign bit of @b.  @s is unused.
 */
static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
{
    uint64_t merged = deposit64(~b, 0, 63, a);

    return merged;
}
3987
/*
 * Instantiate the vfsgnjn helpers (vector-vector and vector-scalar) for
 * 16-, 32- and 64-bit elements on top of fsgnjn16/32/64.
 */
RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2)
GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4)
GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8)
RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
GEN_VEXT_VF(vfsgnjn_vf_h, 2)
GEN_VEXT_VF(vfsgnjn_vf_w, 4)
GEN_VEXT_VF(vfsgnjn_vf_d, 8)
4000
4001 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
4002 {
4003 return deposit64(b ^ a, 0, 15, a);
4004 }
4005
/*
 * XOR sign injection, 32-bit: bits [30:0] come from @a and bit 31 is the
 * XOR of the sign bits of @a and @b.  @s is unused.
 */
static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
{
    uint64_t merged = deposit64(b ^ a, 0, 31, a);

    return merged;
}
4010
/*
 * XOR sign injection, 64-bit: bits [62:0] come from @a and bit 63 is the
 * XOR of the sign bits of @a and @b.  @s is unused.
 */
static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
{
    uint64_t merged = deposit64(b ^ a, 0, 63, a);

    return merged;
}
4015
/*
 * Instantiate the vfsgnjx helpers (vector-vector and vector-scalar) for
 * 16-, 32- and 64-bit elements on top of fsgnjx16/32/64.
 */
RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2)
GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4)
GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8)
RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
GEN_VEXT_VF(vfsgnjx_vf_h, 2)
GEN_VEXT_VF(vfsgnjx_vf_w, 4)
GEN_VEXT_VF(vfsgnjx_vf_d, 8)
4028
4029 /* Vector Floating-Point Compare Instructions */
/*
 * Define HELPER(NAME), a vector-vector floating-point compare that writes
 * one mask bit per element into vd.
 *
 * For each element from env->vstart up to vl, an active element (op
 * unmasked, or its v0 bit set) gets DO_OP(vs2[i], vs1[i], &env->fp_status);
 * an inactive element is set to 1 only when vma is set.  When vta_all_1s is
 * set, the remaining bits up to vlenb * 8 are set to 1.  env->vstart is
 * cleared on completion.
 */
#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP)            \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t vma = vext_vma(desc);                            \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t vl = env->vl;                                    \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;    \
    uint32_t i;                                               \
                                                              \
    VSTART_CHECK_EARLY_EXIT(env);                             \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = ((ETYPE *)vs1)[H(i)];                      \
        ETYPE s2 = ((ETYPE *)vs2)[H(i)];                      \
                                                              \
        if (vm || vext_elem_mask(v0, i)) {                    \
            vext_set_elem_mask(vd, i,                         \
                               DO_OP(s2, s1, &env->fp_status)); \
        } else if (vma) {                                     \
            /* set masked-off elements to 1s */               \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
    env->vstart = 0;                                          \
    /* mask destinations are always tail-agnostic: */         \
    /* set the tail elements to 1s when requested */          \
    if (vta_all_1s) {                                         \
        while (i < total_elems) {                             \
            vext_set_elem_mask(vd, i, 1);                     \
            i++;                                              \
        }                                                     \
    }                                                         \
}
4067
/* vmfeq.vv: quiet equality compare for 16-, 32- and 64-bit elements. */
GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
4071
/*
 * Define HELPER(NAME), a vector-scalar floating-point compare that writes
 * one mask bit per element into vd.
 *
 * For each element from env->vstart up to vl, an active element (op
 * unmasked, or its v0 bit set) gets
 * DO_OP(vs2[i], (ETYPE)s1, &env->fp_status); an inactive element is set to
 * 1 only when vma is set.  When vta_all_1s is set, the remaining bits up to
 * vlenb * 8 are set to 1.  env->vstart is cleared on completion.
 */
#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP)                      \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,       \
                  CPURISCVState *env, uint32_t desc)                \
{                                                                   \
    uint32_t vm = vext_vm(desc);                                    \
    uint32_t vma = vext_vma(desc);                                  \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);                    \
    uint32_t vl = env->vl;                                          \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;          \
    uint32_t i;                                                     \
                                                                    \
    VSTART_CHECK_EARLY_EXIT(env);                                   \
                                                                    \
    for (i = env->vstart; i < vl; i++) {                            \
        ETYPE s2 = ((ETYPE *)vs2)[H(i)];                            \
                                                                    \
        if (vm || vext_elem_mask(v0, i)) {                          \
            vext_set_elem_mask(vd, i,                               \
                               DO_OP(s2, (ETYPE)s1, &env->fp_status)); \
        } else if (vma) {                                           \
            /* set masked-off elements to 1s */                     \
            vext_set_elem_mask(vd, i, 1);                           \
        }                                                           \
    }                                                               \
    env->vstart = 0;                                                \
    /* mask destinations are always tail-agnostic: */               \
    /* set the tail elements to 1s when requested */                \
    if (vta_all_1s) {                                               \
        while (i < total_elems) {                                   \
            vext_set_elem_mask(vd, i, 1);                           \
            i++;                                                    \
        }                                                           \
    }                                                               \
}
4108
/* vmfeq.vf: quiet equality compare against a scalar, 16/32/64-bit elements. */
GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
4112
4113 static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
4114 {
4115 FloatRelation compare = float16_compare_quiet(a, b, s);
4116 return compare != float_relation_equal;
4117 }
4118
/*
 * Single-precision "not equal": true for every quiet-compare outcome other
 * than float_relation_equal.
 */
static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
{
    return float32_compare_quiet(a, b, s) != float_relation_equal;
}
4124
/*
 * Double-precision "not equal": true for every quiet-compare outcome other
 * than float_relation_equal.
 */
static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
{
    return float64_compare_quiet(a, b, s) != float_relation_equal;
}
4130
/* vmfne: vector-vector and vector-scalar forms built on vmfne16/32/64. */
GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)

/* vmflt: uses the softfloat *_lt routines directly. */
GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)

/* vmfle: uses the softfloat *_le routines directly. */
GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
4151
4152 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
4153 {
4154 FloatRelation compare = float16_compare(a, b, s);
4155 return compare == float_relation_greater;
4156 }
4157
/*
 * Single-precision "greater than": true only when float32_compare() reports
 * float_relation_greater.
 */
static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
{
    return float32_compare(a, b, s) == float_relation_greater;
}
4163
/*
 * Double-precision "greater than": true only when float64_compare() reports
 * float_relation_greater.
 */
static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
{
    return float64_compare(a, b, s) == float_relation_greater;
}
4169
/* vmfgt: only the vector-scalar form is instantiated here. */
GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4173
4174 static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4175 {
4176 FloatRelation compare = float16_compare(a, b, s);
4177 return compare == float_relation_greater ||
4178 compare == float_relation_equal;
4179 }
4180
/*
 * Single-precision "greater than or equal": true when float32_compare()
 * reports float_relation_greater or float_relation_equal.
 */
static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
{
    switch (float32_compare(a, b, s)) {
    case float_relation_greater:
    case float_relation_equal:
        return true;
    default:
        return false;
    }
}
4187
/*
 * Double-precision "greater than or equal": true when float64_compare()
 * reports float_relation_greater or float_relation_equal.
 */
static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
{
    switch (float64_compare(a, b, s)) {
    case float_relation_greater:
    case float_relation_equal:
        return true;
    default:
        return false;
    }
}
4194
/* vmfge: only the vector-scalar form is instantiated here. */
GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
4198
4199 /* Vector Floating-Point Classify Instruction */
4200 target_ulong fclass_h(uint64_t frs1)
4201 {
4202 float16 f = frs1;
4203 bool sign = float16_is_neg(f);
4204
4205 if (float16_is_infinity(f)) {
4206 return sign ? 1 << 0 : 1 << 7;
4207 } else if (float16_is_zero(f)) {
4208 return sign ? 1 << 3 : 1 << 4;
4209 } else if (float16_is_zero_or_denormal(f)) {
4210 return sign ? 1 << 2 : 1 << 5;
4211 } else if (float16_is_any_nan(f)) {
4212 float_status s = { }; /* for snan_bit_is_one */
4213 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4214 } else {
4215 return sign ? 1 << 1 : 1 << 6;
4216 }
4217 }
4218
fclass_s(uint64_t frs1)4219 target_ulong fclass_s(uint64_t frs1)
4220 {
4221 float32 f = frs1;
4222 bool sign = float32_is_neg(f);
4223
4224 if (float32_is_infinity(f)) {
4225 return sign ? 1 << 0 : 1 << 7;
4226 } else if (float32_is_zero(f)) {
4227 return sign ? 1 << 3 : 1 << 4;
4228 } else if (float32_is_zero_or_denormal(f)) {
4229 return sign ? 1 << 2 : 1 << 5;
4230 } else if (float32_is_any_nan(f)) {
4231 float_status s = { }; /* for snan_bit_is_one */
4232 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4233 } else {
4234 return sign ? 1 << 1 : 1 << 6;
4235 }
4236 }
4237
fclass_d(uint64_t frs1)4238 target_ulong fclass_d(uint64_t frs1)
4239 {
4240 float64 f = frs1;
4241 bool sign = float64_is_neg(f);
4242
4243 if (float64_is_infinity(f)) {
4244 return sign ? 1 << 0 : 1 << 7;
4245 } else if (float64_is_zero(f)) {
4246 return sign ? 1 << 3 : 1 << 4;
4247 } else if (float64_is_zero_or_denormal(f)) {
4248 return sign ? 1 << 2 : 1 << 5;
4249 } else if (float64_is_any_nan(f)) {
4250 float_status s = { }; /* for snan_bit_is_one */
4251 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4252 } else {
4253 return sign ? 1 << 1 : 1 << 6;
4254 }
4255 }
4256
/*
 * Instantiate the vfclass.v helpers for 16-, 32- and 64-bit elements.
 * fclass_* take no float_status argument, so these go through OPIVV1 and
 * GEN_VEXT_V rather than the OPFVV1 / GEN_VEXT_V_ENV pair.
 */
RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
GEN_VEXT_V(vfclass_v_h, 2)
GEN_VEXT_V(vfclass_v_w, 4)
GEN_VEXT_V(vfclass_v_d, 8)
4263
4264 /* Vector Floating-Point Merge Instruction */
4265
/*
 * Define HELPER(NAME), the floating-point merge: for each element from
 * env->vstart up to vl, vd[i] receives the scalar s1 when the op is
 * unmasked or the element's v0 bit is set, and vs2[i] otherwise.  The tail
 * [vl, total_elems) is handed to vext_set_elems_1s() with the vta policy
 * bit, and env->vstart is cleared on completion.
 */
#define GEN_VFMERGE_VF(NAME, ETYPE, H)                        \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t vta = vext_vta(desc);                            \
    uint32_t vl = env->vl;                                    \
    uint32_t esz = sizeof(ETYPE);                             \
    uint32_t total_elems =                                    \
        vext_get_total_elems(env, desc, esz);                 \
    uint32_t i;                                               \
                                                              \
    VSTART_CHECK_EARLY_EXIT(env);                             \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s2 = ((ETYPE *)vs2)[H(i)];                      \
                                                              \
        if (vm || vext_elem_mask(v0, i)) {                    \
            ((ETYPE *)vd)[H(i)] = s1;                         \
        } else {                                              \
            ((ETYPE *)vd)[H(i)] = s2;                         \
        }                                                     \
    }                                                         \
    env->vstart = 0;                                          \
    /* set tail elements to 1s */                             \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);  \
}
4289
/* vfmerge.vfm helpers for 16-, 32- and 64-bit elements. */
GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
4293
/*
 * Single-Width Floating-Point/Integer Type-Convert Instructions
 *
 * Source and destination elements have the same width, so each group
 * instantiates helpers for 16-, 32- and 64-bit elements by pairing OPFVV1
 * with the matching softfloat conversion routine.
 */
/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2)
GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4)
GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8)

/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2)
GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4)
GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8)

/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2)
GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4)
GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8)

/* vfcvt.f.x.v vd, vs2, vm # Convert signed integer to float. */
RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2)
GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4)
GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8)
4326
/*
 * Widening Floating-Point/Integer Type-Convert Instructions
 *
 * The destination element is twice as wide as the source element, which
 * is why each RVVCALL uses a wider index macro for the destination
 * (e.g. H4) than for the source (e.g. H2).
 */
/*
 * Type lists in (TD, T2, TX2) order: the first entry is the double-width
 * destination type, the remaining two are the source-width type.
 * NOTE(review): exact role of T2 vs. TX2 is defined by OPFVV1 -- confirm.
 */
#define WOP_UU_B uint16_t, uint8_t,  uint8_t
#define WOP_UU_H uint32_t, uint16_t, uint16_t
#define WOP_UU_W uint64_t, uint32_t, uint32_t
/*
 * vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.
 */
RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4)
GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8)

/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4)
GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8)

/*
 * vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float.
 * An 8-bit source variant exists here because the result (float16) is
 * still a supported floating-point width.
 */
RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8)

/* vfwcvt.f.x.v vd, vs2, vm # Convert signed integer to double-width float. */
RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8)
4363
4364 /*
4365 * vfwcvt.f.f.v vd, vs2, vm # Convert single-width float to double-width float.
4366 */
4367 static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4368 {
4369 return float16_to_float32(a, true, s);
4370 }
4371
/*
 * Widening float-to-float conversions.  The 16-bit source goes through
 * the vfwcvtffv16() adapter; the 32-bit source calls float32_to_float64()
 * directly.
 */
RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4)
GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8)

/* Widening conversion from bfloat16 to float32. */
RVVCALL(OPFVV1, vfwcvtbf16_f_f_v, WOP_UU_H, H4, H2, bfloat16_to_float32)
GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v, 4)
4379
/*
 * Narrowing Floating-Point/Integer Type-Convert Instructions
 *
 * The source element is twice as wide as the destination element, which
 * is why each RVVCALL uses a narrower index macro for the destination
 * (e.g. H2) than for the source (e.g. H4).
 */
/*
 * Type lists in (TD, T2, TX2) order: the first entry is the narrow
 * destination type, the second the double-width source type.
 * NOTE(review): NOP_UU_B lists uint32_t as TX2 while T2 is uint16_t;
 * whether that is intentional depends on how OPFVV1 uses TX2 -- confirm.
 */
#define NOP_UU_B uint8_t,  uint16_t, uint32_t
#define NOP_UU_H uint16_t, uint32_t, uint32_t
#define NOP_UU_W uint32_t, uint64_t, uint64_t
/*
 * vfncvt.xu.f.w vd, vs2, vm # Convert double-width float to unsigned integer.
 */
RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4)

/* vfncvt.x.f.w vd, vs2, vm # Convert double-width float to signed integer. */
RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1)
GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2)
GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4)

/*
 * vfncvt.f.xu.w vd, vs2, vm # Convert double-width unsigned integer to float.
 */
RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2)
GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4)

/* vfncvt.f.x.w vd, vs2, vm # Convert double-width signed integer to float. */
RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2)
GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4)
4414
4415 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */
4416 static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4417 {
4418 return float32_to_float16(a, true, s);
4419 }
4420
/*
 * Narrowing float-to-float conversions.  The float32 source goes through
 * the vfncvtffv16() adapter; the float64 source calls float64_to_float32()
 * directly.
 */
RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2)
GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4)

/* Narrowing conversion from float32 to bfloat16. */
RVVCALL(OPFVV1, vfncvtbf16_f_f_w, NOP_UU_H, H2, H4, float32_to_bfloat16)
GEN_VEXT_V_ENV(vfncvtbf16_f_f_w, 2)
4428
4429 /*
4430 * Vector Reduction Operations
4431 */
4432 /* Vector Single-Width Integer Reduction Instructions */
/*
 * Generate an integer reduction helper.
 *
 * The accumulator starts as element 0 of vs1 and is combined through OP
 * with every element of vs2 in [vstart, vl) that is active (vm set, or
 * the matching v0 mask bit set).  Each vs2 element is cast to TD before
 * OP, so TD may be wider than TS2.  The result is stored to element 0 of
 * vd; everything past that first element is passed to
 * vext_set_elems_1s() as tail.
 *
 * When there are no body elements (vstart >= vl, notably vl == 0) the
 * destination must not be modified at all, neither element 0 nor the
 * tail, so the helper leaves through VSTART_CHECK_EARLY_EXIT() exactly
 * like the other helpers in this file.
 * NOTE(review): relies on that macro (vector_internals.h) returning when
 * env->vstart >= env->vl -- confirm.
 *
 * NAME:   helper name
 * TD/TS2: destination (accumulator) and source element types
 * HD/HS2: index-adjustment macros for those types
 * OP:     binary operation, invoked as OP(accumulator, element)
 */
#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP)          \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vm = vext_vm(desc);                          \
    uint32_t vl = env->vl;                                \
    uint32_t esz = sizeof(TD);                            \
    uint32_t vlenb = simd_maxsz(desc);                    \
    uint32_t vta = vext_vta(desc);                        \
    uint32_t i;                                           \
    TD s1 = *((TD *)vs1 + HD(0));                         \
                                                          \
    /* nothing to reduce: vd must stay untouched */       \
    VSTART_CHECK_EARLY_EXIT(env);                         \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                  \
        if (!vm && !vext_elem_mask(v0, i)) {              \
            /* inactive elements do not contribute */     \
            continue;                                     \
        }                                                 \
        s1 = OP(s1, (TD)s2);                              \
    }                                                     \
    *((TD *)vd + HD(0)) = s1;                             \
    env->vstart = 0;                                      \
    /* set tail elements to 1s */                         \
    vext_set_elems_1s(vd, vta, esz, vlenb);               \
}
4458
/*
 * Integer reduction instantiations.
 * Argument order: helper name, accumulator type, source element type,
 * accumulator index macro, source index macro, combining operation.
 * Signed and unsigned min/max share DO_MIN/DO_MAX; the element types
 * select the comparison signedness.
 */

/* vd[0] = sum(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredsum_vs_b, int8_t,  int8_t,  H1, H1, DO_ADD)
GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)

/* vd[0] = maxu(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmaxu_vs_b, uint8_t,  uint8_t,  H1, H1, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)

/* vd[0] = max(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmax_vs_b, int8_t,  int8_t,  H1, H1, DO_MAX)
GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)

/* vd[0] = minu(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredminu_vs_b, uint8_t,  uint8_t,  H1, H1, DO_MIN)
GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)

/* vd[0] = min(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmin_vs_b, int8_t,  int8_t,  H1, H1, DO_MIN)
GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)

/* vd[0] = and(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredand_vs_b, int8_t,  int8_t,  H1, H1, DO_AND)
GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)

/* vd[0] = or(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredor_vs_b, int8_t,  int8_t,  H1, H1, DO_OR)
GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)

/* vd[0] = xor(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredxor_vs_b, int8_t,  int8_t,  H1, H1, DO_XOR)
GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)

/* Vector Widening Integer Reduction Instructions */
/*
 * Signed sum reduction into a double-width accumulator: the source
 * elements are sign-extended by the (TD) cast inside GEN_VEXT_RED.
 */
GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t,  H2, H1, DO_ADD)
GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)

/*
 * Unsigned sum reduction into a double-width accumulator: the source
 * elements are zero-extended by the (TD) cast inside GEN_VEXT_RED.
 */
GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t,  H2, H1, DO_ADD)
GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
4517
4518 /* Vector Single-Width Floating-Point Reduction Instructions */
/*
 * Generate a floating-point reduction helper.
 *
 * Same structure as the integer reductions: the accumulator starts as
 * element 0 of vs1 and is combined with each active vs2 element in
 * [vstart, vl) in ascending index order through
 * OP(accumulator, element, &env->fp_status).  The result is stored to
 * element 0 of vd and the remainder of the register is passed to
 * vext_set_elems_1s() as tail.
 *
 * When there are no body elements (vstart >= vl, notably vl == 0) the
 * destination must not be modified at all, so the helper leaves through
 * VSTART_CHECK_EARLY_EXIT() like the other helpers in this file.
 * NOTE(review): relies on that macro (vector_internals.h) returning when
 * env->vstart >= env->vl -- confirm.
 *
 * NAME:   helper name
 * TD/TS2: destination (accumulator) and source element types
 * HD/HS2: index-adjustment macros for those types
 * OP:     softfloat-style operation taking (TD, TD-cast element, status)
 */
#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP)          \
void HELPER(NAME)(void *vd, void *v0, void *vs1,           \
                  void *vs2, CPURISCVState *env,           \
                  uint32_t desc)                           \
{                                                          \
    uint32_t vm = vext_vm(desc);                           \
    uint32_t vl = env->vl;                                 \
    uint32_t esz = sizeof(TD);                             \
    uint32_t vlenb = simd_maxsz(desc);                     \
    uint32_t vta = vext_vta(desc);                         \
    uint32_t i;                                            \
    TD s1 = *((TD *)vs1 + HD(0));                          \
                                                           \
    /* nothing to reduce: vd must stay untouched */        \
    VSTART_CHECK_EARLY_EXIT(env);                          \
                                                           \
    for (i = env->vstart; i < vl; i++) {                   \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                   \
        if (!vm && !vext_elem_mask(v0, i)) {               \
            /* inactive elements do not contribute */      \
            continue;                                      \
        }                                                  \
        s1 = OP(s1, (TD)s2, &env->fp_status);              \
    }                                                      \
    *((TD *)vd + HD(0)) = s1;                              \
    env->vstart = 0;                                       \
    /* set tail elements to 1s */                          \
    vext_set_elems_1s(vd, vta, esz, vlenb);                \
}
4544
/*
 * Unordered sum.  Instantiated with the same floatN_add routine as the
 * ordered sum below, so both accumulate in ascending element order.
 */
GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)

/* Ordered sum: accumulates in ascending element order. */
GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)

/* Maximum value, using the softfloat "maximum_number" operation */
GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2,
              float16_maximum_number)
GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4,
              float32_maximum_number)
GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8,
              float64_maximum_number)

/* Minimum value, using the softfloat "minimum_number" operation */
GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2,
              float16_minimum_number)
GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4,
              float32_minimum_number)
GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8,
              float64_minimum_number)
4570
4571 /* Vector Widening Floating-Point Add Instructions */
4572 static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s)
4573 {
4574 return float32_add(a, float16_to_float32(b, true, s), s);
4575 }
4576
/*
 * Widening add used by the widening floating-point reductions:
 * promote the float32 operand b to float64, then add it to the
 * float64 accumulator a.
 */
static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s)
{
    uint64_t b_wide = float32_to_float64(b, s);

    return float64_add(a, b_wide, s);
}
4581
/* Vector Widening Floating-Point Reduction Instructions */
/*
 * Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)).
 * Both flavours use the same fwadd16/fwadd32 routines, so they produce
 * the same in-order accumulation.
 */
GEN_VEXT_FRED(vfwredusum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
4588
4589 /*
4590 * Vector Mask Operations
4591 */
4592 /* Vector Mask-Register Logical Instructions */
/*
 * Generate a mask-register logical helper.
 *
 * For every bit position in [vstart, vl) the helper reads the mask bit
 * of vs1 and of vs2 and writes OP(vs2_bit, vs1_bit) to the same position
 * of vd.  v0 is not consulted.  Once the body is done, vstart is cleared
 * and, when vext_vta_all_1s(desc) is set, every remaining bit up to
 * vlenb * 8 is set to 1.
 *
 * NAME: helper name
 * OP:   two-operand bit operation, invoked as OP(vs2_bit, vs1_bit)
 */
#define GEN_VEXT_MASK_VV(NAME, OP)                        \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vl = env->vl;                                \
    uint32_t nbits = riscv_cpu_cfg(env)->vlenb << 3;      \
    uint32_t tail_all_1s = vext_vta_all_1s(desc);         \
    uint32_t bit;                                         \
                                                          \
    VSTART_CHECK_EARLY_EXIT(env);                         \
                                                          \
    bit = env->vstart;                                    \
    while (bit < vl) {                                    \
        int src1 = vext_elem_mask(vs1, bit);              \
        int src2 = vext_elem_mask(vs2, bit);              \
                                                          \
        vext_set_elem_mask(vd, bit, OP(src2, src1));      \
        bit++;                                            \
    }                                                     \
    env->vstart = 0;                                      \
    /* mask destinations are always tail-agnostic; */     \
    /* fill the tail bits with 1s when requested */       \
    while (tail_all_1s && bit < nbits) {                  \
        vext_set_elem_mask(vd, bit, 1);                   \
        bit++;                                            \
    }                                                     \
}
4622
/*
 * Single-bit logical operations used by the mask-register helpers.
 * Operands are expected to be 0 or 1; the result is 0 or 1.
 * Every argument is parenthesized so that compound expressions passed
 * as N or M are evaluated as a unit regardless of operator precedence.
 */
#define DO_NAND(N, M)   (!((N) & (M)))
#define DO_ANDNOT(N, M) ((N) & !(M))
#define DO_NOR(N, M)    (!((N) | (M)))
#define DO_ORNOT(N, M)  ((N) | !(M))
#define DO_XNOR(N, M)   (!((N) ^ (M)))
4628
/*
 * Mask-register logical helpers.  GEN_VEXT_MASK_VV invokes the operation
 * as OP(vs2_bit, vs1_bit), so for the asymmetric operations (vmandn,
 * vmorn) it is the vs1 bit that gets inverted.
 */
GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
4637
4638 /* Vector count population in mask vcpop */
4639 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
4640 uint32_t desc)
4641 {
4642 target_ulong cnt = 0;
4643 uint32_t vm = vext_vm(desc);
4644 uint32_t vl = env->vl;
4645 int i;
4646
4647 for (i = env->vstart; i < vl; i++) {
4648 if (vm || vext_elem_mask(v0, i)) {
4649 if (vext_elem_mask(vs2, i)) {
4650 cnt++;
4651 }
4652 }
4653 }
4654 env->vstart = 0;
4655 return cnt;
4656 }
4657
4658 /* vfirst find-first-set mask bit */
HELPER(vfirst_m)4659 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4660 uint32_t desc)
4661 {
4662 uint32_t vm = vext_vm(desc);
4663 uint32_t vl = env->vl;
4664 int i;
4665
4666 for (i = env->vstart; i < vl; i++) {
4667 if (vm || vext_elem_mask(v0, i)) {
4668 if (vext_elem_mask(vs2, i)) {
4669 return i;
4670 }
4671 }
4672 }
4673 env->vstart = 0;
4674 return -1LL;
4675 }
4676
/*
 * Variant selector for vmsetm(): decides which active destination bits
 * are set relative to the first set bit found in the source mask.
 * Active bits after that first set bit are always cleared.
 */
enum set_mask_type {
    ONLY_FIRST = 1,     /* set only the bit at the first set source bit */
    INCLUDE_FIRST,      /* set every bit up to and including that bit */
    BEFORE_FIRST,       /* set every bit strictly before that bit */
};
4682
/*
 * Common worker for the set-before-first / set-including-first /
 * set-only-first mask helpers.
 *
 * Walks the positions in [vstart, vl):
 *   - inactive positions (vm clear and v0 bit clear) are set to 1 when
 *     vma is set and otherwise left alone;
 *   - active positions after the first set vs2 bit are written with 0;
 *   - the position of the first set vs2 bit is written with 0 for
 *     BEFORE_FIRST and with 1 otherwise;
 *   - active positions before it are written with 0 for ONLY_FIRST and
 *     with 1 otherwise.
 * Afterwards vstart is cleared and, when vext_vta_all_1s(desc) is set,
 * all remaining bits up to vlenb * 8 are set to 1.
 *
 * When there are no body elements (vstart >= vl, notably vl == 0) the
 * destination must not be modified at all, so the function leaves
 * through VSTART_CHECK_EARLY_EXIT() before touching vd, matching the
 * other helpers in this file.
 * NOTE(review): relies on that macro (vector_internals.h) returning when
 * env->vstart >= env->vl -- confirm.
 */
static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
                   uint32_t desc, enum set_mask_type type)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;
    uint32_t vta_all_1s = vext_vta_all_1s(desc);
    uint32_t vma = vext_vma(desc);
    int i;
    bool first_mask_bit = false;

    /* nothing to do: vd (including its tail) must stay untouched */
    VSTART_CHECK_EARLY_EXIT(env);

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            /* set masked-off elements to 1s */
            if (vma) {
                vext_set_elem_mask(vd, i, 1);
            }
            continue;
        }
        /* write a zero to all following active elements */
        if (first_mask_bit) {
            vext_set_elem_mask(vd, i, 0);
            continue;
        }
        if (vext_elem_mask(vs2, i)) {
            /* first set source bit: only BEFORE_FIRST excludes it */
            first_mask_bit = true;
            vext_set_elem_mask(vd, i, type != BEFORE_FIRST);
        } else {
            /* still before the first set bit: only ONLY_FIRST clears */
            vext_set_elem_mask(vd, i, type != ONLY_FIRST);
        }
    }
    env->vstart = 0;
    /*
     * mask destination register are always tail-agnostic
     * set tail elements to 1s
     */
    if (vta_all_1s) {
        for (; i < total_elems; i++) {
            vext_set_elem_mask(vd, i, 1);
        }
    }
}
4733
/*
 * vmsbf.m helper: set-before-first.  Delegates to vmsetm() with
 * BEFORE_FIRST, i.e. active bits strictly before the first set vs2 bit
 * are set and that bit and all later active bits are cleared.
 */
void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
}
4739
/*
 * vmsif.m helper: set-including-first.  Delegates to vmsetm() with
 * INCLUDE_FIRST, i.e. active bits up to and including the first set vs2
 * bit are set and all later active bits are cleared.
 */
void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
}
4745
/*
 * vmsof.m helper: set-only-first.  Delegates to vmsetm() with
 * ONLY_FIRST, i.e. only the active bit at the first set vs2 bit is set
 * and every other active bit is cleared.
 */
void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
}
4751
4752 /* Vector Iota Instruction */
4753 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \
4754 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \
4755 uint32_t desc) \
4756 { \
4757 uint32_t vm = vext_vm(desc); \
4758 uint32_t vl = env->vl; \
4759 uint32_t esz = sizeof(ETYPE); \
4760 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4761 uint32_t vta = vext_vta(desc); \
4762 uint32_t vma = vext_vma(desc); \
4763 uint32_t sum = 0; \
4764 int i; \
4765 \
4766 for (i = env->vstart; i < vl; i++) { \
4767 if (!vm && !vext_elem_mask(v0, i)) { \
4768 /* set masked-off elements to 1s */ \
4769 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
4770 continue; \
4771 } \
4772 *((ETYPE *)vd + H(i)) = sum; \
4773 if (vext_elem_mask(vs2, i)) { \
4774 sum++; \
4775 } \
4776 } \
4777 env->vstart = 0; \
4778 /* set tail elements to 1s */ \
4779 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
4780 }
4781
GEN_VEXT_VIOTA_M(viota_m_b,uint8_t,H1)4782 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
4783 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
4784 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
4785 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
4786
4787 /* Vector Element Index Instruction */
4788 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \
4789 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \
4790 { \
4791 uint32_t vm = vext_vm(desc); \
4792 uint32_t vl = env->vl; \
4793 uint32_t esz = sizeof(ETYPE); \
4794 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4795 uint32_t vta = vext_vta(desc); \
4796 uint32_t vma = vext_vma(desc); \
4797 int i; \
4798 \
4799 VSTART_CHECK_EARLY_EXIT(env); \
4800 \
4801 for (i = env->vstart; i < vl; i++) { \
4802 if (!vm && !vext_elem_mask(v0, i)) { \
4803 /* set masked-off elements to 1s */ \
4804 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
4805 continue; \
4806 } \
4807 *((ETYPE *)vd + H(i)) = i; \
4808 } \
4809 env->vstart = 0; \
4810 /* set tail elements to 1s */ \
4811 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
4812 }
4813
4814 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
4815 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
4816 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
4817 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
4818
4819 /*
4820 * Vector Permutation Instructions
4821 */
4822
4823 /* Vector Slide Instructions */
4824 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \
4825 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4826 CPURISCVState *env, uint32_t desc) \
4827 { \
4828 uint32_t vm = vext_vm(desc); \
4829 uint32_t vl = env->vl; \
4830 uint32_t esz = sizeof(ETYPE); \
4831 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4832 uint32_t vta = vext_vta(desc); \
4833 uint32_t vma = vext_vma(desc); \
4834 target_ulong offset = s1, i_min, i; \
4835 \
4836 VSTART_CHECK_EARLY_EXIT(env); \
4837 \
4838 i_min = MAX(env->vstart, offset); \
4839 for (i = i_min; i < vl; i++) { \
4840 if (!vm && !vext_elem_mask(v0, i)) { \
4841 /* set masked-off elements to 1s */ \
4842 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
4843 continue; \
4844 } \
4845 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \
4846 } \
4847 env->vstart = 0; \
4848 /* set tail elements to 1s */ \
4849 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
4850 }
4851
4852 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
4853 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
4854 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
4855 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
4856 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
4857
4858 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \
4859 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4860 CPURISCVState *env, uint32_t desc) \
4861 { \
4862 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
4863 uint32_t vm = vext_vm(desc); \
4864 uint32_t vl = env->vl; \
4865 uint32_t esz = sizeof(ETYPE); \
4866 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4867 uint32_t vta = vext_vta(desc); \
4868 uint32_t vma = vext_vma(desc); \
4869 target_ulong i_max, i_min, i; \
4870 \
4871 VSTART_CHECK_EARLY_EXIT(env); \
4872 \
4873 i_min = MIN(s1 < vlmax ? vlmax - s1 : 0, vl); \
4874 i_max = MAX(i_min, env->vstart); \
4875 for (i = env->vstart; i < i_max; ++i) { \
4876 if (!vm && !vext_elem_mask(v0, i)) { \
4877 /* set masked-off elements to 1s */ \
4878 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
4879 continue; \
4880 } \
4881 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \
4882 } \
4883 \
4884 for (i = i_max; i < vl; ++i) { \
4885 if (vm || vext_elem_mask(v0, i)) { \
4886 *((ETYPE *)vd + H(i)) = 0; \
4887 } \
4888 } \
4889 \
4890 env->vstart = 0; \
4891 /* set tail elements to 1s */ \
4892 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
4893 }
4894
4895 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
4896 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
4897 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
4898 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
4899 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
4900
4901 #define GEN_VEXT_VSLIE1UP(BITWIDTH, H) \
4902 static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1, \
4903 void *vs2, CPURISCVState *env, \
4904 uint32_t desc) \
4905 { \
4906 typedef uint##BITWIDTH##_t ETYPE; \
4907 uint32_t vm = vext_vm(desc); \
4908 uint32_t vl = env->vl; \
4909 uint32_t esz = sizeof(ETYPE); \
4910 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4911 uint32_t vta = vext_vta(desc); \
4912 uint32_t vma = vext_vma(desc); \
4913 uint32_t i; \
4914 \
4915 VSTART_CHECK_EARLY_EXIT(env); \
4916 \
4917 for (i = env->vstart; i < vl; i++) { \
4918 if (!vm && !vext_elem_mask(v0, i)) { \
4919 /* set masked-off elements to 1s */ \
4920 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
4921 continue; \
4922 } \
4923 if (i == 0) { \
4924 *((ETYPE *)vd + H(i)) = s1; \
4925 } else { \
4926 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \
4927 } \
4928 } \
4929 env->vstart = 0; \
4930 /* set tail elements to 1s */ \
4931 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
4932 }
4933
4934 GEN_VEXT_VSLIE1UP(8, H1)
4935 GEN_VEXT_VSLIE1UP(16, H2)
4936 GEN_VEXT_VSLIE1UP(32, H4)
4937 GEN_VEXT_VSLIE1UP(64, H8)
4938
/*
 * Generate the integer slide1up helper NAME as a thin wrapper around the
 * static worker vslide1up_<BITWIDTH>().  The target_ulong scalar s1 is
 * implicitly converted to the worker's uint64_t parameter.
 */
#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH)                     \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)              \
{                                                                 \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);             \
}

/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
4951
4952 #define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \
4953 static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1, \
4954 void *vs2, CPURISCVState *env, \
4955 uint32_t desc) \
4956 { \
4957 typedef uint##BITWIDTH##_t ETYPE; \
4958 uint32_t vm = vext_vm(desc); \
4959 uint32_t vl = env->vl; \
4960 uint32_t esz = sizeof(ETYPE); \
4961 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4962 uint32_t vta = vext_vta(desc); \
4963 uint32_t vma = vext_vma(desc); \
4964 uint32_t i; \
4965 \
4966 VSTART_CHECK_EARLY_EXIT(env); \
4967 \
4968 for (i = env->vstart; i < vl; i++) { \
4969 if (!vm && !vext_elem_mask(v0, i)) { \
4970 /* set masked-off elements to 1s */ \
4971 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
4972 continue; \
4973 } \
4974 if (i == vl - 1) { \
4975 *((ETYPE *)vd + H(i)) = s1; \
4976 } else { \
4977 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \
4978 } \
4979 } \
4980 env->vstart = 0; \
4981 /* set tail elements to 1s */ \
4982 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
4983 }
4984
4985 GEN_VEXT_VSLIDE1DOWN(8, H1)
4986 GEN_VEXT_VSLIDE1DOWN(16, H2)
4987 GEN_VEXT_VSLIDE1DOWN(32, H4)
4988 GEN_VEXT_VSLIDE1DOWN(64, H8)
4989
/*
 * Generate the integer slide1down helper NAME as a thin wrapper around
 * the static worker vslide1down_<BITWIDTH>().  The target_ulong scalar
 * s1 is implicitly converted to the worker's uint64_t parameter.
 */
#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH)                   \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)              \
{                                                                 \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);           \
}

/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
5002
5003 /* Vector Floating-Point Slide Instructions */
/*
 * Generate the floating-point slide1up helper NAME.  The scalar arrives
 * as raw bits in a uint64_t, so the helper reuses the integer worker
 * vslide1up_<BITWIDTH>() unchanged.  No 8-bit variant is instantiated.
 */
#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH)                \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);         \
}

/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)
5015
/*
 * Generate the floating-point slide1down helper NAME.  The scalar
 * arrives as raw bits in a uint64_t, so the helper reuses the integer
 * worker vslide1down_<BITWIDTH>() unchanged.  No 8-bit variant is
 * instantiated.
 */
#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH)              \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);       \
}

/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
5027
5028 /* Vector Register Gather Instruction */
5029 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \
5030 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
5031 CPURISCVState *env, uint32_t desc) \
5032 { \
5033 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \
5034 uint32_t vm = vext_vm(desc); \
5035 uint32_t vl = env->vl; \
5036 uint32_t esz = sizeof(TS2); \
5037 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5038 uint32_t vta = vext_vta(desc); \
5039 uint32_t vma = vext_vma(desc); \
5040 uint64_t index; \
5041 uint32_t i; \
5042 \
5043 VSTART_CHECK_EARLY_EXIT(env); \
5044 \
5045 for (i = env->vstart; i < vl; i++) { \
5046 if (!vm && !vext_elem_mask(v0, i)) { \
5047 /* set masked-off elements to 1s */ \
5048 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5049 continue; \
5050 } \
5051 index = *((TS1 *)vs1 + HS1(i)); \
5052 if (index >= vlmax) { \
5053 *((TS2 *)vd + HS2(i)) = 0; \
5054 } else { \
5055 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \
5056 } \
5057 } \
5058 env->vstart = 0; \
5059 /* set tail elements to 1s */ \
5060 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5061 }
5062
5063 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
5064 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
5065 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
5066 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
5067 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)
5068
5069 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
5070 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
5071 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
5072 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
5073
5074 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \
5075 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5076 CPURISCVState *env, uint32_t desc) \
5077 { \
5078 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
5079 uint32_t vm = vext_vm(desc); \
5080 uint32_t vl = env->vl; \
5081 uint32_t esz = sizeof(ETYPE); \
5082 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5083 uint32_t vta = vext_vta(desc); \
5084 uint32_t vma = vext_vma(desc); \
5085 uint64_t index = s1; \
5086 uint32_t i; \
5087 \
5088 VSTART_CHECK_EARLY_EXIT(env); \
5089 \
5090 for (i = env->vstart; i < vl; i++) { \
5091 if (!vm && !vext_elem_mask(v0, i)) { \
5092 /* set masked-off elements to 1s */ \
5093 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5094 continue; \
5095 } \
5096 if (index >= vlmax) { \
5097 *((ETYPE *)vd + H(i)) = 0; \
5098 } else { \
5099 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \
5100 } \
5101 } \
5102 env->vstart = 0; \
5103 /* set tail elements to 1s */ \
5104 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5105 }
5106
5107 /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */
5108 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
5109 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
5110 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
5111 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
5112
5113 /* Vector Compress Instruction */
5114 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \
5115 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
5116 CPURISCVState *env, uint32_t desc) \
5117 { \
5118 uint32_t vl = env->vl; \
5119 uint32_t esz = sizeof(ETYPE); \
5120 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5121 uint32_t vta = vext_vta(desc); \
5122 uint32_t num = 0, i; \
5123 \
5124 for (i = env->vstart; i < vl; i++) { \
5125 if (!vext_elem_mask(vs1, i)) { \
5126 continue; \
5127 } \
5128 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \
5129 num++; \
5130 } \
5131 env->vstart = 0; \
5132 /* set tail elements to 1s */ \
5133 vext_set_elems_1s(vd, vta, num * esz, total_elems * esz); \
5134 }
5135
5136 /* Compress into vd elements of vs2 where vs1 is enabled */
5137 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
5138 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
5139 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
5140 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
5141
5142 /* Vector Whole Register Move */
5143 void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
5144 {
5145 /* EEW = SEW */
5146 uint32_t maxsz = simd_maxsz(desc);
5147 uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
5148 uint32_t startb = env->vstart * sewb;
5149 uint32_t i = startb;
5150
5151 if (startb >= maxsz) {
5152 env->vstart = 0;
5153 return;
5154 }
5155
5156 if (HOST_BIG_ENDIAN && i % 8 != 0) {
5157 uint32_t j = ROUND_UP(i, 8);
5158 memcpy((uint8_t *)vd + H1(j - 1),
5159 (uint8_t *)vs2 + H1(j - 1),
5160 j - i);
5161 i = j;
5162 }
5163
5164 memcpy((uint8_t *)vd + H1(i),
5165 (uint8_t *)vs2 + H1(i),
5166 maxsz - i);
5167
5168 env->vstart = 0;
5169 }
5170
5171 /* Vector Integer Extension */
5172 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \
5173 void HELPER(NAME)(void *vd, void *v0, void *vs2, \
5174 CPURISCVState *env, uint32_t desc) \
5175 { \
5176 uint32_t vl = env->vl; \
5177 uint32_t vm = vext_vm(desc); \
5178 uint32_t esz = sizeof(ETYPE); \
5179 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5180 uint32_t vta = vext_vta(desc); \
5181 uint32_t vma = vext_vma(desc); \
5182 uint32_t i; \
5183 \
5184 VSTART_CHECK_EARLY_EXIT(env); \
5185 \
5186 for (i = env->vstart; i < vl; i++) { \
5187 if (!vm && !vext_elem_mask(v0, i)) { \
5188 /* set masked-off elements to 1s */ \
5189 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5190 continue; \
5191 } \
5192 *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \
5193 } \
5194 env->vstart = 0; \
5195 /* set tail elements to 1s */ \
5196 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5197 }
5198
5199 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
5200 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
5201 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
5202 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
5203 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
5204 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)
5205
5206 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
5207 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
5208 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
5209 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
5210 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
5211 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
5212