1/*
2 *
3 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2 or later, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17#include "tcg/tcg-op-gvec.h"
18#include "tcg/tcg-gvec-desc.h"
19#include "internals.h"
20
/*
 * Do the register spans [astart, astart+asize) and [bstart, bstart+bsize)
 * share at least one register?  Two spans overlap exactly when the length
 * of their bounding span is smaller than the sum of their lengths.
 */
static inline bool is_overlapped(const int8_t astart, int8_t asize,
                                 const int8_t bstart, int8_t bsize)
{
    const int8_t aend = astart + asize;
    const int8_t bend = bstart + bsize;
    const int8_t span_hi = aend > bend ? aend : bend;
    const int8_t span_lo = astart < bstart ? astart : bstart;

    return span_hi - span_lo < asize + bsize;
}
29
/*
 * Vector instructions are only legal when mstatus.VS is not Off;
 * otherwise the caller must treat the instruction as illegal.
 */
static bool require_rvv(DisasContext *s)
{
    return s->mstatus_vs != 0;
}
34
35static bool require_rvf(DisasContext *s)
36{
37    if (s->mstatus_fs == 0) {
38        return false;
39    }
40
41    switch (s->sew) {
42    case MO_16:
43    case MO_32:
44        return has_ext(s, RVF);
45    case MO_64:
46        return has_ext(s, RVD);
47    default:
48        return false;
49    }
50}
51
/*
 * Like require_rvf(), but for widening ops whose destination element is
 * 2 * SEW wide: the required FP extension is keyed to the double-width
 * result (2*SEW = 32 needs RVF, 2*SEW = 64 needs RVD).
 */
static bool require_scale_rvf(DisasContext *s)
{
    if (s->mstatus_fs == 0) {
        return false;
    }

    switch (s->sew) {
    case MO_8:
    case MO_16:
        return has_ext(s, RVF);
    case MO_32:
        return has_ext(s, RVD);
    default:
        return false;
    }
}
68
69/* Destination vector register group cannot overlap source mask register. */
static bool require_vm(int vm, int vd)
{
    /* A masked op (vm == 0) may not write v0, which holds the mask. */
    return !(vm == 0 && vd == 0);
}
74
/*
 * Segment constraint: NFIELDS register groups may occupy at most 8
 * registers in total and must not extend past register v31.
 * A fractional LMUL still occupies one whole register per field.
 */
static bool require_nf(int vd, int nf, int lmul)
{
    int shift = lmul < 0 ? 0 : lmul;
    int size = nf << shift;

    return size <= 8 && vd + size <= 32;
}
80
81/*
82 * Vector register should aligned with the passed-in LMUL (EMUL).
83 * If LMUL < 0, i.e. fractional LMUL, any vector register is allowed.
84 */
static bool require_align(const int8_t val, const int8_t lmul)
{
    /* Fractional or unit LMUL: any register number is acceptable. */
    if (lmul <= 0) {
        return true;
    }
    /* Otherwise the low lmul bits of the register number must be zero. */
    return (val & ((1 << lmul) - 1)) == 0;
}
89
90/*
91 * A destination vector register group can overlap a source vector
92 * register group only if one of the following holds:
93 *  1. The destination EEW equals the source EEW.
94 *  2. The destination EEW is smaller than the source EEW and the overlap
95 *     is in the lowest-numbered part of the source register group.
96 *  3. The destination EEW is greater than the source EEW, the source EMUL
97 *     is at least 1, and the overlap is in the highest-numbered part of
98 *     the destination register group.
99 * (Section 5.2)
100 *
101 * This function returns true if one of the following holds:
102 *  * Destination vector register group does not overlap a source vector
103 *    register group.
104 *  * Rule 3 met.
105 * For rule 1, overlap is allowed so this function doesn't need to be called.
106 * For rule 2, (vd == vs). Caller has to check whether: (vd != vs) before
107 * calling this function.
108 */
static bool require_noover(const int8_t dst, const int8_t dst_lmul,
                           const int8_t src, const int8_t src_lmul)
{
    /* A fractional LMUL group still occupies one whole register. */
    int8_t dst_size = dst_lmul <= 0 ? 1 : 1 << dst_lmul;
    int8_t src_size = src_lmul <= 0 ? 1 : 1 << src_lmul;

    /* Destination EEW is greater than the source EEW, check rule 3. */
    if (dst_size > src_size) {
        if (dst < src &&
            src_lmul >= 0 &&
            is_overlapped(dst, dst_size, src, src_size) &&
            !is_overlapped(dst, dst_size, src + src_size, src_size)) {
            /*
             * Overlap confined to the highest-numbered part of the
             * destination group (the source sits at the top of it),
             * with source EMUL >= 1: rule 3 permits this.
             */
            return true;
        }
    }

    /* Otherwise the groups must be fully disjoint. */
    return !is_overlapped(dst, dst_size, src, src_size);
}
127
/*
 * Common translation for vsetvl/vsetvli: compute the new vl/vtype via
 * the vsetvl helper, write the result to rd, then end the translation
 * block, since later instructions depend on the new configuration.
 */
static bool do_vsetvl(DisasContext *s, int rd, int rs1, TCGv s2)
{
    TCGv s1, dst;

    if (!require_rvv(s) || !has_ext(s, RVV)) {
        return false;
    }

    dst = dest_gpr(s, rd);

    if (rd == 0 && rs1 == 0) {
        /* rd = x0, rs1 = x0: keep the current vl, change vtype only. */
        s1 = tcg_temp_new();
        tcg_gen_mov_tl(s1, cpu_vl);
    } else if (rs1 == 0) {
        /* rs1 = x0, rd != x0: request the maximum vector length. */
        /* As the mask is at least one bit, RV_VLEN_MAX is >= VLMAX */
        s1 = tcg_constant_tl(RV_VLEN_MAX);
    } else {
        /* AVL comes from the rs1 register. */
        s1 = get_gpr(s, rs1, EXT_ZERO);
    }

    gen_helper_vsetvl(dst, cpu_env, s1, s2);
    gen_set_gpr(s, rd, dst);
    mark_vs_dirty(s);

    /* The vector configuration changed: exit to the main loop. */
    tcg_gen_movi_tl(cpu_pc, s->pc_succ_insn);
    tcg_gen_lookup_and_goto_ptr();
    s->base.is_jmp = DISAS_NORETURN;

    if (rd == 0 && rs1 == 0) {
        /* Only the tcg_temp_new() path allocated a freeable temp. */
        tcg_temp_free(s1);
    }

    return true;
}
162
/*
 * Common translation for vsetivli: both the AVL (s1) and the new vtype
 * (s2) are immediates.  As in do_vsetvl(), the TB must end because the
 * vector configuration changes.
 */
static bool do_vsetivli(DisasContext *s, int rd, TCGv s1, TCGv s2)
{
    TCGv dst;

    if (!require_rvv(s) || !has_ext(s, RVV)) {
        return false;
    }

    dst = dest_gpr(s, rd);

    gen_helper_vsetvl(dst, cpu_env, s1, s2);
    gen_set_gpr(s, rd, dst);
    mark_vs_dirty(s);
    /* The vl/vtype change invalidates the current translation. */
    tcg_gen_movi_tl(cpu_pc, s->pc_succ_insn);
    tcg_gen_lookup_and_goto_ptr();
    s->base.is_jmp = DISAS_NORETURN;

    return true;
}
182
183static bool trans_vsetvl(DisasContext *s, arg_vsetvl *a)
184{
185    TCGv s2 = get_gpr(s, a->rs2, EXT_ZERO);
186    return do_vsetvl(s, a->rd, a->rs1, s2);
187}
188
189static bool trans_vsetvli(DisasContext *s, arg_vsetvli *a)
190{
191    TCGv s2 = tcg_constant_tl(a->zimm);
192    return do_vsetvl(s, a->rd, a->rs1, s2);
193}
194
195static bool trans_vsetivli(DisasContext *s, arg_vsetivli *a)
196{
197    TCGv s1 = tcg_const_tl(a->rs1);
198    TCGv s2 = tcg_const_tl(a->zimm);
199    return do_vsetivli(s, a->rd, s1, s2);
200}
201
202/* vector register offset from env */
static uint32_t vreg_ofs(DisasContext *s, int reg)
{
    /* Each vector register is vlen bits, i.e. vlen / 8 bytes, wide. */
    return offsetof(CPURISCVState, vreg) + reg * s->vlen / 8;
}
207
208/* check functions */
209
210/*
211 * Vector unit-stride, strided, unit-stride segment, strided segment
212 * store check function.
213 *
214 * Rules to be checked here:
215 *   1. EMUL must within the range: 1/8 <= EMUL <= 8. (Section 7.3)
216 *   2. Destination vector register number is multiples of EMUL.
217 *      (Section 3.4.2, 7.3)
218 *   3. The EMUL setting must be such that EMUL * NFIELDS ≤ 8. (Section 7.8)
219 *   4. Vector register numbers accessed by the segment load or store
220 *      cannot increment past 31. (Section 7.8)
221 */
222static bool vext_check_store(DisasContext *s, int vd, int nf, uint8_t eew)
223{
224    int8_t emul = eew - s->sew + s->lmul;
225    return (emul >= -3 && emul <= 3) &&
226            require_align(vd, emul) &&
227            require_nf(vd, nf, emul);
228}
229
230/*
231 * Vector unit-stride, strided, unit-stride segment, strided segment
232 * load check function.
233 *
234 * Rules to be checked here:
235 *   1. All rules applies to store instructions are applies
236 *      to load instructions.
237 *   2. Destination vector register group for a masked vector
238 *      instruction cannot overlap the source mask register (v0).
239 *      (Section 5.3)
240 */
241static bool vext_check_load(DisasContext *s, int vd, int nf, int vm,
242                            uint8_t eew)
243{
244    return vext_check_store(s, vd, nf, eew) && require_vm(vm, vd);
245}
246
247/*
248 * Vector indexed, indexed segment store check function.
249 *
250 * Rules to be checked here:
251 *   1. EMUL must within the range: 1/8 <= EMUL <= 8. (Section 7.3)
252 *   2. Index vector register number is multiples of EMUL.
253 *      (Section 3.4.2, 7.3)
254 *   3. Destination vector register number is multiples of LMUL.
255 *      (Section 3.4.2, 7.3)
256 *   4. The EMUL setting must be such that EMUL * NFIELDS ≤ 8. (Section 7.8)
257 *   5. Vector register numbers accessed by the segment load or store
258 *      cannot increment past 31. (Section 7.8)
259 */
260static bool vext_check_st_index(DisasContext *s, int vd, int vs2, int nf,
261                                uint8_t eew)
262{
263    int8_t emul = eew - s->sew + s->lmul;
264    return (emul >= -3 && emul <= 3) &&
265            require_align(vs2, emul) &&
266            require_align(vd, s->lmul) &&
267            require_nf(vd, nf, s->lmul);
268}
269
270/*
271 * Vector indexed, indexed segment load check function.
272 *
273 * Rules to be checked here:
274 *   1. All rules applies to store instructions are applies
275 *      to load instructions.
276 *   2. Destination vector register group for a masked vector
277 *      instruction cannot overlap the source mask register (v0).
278 *      (Section 5.3)
279 *   3. Destination vector register cannot overlap a source vector
280 *      register (vs2) group.
281 *      (Section 5.2)
282 *   4. Destination vector register groups cannot overlap
283 *      the source vector register (vs2) group for
284 *      indexed segment load instructions. (Section 7.8.3)
285 */
static bool vext_check_ld_index(DisasContext *s, int vd, int vs2,
                                int nf, int vm, uint8_t eew)
{
    int8_t seg_vd;
    /* EMUL of the index vector (vs2); data fields use LMUL. */
    int8_t emul = eew - s->sew + s->lmul;
    bool ret = vext_check_st_index(s, vd, vs2, nf, eew) &&
        require_vm(vm, vd);

    /* Each segment register group has to follow overlap rules. */
    for (int i = 0; i < nf; ++i) {
        /* Start register of the i-th field's destination group. */
        seg_vd = vd + (1 << MAX(s->lmul, 0)) * i;

        if (eew > s->sew) {
            /*
             * Index EEW > data SEW: full overlap (rule 1 analogue,
             * seg_vd == vs2) is tolerated, otherwise apply rule 3.
             */
            if (seg_vd != vs2) {
                ret &= require_noover(seg_vd, s->lmul, vs2, emul);
            }
        } else if (eew < s->sew) {
            /* Index EEW < data SEW: overlap only allowed per rule 2. */
            ret &= require_noover(seg_vd, s->lmul, vs2, emul);
        }

        /*
         * Destination vector register groups cannot overlap
         * the source vector register (vs2) group for
         * indexed segment load instructions.
         */
        if (nf > 1) {
            ret &= !is_overlapped(seg_vd, 1 << MAX(s->lmul, 0),
                                  vs2, 1 << MAX(emul, 0));
        }
    }
    return ret;
}
318
319static bool vext_check_ss(DisasContext *s, int vd, int vs, int vm)
320{
321    return require_vm(vm, vd) &&
322        require_align(vd, s->lmul) &&
323        require_align(vs, s->lmul);
324}
325
326/*
327 * Check function for vector instruction with format:
328 * single-width result and single-width sources (SEW = SEW op SEW)
329 *
330 * Rules to be checked here:
331 *   1. Destination vector register group for a masked vector
332 *      instruction cannot overlap the source mask register (v0).
333 *      (Section 5.3)
334 *   2. Destination vector register number is multiples of LMUL.
335 *      (Section 3.4.2)
336 *   3. Source (vs2, vs1) vector register number are multiples of LMUL.
337 *      (Section 3.4.2)
338 */
339static bool vext_check_sss(DisasContext *s, int vd, int vs1, int vs2, int vm)
340{
341    return vext_check_ss(s, vd, vs2, vm) &&
342        require_align(vs1, s->lmul);
343}
344
345static bool vext_check_ms(DisasContext *s, int vd, int vs)
346{
347    bool ret = require_align(vs, s->lmul);
348    if (vd != vs) {
349        ret &= require_noover(vd, 0, vs, s->lmul);
350    }
351    return ret;
352}
353
354/*
355 * Check function for maskable vector instruction with format:
356 * single-width result and single-width sources (SEW = SEW op SEW)
357 *
358 * Rules to be checked here:
359 *   1. Source (vs2, vs1) vector register number are multiples of LMUL.
360 *      (Section 3.4.2)
361 *   2. Destination vector register cannot overlap a source vector
362 *      register (vs2, vs1) group.
363 *      (Section 5.2)
364 *   3. The destination vector register group for a masked vector
365 *      instruction cannot overlap the source mask register (v0),
366 *      unless the destination vector register is being written
367 *      with a mask value (e.g., comparisons) or the scalar result
368 *      of a reduction. (Section 5.3)
369 */
370static bool vext_check_mss(DisasContext *s, int vd, int vs1, int vs2)
371{
372    bool ret = vext_check_ms(s, vd, vs2) &&
373        require_align(vs1, s->lmul);
374    if (vd != vs1) {
375        ret &= require_noover(vd, 0, vs1, s->lmul);
376    }
377    return ret;
378}
379
380/*
381 * Common check function for vector widening instructions
382 * of double-width result (2*SEW).
383 *
384 * Rules to be checked here:
385 *   1. The largest vector register group used by an instruction
386 *      can not be greater than 8 vector registers (Section 5.2):
387 *      => LMUL < 8.
388 *      => SEW < 64.
389 *   2. Destination vector register number is multiples of 2 * LMUL.
390 *      (Section 3.4.2)
391 *   3. Destination vector register group for a masked vector
392 *      instruction cannot overlap the source mask register (v0).
393 *      (Section 5.3)
394 */
395static bool vext_wide_check_common(DisasContext *s, int vd, int vm)
396{
397    return (s->lmul <= 2) &&
398           (s->sew < MO_64) &&
399           require_align(vd, s->lmul + 1) &&
400           require_vm(vm, vd);
401}
402
403/*
404 * Common check function for vector narrowing instructions
405 * of single-width result (SEW) and double-width source (2*SEW).
406 *
407 * Rules to be checked here:
408 *   1. The largest vector register group used by an instruction
409 *      can not be greater than 8 vector registers (Section 5.2):
410 *      => LMUL < 8.
411 *      => SEW < 64.
412 *   2. Source vector register number is multiples of 2 * LMUL.
413 *      (Section 3.4.2)
414 *   3. Destination vector register number is multiples of LMUL.
415 *      (Section 3.4.2)
416 *   4. Destination vector register group for a masked vector
417 *      instruction cannot overlap the source mask register (v0).
418 *      (Section 5.3)
419 */
420static bool vext_narrow_check_common(DisasContext *s, int vd, int vs2,
421                                     int vm)
422{
423    return (s->lmul <= 2) &&
424           (s->sew < MO_64) &&
425           require_align(vs2, s->lmul + 1) &&
426           require_align(vd, s->lmul) &&
427           require_vm(vm, vd);
428}
429
430static bool vext_check_ds(DisasContext *s, int vd, int vs, int vm)
431{
432    return vext_wide_check_common(s, vd, vm) &&
433        require_align(vs, s->lmul) &&
434        require_noover(vd, s->lmul + 1, vs, s->lmul);
435}
436
437static bool vext_check_dd(DisasContext *s, int vd, int vs, int vm)
438{
439    return vext_wide_check_common(s, vd, vm) &&
440        require_align(vs, s->lmul + 1);
441}
442
443/*
444 * Check function for vector instruction with format:
445 * double-width result and single-width sources (2*SEW = SEW op SEW)
446 *
447 * Rules to be checked here:
448 *   1. All rules in defined in widen common rules are applied.
449 *   2. Source (vs2, vs1) vector register number are multiples of LMUL.
450 *      (Section 3.4.2)
451 *   3. Destination vector register cannot overlap a source vector
452 *      register (vs2, vs1) group.
453 *      (Section 5.2)
454 */
455static bool vext_check_dss(DisasContext *s, int vd, int vs1, int vs2, int vm)
456{
457    return vext_check_ds(s, vd, vs2, vm) &&
458        require_align(vs1, s->lmul) &&
459        require_noover(vd, s->lmul + 1, vs1, s->lmul);
460}
461
462/*
463 * Check function for vector instruction with format:
464 * double-width result and double-width source1 and single-width
465 * source2 (2*SEW = 2*SEW op SEW)
466 *
467 * Rules to be checked here:
468 *   1. All rules in defined in widen common rules are applied.
469 *   2. Source 1 (vs2) vector register number is multiples of 2 * LMUL.
470 *      (Section 3.4.2)
471 *   3. Source 2 (vs1) vector register number is multiples of LMUL.
472 *      (Section 3.4.2)
473 *   4. Destination vector register cannot overlap a source vector
474 *      register (vs1) group.
475 *      (Section 5.2)
476 */
477static bool vext_check_dds(DisasContext *s, int vd, int vs1, int vs2, int vm)
478{
479    return vext_check_ds(s, vd, vs1, vm) &&
480        require_align(vs2, s->lmul + 1);
481}
482
483static bool vext_check_sd(DisasContext *s, int vd, int vs, int vm)
484{
485    bool ret = vext_narrow_check_common(s, vd, vs, vm);
486    if (vd != vs) {
487        ret &= require_noover(vd, s->lmul, vs, s->lmul + 1);
488    }
489    return ret;
490}
491
492/*
493 * Check function for vector instruction with format:
494 * single-width result and double-width source 1 and single-width
495 * source 2 (SEW = 2*SEW op SEW)
496 *
497 * Rules to be checked here:
498 *   1. All rules in defined in narrow common rules are applied.
499 *   2. Destination vector register cannot overlap a source vector
500 *      register (vs2) group.
501 *      (Section 5.2)
502 *   3. Source 2 (vs1) vector register number is multiples of LMUL.
503 *      (Section 3.4.2)
504 */
505static bool vext_check_sds(DisasContext *s, int vd, int vs1, int vs2, int vm)
506{
507    return vext_check_sd(s, vd, vs2, vm) &&
508        require_align(vs1, s->lmul);
509}
510
511/*
512 * Check function for vector reduction instructions.
513 *
514 * Rules to be checked here:
515 *   1. Source 1 (vs2) vector register number is multiples of LMUL.
516 *      (Section 3.4.2)
517 */
518static bool vext_check_reduction(DisasContext *s, int vs2)
519{
520    return require_align(vs2, s->lmul) && (s->vstart == 0);
521}
522
523/*
524 * Check function for vector slide instructions.
525 *
526 * Rules to be checked here:
527 *   1. Source 1 (vs2) vector register number is multiples of LMUL.
528 *      (Section 3.4.2)
529 *   2. Destination vector register number is multiples of LMUL.
530 *      (Section 3.4.2)
531 *   3. Destination vector register group for a masked vector
532 *      instruction cannot overlap the source mask register (v0).
533 *      (Section 5.3)
534 *   4. The destination vector register group for vslideup, vslide1up,
535 *      vfslide1up, cannot overlap the source vector register (vs2) group.
536 *      (Section 5.2, 16.3.1, 16.3.3)
537 */
538static bool vext_check_slide(DisasContext *s, int vd, int vs2,
539                             int vm, bool is_over)
540{
541    bool ret = require_align(vs2, s->lmul) &&
542               require_align(vd, s->lmul) &&
543               require_vm(vm, vd);
544    if (is_over) {
545        ret &= (vd != vs2);
546    }
547    return ret;
548}
549
550/*
551 * In cpu_get_tb_cpu_state(), set VILL if RVV was not present.
552 * So RVV is also be checked in this function.
553 */
static bool vext_check_isa_ill(DisasContext *s)
{
    /* vill also covers the missing-RVV case (see comment above). */
    return !s->vill;
}
558
/*
 * Common translation macro: run CHECK with the instruction's EEW and,
 * only if it passes, emit code through OP.  A failed check returns
 * false, which the decoder treats as an illegal instruction.
 */
#define GEN_VEXT_TRANS(NAME, EEW, ARGTYPE, OP, CHECK)        \
static bool trans_##NAME(DisasContext *s, arg_##ARGTYPE * a) \
{                                                            \
    if (CHECK(s, a, EEW)) {                                  \
        return OP(s, a, EEW);                                \
    }                                                        \
    return false;                                            \
}
568
569static uint8_t vext_get_emul(DisasContext *s, uint8_t eew)
570{
571    int8_t emul = eew - s->sew + s->lmul;
572    return emul < 0 ? 0 : emul;
573}
574
575/*
576 *** unit stride load and store
577 */
typedef void gen_helper_ldst_us(TCGv_ptr, TCGv_ptr, TCGv,
                                TCGv_env, TCGv_i32);

/*
 * Emit a unit-stride load/store: the helper receives pointers to vd and
 * to v0 (the mask register), the base address from rs1, and a
 * simd_desc-encoded descriptor.  The whole operation is skipped when
 * vl == 0; loads additionally mark the vector state dirty.
 */
static bool ldst_us_trans(uint32_t vd, uint32_t rs1, uint32_t data,
                          gen_helper_ldst_us *fn, DisasContext *s,
                          bool is_store)
{
    TCGv_ptr dest, mask;
    TCGv base;
    TCGv_i32 desc;

    /* Skip the access entirely when vl == 0. */
    TCGLabel *over = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);

    dest = tcg_temp_new_ptr();
    mask = tcg_temp_new_ptr();
    base = get_gpr(s, rs1, EXT_NONE);

    /*
     * As simd_desc supports at most 2048 bytes, and in this implementation,
     * the max vector group length is 4096 bytes. So split it into two parts.
     *
     * The first part is vlen in bytes, encoded in maxsz of simd_desc.
     * The second part is lmul, encoded in data of simd_desc.
     */
    desc = tcg_constant_i32(simd_desc(s->vlen / 8, s->vlen / 8, data));

    tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd));
    tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));

    fn(dest, mask, base, cpu_env, desc);

    tcg_temp_free_ptr(dest);
    tcg_temp_free_ptr(mask);

    if (!is_store) {
        /* Loads write vector registers: flag mstatus.VS dirty. */
        mark_vs_dirty(s);
    }

    gen_set_label(over);
    return true;
}
620
/* Emit a unit-stride load; EEW selects the helper, vm the masked form. */
static bool ld_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew)
{
    uint32_t data = 0;
    gen_helper_ldst_us *fn;
    /* Indexed by [vm][eew]: row 0 is masked, row 1 unmasked. */
    static gen_helper_ldst_us * const fns[2][4] = {
        /* masked unit stride load */
        { gen_helper_vle8_v_mask, gen_helper_vle16_v_mask,
          gen_helper_vle32_v_mask, gen_helper_vle64_v_mask },
        /* unmasked unit stride load */
        { gen_helper_vle8_v, gen_helper_vle16_v,
          gen_helper_vle32_v, gen_helper_vle64_v }
    };

    fn =  fns[a->vm][eew];
    if (fn == NULL) {
        return false;
    }

    /*
     * Vector load/store instructions have the EEW encoded
     * directly in the instructions. The maximum vector size is
     * calculated with EMUL rather than LMUL.
     */
    uint8_t emul = vext_get_emul(s, eew);
    data = FIELD_DP32(data, VDATA, VM, a->vm);
    data = FIELD_DP32(data, VDATA, LMUL, emul);
    data = FIELD_DP32(data, VDATA, NF, a->nf);
    return ldst_us_trans(a->rd, a->rs1, data, fn, s, false);
}
650
651static bool ld_us_check(DisasContext *s, arg_r2nfvm* a, uint8_t eew)
652{
653    return require_rvv(s) &&
654           vext_check_isa_ill(s) &&
655           vext_check_load(s, a->rd, a->nf, a->vm, eew);
656}
657
/* Unit-stride loads, EEW = 8/16/32/64 bits, encoded in the opcode. */
GEN_VEXT_TRANS(vle8_v,  MO_8,  r2nfvm, ld_us_op, ld_us_check)
GEN_VEXT_TRANS(vle16_v, MO_16, r2nfvm, ld_us_op, ld_us_check)
GEN_VEXT_TRANS(vle32_v, MO_32, r2nfvm, ld_us_op, ld_us_check)
GEN_VEXT_TRANS(vle64_v, MO_64, r2nfvm, ld_us_op, ld_us_check)
662
/* Emit a unit-stride store; EEW selects the helper, vm the masked form. */
static bool st_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew)
{
    uint32_t data = 0;
    gen_helper_ldst_us *fn;
    /* Indexed by [vm][eew]: row 0 is masked, row 1 unmasked. */
    static gen_helper_ldst_us * const fns[2][4] = {
        /* masked unit stride store */
        { gen_helper_vse8_v_mask, gen_helper_vse16_v_mask,
          gen_helper_vse32_v_mask, gen_helper_vse64_v_mask },
        /* unmasked unit stride store */
        { gen_helper_vse8_v, gen_helper_vse16_v,
          gen_helper_vse32_v, gen_helper_vse64_v }
    };

    fn =  fns[a->vm][eew];
    if (fn == NULL) {
        return false;
    }

    /* The descriptor carries EMUL (not LMUL) for loads/stores. */
    uint8_t emul = vext_get_emul(s, eew);
    data = FIELD_DP32(data, VDATA, VM, a->vm);
    data = FIELD_DP32(data, VDATA, LMUL, emul);
    data = FIELD_DP32(data, VDATA, NF, a->nf);
    return ldst_us_trans(a->rd, a->rs1, data, fn, s, true);
}
687
688static bool st_us_check(DisasContext *s, arg_r2nfvm* a, uint8_t eew)
689{
690    return require_rvv(s) &&
691           vext_check_isa_ill(s) &&
692           vext_check_store(s, a->rd, a->nf, eew);
693}
694
/* Unit-stride stores, EEW = 8/16/32/64 bits, encoded in the opcode. */
GEN_VEXT_TRANS(vse8_v,  MO_8,  r2nfvm, st_us_op, st_us_check)
GEN_VEXT_TRANS(vse16_v, MO_16, r2nfvm, st_us_op, st_us_check)
GEN_VEXT_TRANS(vse32_v, MO_32, r2nfvm, st_us_op, st_us_check)
GEN_VEXT_TRANS(vse64_v, MO_64, r2nfvm, st_us_op, st_us_check)
699
700/*
701 *** unit stride mask load and store
702 */
703static bool ld_us_mask_op(DisasContext *s, arg_vlm_v *a, uint8_t eew)
704{
705    uint32_t data = 0;
706    gen_helper_ldst_us *fn = gen_helper_vlm_v;
707
708    /* EMUL = 1, NFIELDS = 1 */
709    data = FIELD_DP32(data, VDATA, LMUL, 0);
710    data = FIELD_DP32(data, VDATA, NF, 1);
711    return ldst_us_trans(a->rd, a->rs1, data, fn, s, false);
712}
713
714static bool ld_us_mask_check(DisasContext *s, arg_vlm_v *a, uint8_t eew)
715{
716    /* EMUL = 1, NFIELDS = 1 */
717    return require_rvv(s) && vext_check_isa_ill(s);
718}
719
720static bool st_us_mask_op(DisasContext *s, arg_vsm_v *a, uint8_t eew)
721{
722    uint32_t data = 0;
723    gen_helper_ldst_us *fn = gen_helper_vsm_v;
724
725    /* EMUL = 1, NFIELDS = 1 */
726    data = FIELD_DP32(data, VDATA, LMUL, 0);
727    data = FIELD_DP32(data, VDATA, NF, 1);
728    return ldst_us_trans(a->rd, a->rs1, data, fn, s, true);
729}
730
731static bool st_us_mask_check(DisasContext *s, arg_vsm_v *a, uint8_t eew)
732{
733    /* EMUL = 1, NFIELDS = 1 */
734    return require_rvv(s) && vext_check_isa_ill(s);
735}
736
/* Mask load/store always transfer ceil(vl / 8) bytes with EEW = 8. */
GEN_VEXT_TRANS(vlm_v, MO_8, vlm_v, ld_us_mask_op, ld_us_mask_check)
GEN_VEXT_TRANS(vsm_v, MO_8, vsm_v, st_us_mask_op, st_us_mask_check)
739
740/*
741 *** stride load and store
742 */
typedef void gen_helper_ldst_stride(TCGv_ptr, TCGv_ptr, TCGv,
                                    TCGv, TCGv_env, TCGv_i32);

/*
 * Emit a strided load/store: like ldst_us_trans() but the helper also
 * receives the byte stride from rs2.  Skipped entirely when vl == 0;
 * loads mark the vector state dirty.
 */
static bool ldst_stride_trans(uint32_t vd, uint32_t rs1, uint32_t rs2,
                              uint32_t data, gen_helper_ldst_stride *fn,
                              DisasContext *s, bool is_store)
{
    TCGv_ptr dest, mask;
    TCGv base, stride;
    TCGv_i32 desc;

    /* Skip the access entirely when vl == 0. */
    TCGLabel *over = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);

    dest = tcg_temp_new_ptr();
    mask = tcg_temp_new_ptr();
    base = get_gpr(s, rs1, EXT_NONE);
    stride = get_gpr(s, rs2, EXT_NONE);
    /* See ldst_us_trans() for the vlen/data split in the descriptor. */
    desc = tcg_constant_i32(simd_desc(s->vlen / 8, s->vlen / 8, data));

    tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd));
    tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));

    fn(dest, mask, base, stride, cpu_env, desc);

    tcg_temp_free_ptr(dest);
    tcg_temp_free_ptr(mask);

    if (!is_store) {
        /* Loads write vector registers: flag mstatus.VS dirty. */
        mark_vs_dirty(s);
    }

    gen_set_label(over);
    return true;
}
778
/* Emit a strided load; the EEW encoded in the opcode selects the helper. */
static bool ld_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
{
    uint32_t data = 0;
    gen_helper_ldst_stride *fn;
    static gen_helper_ldst_stride * const fns[4] = {
        gen_helper_vlse8_v, gen_helper_vlse16_v,
        gen_helper_vlse32_v, gen_helper_vlse64_v
    };

    fn = fns[eew];
    if (fn == NULL) {
        return false;
    }

    /* The descriptor carries EMUL (not LMUL) for loads/stores. */
    uint8_t emul = vext_get_emul(s, eew);
    data = FIELD_DP32(data, VDATA, VM, a->vm);
    data = FIELD_DP32(data, VDATA, LMUL, emul);
    data = FIELD_DP32(data, VDATA, NF, a->nf);
    return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s, false);
}
799
800static bool ld_stride_check(DisasContext *s, arg_rnfvm* a, uint8_t eew)
801{
802    return require_rvv(s) &&
803           vext_check_isa_ill(s) &&
804           vext_check_load(s, a->rd, a->nf, a->vm, eew);
805}
806
/* Strided loads, EEW = 8/16/32/64 bits, stride taken from rs2. */
GEN_VEXT_TRANS(vlse8_v,  MO_8,  rnfvm, ld_stride_op, ld_stride_check)
GEN_VEXT_TRANS(vlse16_v, MO_16, rnfvm, ld_stride_op, ld_stride_check)
GEN_VEXT_TRANS(vlse32_v, MO_32, rnfvm, ld_stride_op, ld_stride_check)
GEN_VEXT_TRANS(vlse64_v, MO_64, rnfvm, ld_stride_op, ld_stride_check)
811
812static bool st_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
813{
814    uint32_t data = 0;
815    gen_helper_ldst_stride *fn;
816    static gen_helper_ldst_stride * const fns[4] = {
817        /* masked stride store */
818        gen_helper_vsse8_v,  gen_helper_vsse16_v,
819        gen_helper_vsse32_v,  gen_helper_vsse64_v
820    };
821
822    uint8_t emul = vext_get_emul(s, eew);
823    data = FIELD_DP32(data, VDATA, VM, a->vm);
824    data = FIELD_DP32(data, VDATA, LMUL, emul);
825    data = FIELD_DP32(data, VDATA, NF, a->nf);
826    fn = fns[eew];
827    if (fn == NULL) {
828        return false;
829    }
830
831    return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s, true);
832}
833
834static bool st_stride_check(DisasContext *s, arg_rnfvm* a, uint8_t eew)
835{
836    return require_rvv(s) &&
837           vext_check_isa_ill(s) &&
838           vext_check_store(s, a->rd, a->nf, eew);
839}
840
/* Strided stores, EEW = 8/16/32/64 bits, stride taken from rs2. */
GEN_VEXT_TRANS(vsse8_v,  MO_8,  rnfvm, st_stride_op, st_stride_check)
GEN_VEXT_TRANS(vsse16_v, MO_16, rnfvm, st_stride_op, st_stride_check)
GEN_VEXT_TRANS(vsse32_v, MO_32, rnfvm, st_stride_op, st_stride_check)
GEN_VEXT_TRANS(vsse64_v, MO_64, rnfvm, st_stride_op, st_stride_check)
845
846/*
847 *** index load and store
848 */
typedef void gen_helper_ldst_index(TCGv_ptr, TCGv_ptr, TCGv,
                                   TCGv_ptr, TCGv_env, TCGv_i32);

/*
 * Emit an indexed load/store: the helper receives pointers to vd,
 * v0 (mask) and the index vector vs2, plus the base address from rs1.
 * Skipped entirely when vl == 0; loads mark the vector state dirty.
 */
static bool ldst_index_trans(uint32_t vd, uint32_t rs1, uint32_t vs2,
                             uint32_t data, gen_helper_ldst_index *fn,
                             DisasContext *s, bool is_store)
{
    TCGv_ptr dest, mask, index;
    TCGv base;
    TCGv_i32 desc;

    /* Skip the access entirely when vl == 0. */
    TCGLabel *over = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);

    dest = tcg_temp_new_ptr();
    mask = tcg_temp_new_ptr();
    index = tcg_temp_new_ptr();
    base = get_gpr(s, rs1, EXT_NONE);
    /* See ldst_us_trans() for the vlen/data split in the descriptor. */
    desc = tcg_constant_i32(simd_desc(s->vlen / 8, s->vlen / 8, data));

    tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd));
    tcg_gen_addi_ptr(index, cpu_env, vreg_ofs(s, vs2));
    tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));

    fn(dest, mask, base, index, cpu_env, desc);

    tcg_temp_free_ptr(dest);
    tcg_temp_free_ptr(mask);
    tcg_temp_free_ptr(index);

    if (!is_store) {
        /* Loads write vector registers: flag mstatus.VS dirty. */
        mark_vs_dirty(s);
    }

    gen_set_label(over);
    return true;
}
886
/*
 * Emit an indexed load: the helper is selected by both the index EEW
 * (from the opcode) and the data SEW (from vtype).
 */
static bool ld_index_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
{
    uint32_t data = 0;
    gen_helper_ldst_index *fn;
    /* Indexed by [index EEW][data SEW]. */
    static gen_helper_ldst_index * const fns[4][4] = {
        /*
         * offset vector register group EEW = 8,
         * data vector register group EEW = SEW
         */
        { gen_helper_vlxei8_8_v,  gen_helper_vlxei8_16_v,
          gen_helper_vlxei8_32_v, gen_helper_vlxei8_64_v },
        /*
         * offset vector register group EEW = 16,
         * data vector register group EEW = SEW
         */
        { gen_helper_vlxei16_8_v, gen_helper_vlxei16_16_v,
          gen_helper_vlxei16_32_v, gen_helper_vlxei16_64_v },
        /*
         * offset vector register group EEW = 32,
         * data vector register group EEW = SEW
         */
        { gen_helper_vlxei32_8_v, gen_helper_vlxei32_16_v,
          gen_helper_vlxei32_32_v, gen_helper_vlxei32_64_v },
        /*
         * offset vector register group EEW = 64,
         * data vector register group EEW = SEW
         */
        { gen_helper_vlxei64_8_v, gen_helper_vlxei64_16_v,
          gen_helper_vlxei64_32_v, gen_helper_vlxei64_64_v }
    };

    fn = fns[eew][s->sew];

    /* The data vector uses SEW, so its EMUL equals LMUL (clamped). */
    uint8_t emul = vext_get_emul(s, s->sew);
    data = FIELD_DP32(data, VDATA, VM, a->vm);
    data = FIELD_DP32(data, VDATA, LMUL, emul);
    data = FIELD_DP32(data, VDATA, NF, a->nf);
    return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s, false);
}
926
927static bool ld_index_check(DisasContext *s, arg_rnfvm* a, uint8_t eew)
928{
929    return require_rvv(s) &&
930           vext_check_isa_ill(s) &&
931           vext_check_ld_index(s, a->rd, a->rs2, a->nf, a->vm, eew);
932}
933
/* Indexed loads: the opcode EEW applies to the index vector (vs2). */
GEN_VEXT_TRANS(vlxei8_v,  MO_8,  rnfvm, ld_index_op, ld_index_check)
GEN_VEXT_TRANS(vlxei16_v, MO_16, rnfvm, ld_index_op, ld_index_check)
GEN_VEXT_TRANS(vlxei32_v, MO_32, rnfvm, ld_index_op, ld_index_check)
GEN_VEXT_TRANS(vlxei64_v, MO_64, rnfvm, ld_index_op, ld_index_check)
938
/*
 * Emit an indexed store: the helper is selected by both the index EEW
 * (from the opcode) and the data SEW (from vtype).
 */
static bool st_index_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
{
    uint32_t data = 0;
    gen_helper_ldst_index *fn;
    /* Indexed by [index EEW][data SEW]. */
    static gen_helper_ldst_index * const fns[4][4] = {
        /*
         * offset vector register group EEW = 8,
         * data vector register group EEW = SEW
         */
        { gen_helper_vsxei8_8_v,  gen_helper_vsxei8_16_v,
          gen_helper_vsxei8_32_v, gen_helper_vsxei8_64_v },
        /*
         * offset vector register group EEW = 16,
         * data vector register group EEW = SEW
         */
        { gen_helper_vsxei16_8_v, gen_helper_vsxei16_16_v,
          gen_helper_vsxei16_32_v, gen_helper_vsxei16_64_v },
        /*
         * offset vector register group EEW = 32,
         * data vector register group EEW = SEW
         */
        { gen_helper_vsxei32_8_v, gen_helper_vsxei32_16_v,
          gen_helper_vsxei32_32_v, gen_helper_vsxei32_64_v },
        /*
         * offset vector register group EEW = 64,
         * data vector register group EEW = SEW
         */
        { gen_helper_vsxei64_8_v, gen_helper_vsxei64_16_v,
          gen_helper_vsxei64_32_v, gen_helper_vsxei64_64_v }
    };

    fn = fns[eew][s->sew];

    /* The data vector uses SEW, so its EMUL equals LMUL (clamped). */
    uint8_t emul = vext_get_emul(s, s->sew);
    data = FIELD_DP32(data, VDATA, VM, a->vm);
    data = FIELD_DP32(data, VDATA, LMUL, emul);
    data = FIELD_DP32(data, VDATA, NF, a->nf);
    return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s, true);
}
978
979static bool st_index_check(DisasContext *s, arg_rnfvm* a, uint8_t eew)
980{
981    return require_rvv(s) &&
982           vext_check_isa_ill(s) &&
983           vext_check_st_index(s, a->rd, a->rs2, a->nf, eew);
984}
985
/* Indexed stores: one trans function per offset EEW (8/16/32/64 bits). */
GEN_VEXT_TRANS(vsxei8_v,  MO_8,  rnfvm, st_index_op, st_index_check)
GEN_VEXT_TRANS(vsxei16_v, MO_16, rnfvm, st_index_op, st_index_check)
GEN_VEXT_TRANS(vsxei32_v, MO_32, rnfvm, st_index_op, st_index_check)
GEN_VEXT_TRANS(vsxei64_v, MO_64, rnfvm, st_index_op, st_index_check)
990
991/*
992 *** unit stride fault-only-first load
993 */
/*
 * Emit the call to a fault-only-first load helper: set up pointers to
 * vd's and v0's backing storage, fetch the base address from x[rs1],
 * and invoke the helper with a simd_desc-packed descriptor.
 */
static bool ldff_trans(uint32_t vd, uint32_t rs1, uint32_t data,
                       gen_helper_ldst_us *fn, DisasContext *s)
{
    TCGv_ptr dest, mask;
    TCGv base;
    TCGv_i32 desc;

    /* Skip the whole operation (including vs-dirty marking) when vl == 0. */
    TCGLabel *over = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);

    dest = tcg_temp_new_ptr();
    mask = tcg_temp_new_ptr();
    base = get_gpr(s, rs1, EXT_NONE);
    desc = tcg_constant_i32(simd_desc(s->vlen / 8, s->vlen / 8, data));

    /* dest -> vd's register file slot, mask -> v0 (the mask register). */
    tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd));
    tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));

    fn(dest, mask, base, cpu_env, desc);

    tcg_temp_free_ptr(dest);
    tcg_temp_free_ptr(mask);
    mark_vs_dirty(s);
    gen_set_label(over);
    return true;
}
1020
1021static bool ldff_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew)
1022{
1023    uint32_t data = 0;
1024    gen_helper_ldst_us *fn;
1025    static gen_helper_ldst_us * const fns[4] = {
1026        gen_helper_vle8ff_v, gen_helper_vle16ff_v,
1027        gen_helper_vle32ff_v, gen_helper_vle64ff_v
1028    };
1029
1030    fn = fns[eew];
1031    if (fn == NULL) {
1032        return false;
1033    }
1034
1035    uint8_t emul = vext_get_emul(s, eew);
1036    data = FIELD_DP32(data, VDATA, VM, a->vm);
1037    data = FIELD_DP32(data, VDATA, LMUL, emul);
1038    data = FIELD_DP32(data, VDATA, NF, a->nf);
1039    return ldff_trans(a->rd, a->rs1, data, fn, s);
1040}
1041
/* Fault-only-first loads reuse the unit-stride load legality check. */
GEN_VEXT_TRANS(vle8ff_v,  MO_8,  r2nfvm, ldff_op, ld_us_check)
GEN_VEXT_TRANS(vle16ff_v, MO_16, r2nfvm, ldff_op, ld_us_check)
GEN_VEXT_TRANS(vle32ff_v, MO_32, r2nfvm, ldff_op, ld_us_check)
GEN_VEXT_TRANS(vle64ff_v, MO_64, r2nfvm, ldff_op, ld_us_check)
1046
1047/*
1048 * load and store whole register instructions
1049 */
/* Whole-register helpers take no mask pointer: (dest, base, env, desc). */
typedef void gen_helper_ldst_whole(TCGv_ptr, TCGv, TCGv_env, TCGv_i32);

/*
 * Emit a whole-register load or store of an nf-register group starting
 * at vd, with the base address taken from x[rs1].  Note these
 * instructions ignore vl, so there is no vl==0 early-out branch here.
 */
static bool ldst_whole_trans(uint32_t vd, uint32_t rs1, uint32_t nf,
                             gen_helper_ldst_whole *fn, DisasContext *s,
                             bool is_store)
{
    TCGv_ptr dest;
    TCGv base;
    TCGv_i32 desc;

    /* Only NF is needed by the helper; no VM/LMUL fields for this op. */
    uint32_t data = FIELD_DP32(0, VDATA, NF, nf);
    dest = tcg_temp_new_ptr();
    desc = tcg_constant_i32(simd_desc(s->vlen / 8, s->vlen / 8, data));

    base = get_gpr(s, rs1, EXT_NONE);
    tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd));

    fn(dest, base, cpu_env, desc);

    tcg_temp_free_ptr(dest);

    /* Loads modify vector state; stores do not. */
    if (!is_store) {
        mark_vs_dirty(s);
    }

    return true;
}
1077
1078/*
1079 * load and store whole register instructions ignore vtype and vl setting.
1080 * Thus, we don't need to check vill bit. (Section 7.9)
1081 */
/*
 * ARG_NF is the number of registers in the group (1/2/4/8); rd must be
 * aligned to it.  Only require_rvv is checked, per the note above.
 */
#define GEN_LDST_WHOLE_TRANS(NAME, ARG_NF, IS_STORE)                      \
static bool trans_##NAME(DisasContext *s, arg_##NAME * a)                 \
{                                                                         \
    if (require_rvv(s) &&                                                 \
        QEMU_IS_ALIGNED(a->rd, ARG_NF)) {                                 \
        return ldst_whole_trans(a->rd, a->rs1, ARG_NF, gen_helper_##NAME, \
                                s, IS_STORE);                             \
    }                                                                     \
    return false;                                                         \
}
1092
/*
 * Whole-register loads, NF = 1/2/4/8.  The macro only uses the NF
 * argument; the EEW encoded in the mnemonic is handled by the helper.
 */
GEN_LDST_WHOLE_TRANS(vl1re8_v,  1, false)
GEN_LDST_WHOLE_TRANS(vl1re16_v, 1, false)
GEN_LDST_WHOLE_TRANS(vl1re32_v, 1, false)
GEN_LDST_WHOLE_TRANS(vl1re64_v, 1, false)
GEN_LDST_WHOLE_TRANS(vl2re8_v,  2, false)
GEN_LDST_WHOLE_TRANS(vl2re16_v, 2, false)
GEN_LDST_WHOLE_TRANS(vl2re32_v, 2, false)
GEN_LDST_WHOLE_TRANS(vl2re64_v, 2, false)
GEN_LDST_WHOLE_TRANS(vl4re8_v,  4, false)
GEN_LDST_WHOLE_TRANS(vl4re16_v, 4, false)
GEN_LDST_WHOLE_TRANS(vl4re32_v, 4, false)
GEN_LDST_WHOLE_TRANS(vl4re64_v, 4, false)
GEN_LDST_WHOLE_TRANS(vl8re8_v,  8, false)
GEN_LDST_WHOLE_TRANS(vl8re16_v, 8, false)
GEN_LDST_WHOLE_TRANS(vl8re32_v, 8, false)
GEN_LDST_WHOLE_TRANS(vl8re64_v, 8, false)

/* Whole-register stores, NF = 1/2/4/8. */
GEN_LDST_WHOLE_TRANS(vs1r_v, 1, true)
GEN_LDST_WHOLE_TRANS(vs2r_v, 2, true)
GEN_LDST_WHOLE_TRANS(vs4r_v, 4, true)
GEN_LDST_WHOLE_TRANS(vs8r_v, 8, true)
1114
1115/*
1116 *** Vector Integer Arithmetic Instructions
1117 */
1118
1119/*
1120 * MAXSZ returns the maximum vector size can be operated in bytes,
1121 * which is used in GVEC IR when vl_eq_vlmax flag is set to true
1122 * to accerlate vector operation.
1123 */
1124static inline uint32_t MAXSZ(DisasContext *s)
1125{
1126    int scale = s->lmul - 3;
1127    return scale < 0 ? s->vlen >> -scale : s->vlen << scale;
1128}
1129
1130static bool opivv_check(DisasContext *s, arg_rmrr *a)
1131{
1132    return require_rvv(s) &&
1133           vext_check_isa_ill(s) &&
1134           vext_check_sss(s, a->rd, a->rs1, a->rs2, a->vm);
1135}
1136
1137typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t,
1138                        uint32_t, uint32_t, uint32_t);
1139
1140static inline bool
1141do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn,
1142              gen_helper_gvec_4_ptr *fn)
1143{
1144    TCGLabel *over = gen_new_label();
1145    if (!opivv_check(s, a)) {
1146        return false;
1147    }
1148
1149    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
1150
1151    if (a->vm && s->vl_eq_vlmax) {
1152        gvec_fn(s->sew, vreg_ofs(s, a->rd),
1153                vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1),
1154                MAXSZ(s), MAXSZ(s));
1155    } else {
1156        uint32_t data = 0;
1157
1158        data = FIELD_DP32(data, VDATA, VM, a->vm);
1159        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
1160        tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
1161                           vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2),
1162                           cpu_env, s->vlen / 8, s->vlen / 8, data, fn);
1163    }
1164    mark_vs_dirty(s);
1165    gen_set_label(over);
1166    return true;
1167}
1168
1169/* OPIVV with GVEC IR */
/* Expand trans_##NAME: per-SEW helper table + GVEC expander SUF. */
#define GEN_OPIVV_GVEC_TRANS(NAME, SUF) \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
{                                                                  \
    static gen_helper_gvec_4_ptr * const fns[4] = {                \
        gen_helper_##NAME##_b, gen_helper_##NAME##_h,              \
        gen_helper_##NAME##_w, gen_helper_##NAME##_d,              \
    };                                                             \
    return do_opivv_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]);   \
}

/* Vector single-width integer add/subtract (vector-vector). */
GEN_OPIVV_GVEC_TRANS(vadd_vv, add)
GEN_OPIVV_GVEC_TRANS(vsub_vv, sub)
1182
/* OPIVX helper signature: (vd, v0 mask, x[rs1], vs2, env, desc). */
typedef void gen_helper_opivx(TCGv_ptr, TCGv_ptr, TCGv, TCGv_ptr,
                              TCGv_env, TCGv_i32);

/*
 * Emit the out-of-line call for a vector-scalar (OPIVX) operation:
 * pack VM/LMUL into the descriptor, point dest/src2/mask at the
 * register file, fetch the scalar from x[rs1], and call the helper.
 */
static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm,
                        gen_helper_opivx *fn, DisasContext *s)
{
    TCGv_ptr dest, src2, mask;
    TCGv src1;
    TCGv_i32 desc;
    uint32_t data = 0;

    /* Skip the whole operation (including vs-dirty marking) when vl == 0. */
    TCGLabel *over = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);

    dest = tcg_temp_new_ptr();
    mask = tcg_temp_new_ptr();
    src2 = tcg_temp_new_ptr();
    src1 = get_gpr(s, rs1, EXT_NONE);

    data = FIELD_DP32(data, VDATA, VM, vm);
    data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
    desc = tcg_constant_i32(simd_desc(s->vlen / 8, s->vlen / 8, data));

    tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd));
    tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, vs2));
    tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));

    fn(dest, mask, src1, src2, cpu_env, desc);

    tcg_temp_free_ptr(dest);
    tcg_temp_free_ptr(mask);
    tcg_temp_free_ptr(src2);
    mark_vs_dirty(s);
    gen_set_label(over);
    return true;
}
1219
1220static bool opivx_check(DisasContext *s, arg_rmrr *a)
1221{
1222    return require_rvv(s) &&
1223           vext_check_isa_ill(s) &&
1224           vext_check_ss(s, a->rd, a->rs2, a->vm);
1225}
1226
/* GVEC "vector op scalar(i64)" expander signature. */
typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t, TCGv_i64,
                         uint32_t, uint32_t);

/*
 * Translate an OPIVX instruction: GVEC fast path when unmasked and
 * vl == vlmax (scalar sign-extended into an i64 temp), else the helper
 * call via opivx_trans().
 */
static inline bool
do_opivx_gvec(DisasContext *s, arg_rmrr *a, GVecGen2sFn *gvec_fn,
              gen_helper_opivx *fn)
{
    if (!opivx_check(s, a)) {
        return false;
    }

    if (a->vm && s->vl_eq_vlmax) {
        TCGv_i64 src1 = tcg_temp_new_i64();

        /* Sign-extend x[rs1] so narrow SEWs see the low bits correctly. */
        tcg_gen_ext_tl_i64(src1, get_gpr(s, a->rs1, EXT_SIGN));
        gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2),
                src1, MAXSZ(s), MAXSZ(s));

        tcg_temp_free_i64(src1);
        mark_vs_dirty(s);
        return true;
    }
    return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s);
}
1251
1252/* OPIVX with GVEC IR */
/* Expand trans_##NAME: per-SEW OPIVX helpers + scalar GVEC expander SUF. */
#define GEN_OPIVX_GVEC_TRANS(NAME, SUF) \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
{                                                                  \
    static gen_helper_opivx * const fns[4] = {                     \
        gen_helper_##NAME##_b, gen_helper_##NAME##_h,              \
        gen_helper_##NAME##_w, gen_helper_##NAME##_d,              \
    };                                                             \
    return do_opivx_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]);   \
}

/* Vector single-width integer add/subtract (vector-scalar). */
GEN_OPIVX_GVEC_TRANS(vadd_vx, adds)
GEN_OPIVX_GVEC_TRANS(vsub_vx, subs)
1265
/* d = b - a on packed 8-bit lanes of a 64-bit scalar (reversed operands). */
static void gen_vec_rsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_vec_sub8_i64(d, b, a);
}

/* d = b - a on packed 16-bit lanes of a 64-bit scalar. */
static void gen_vec_rsub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_vec_sub16_i64(d, b, a);
}

/* ret = arg2 - arg1 (32-bit reversed subtract). */
static void gen_rsub_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
{
    tcg_gen_sub_i32(ret, arg2, arg1);
}

/* ret = arg2 - arg1 (64-bit reversed subtract). */
static void gen_rsub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
{
    tcg_gen_sub_i64(ret, arg2, arg1);
}

/* r = b - a on host vector registers. */
static void gen_rsub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_sub_vec(vece, r, b, a);
}
1290
/*
 * GVEC expander for reverse-subtract-scalar (d[i] = c - a[i]), built
 * from the gen_rsub_* pieces above; one GVecGen2s entry per SEW.
 */
static void tcg_gen_gvec_rsubs(unsigned vece, uint32_t dofs, uint32_t aofs,
                               TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
    static const GVecGen2s rsub_op[4] = {
        { .fni8 = gen_vec_rsub8_i64,
          .fniv = gen_rsub_vec,
          .fno = gen_helper_vec_rsubs8,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_vec_rsub16_i64,
          .fniv = gen_rsub_vec,
          .fno = gen_helper_vec_rsubs16,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_rsub_i32,
          .fniv = gen_rsub_vec,
          .fno = gen_helper_vec_rsubs32,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_rsub_i64,
          .fniv = gen_rsub_vec,
          .fno = gen_helper_vec_rsubs64,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64 },
    };

    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, c, &rsub_op[vece]);
}
1322
/* vrsub.vx: vd[i] = x[rs1] - vs2[i], via the rsubs expander above. */
GEN_OPIVX_GVEC_TRANS(vrsub_vx, rsubs)
1324
/* How the 5-bit immediate encoded in the rs1 field is interpreted. */
typedef enum {
    IMM_ZX,         /* Zero-extended */
    IMM_SX,         /* Sign-extended */
    IMM_TRUNC_SEW,  /* Truncate to log(SEW) bits */
    IMM_TRUNC_2SEW, /* Truncate to log(2*SEW) bits */
} imm_mode_t;
1331
1332static int64_t extract_imm(DisasContext *s, uint32_t imm, imm_mode_t imm_mode)
1333{
1334    switch (imm_mode) {
1335    case IMM_ZX:
1336        return extract64(imm, 0, 5);
1337    case IMM_SX:
1338        return sextract64(imm, 0, 5);
1339    case IMM_TRUNC_SEW:
1340        return extract64(imm, 0, s->sew + 3);
1341    case IMM_TRUNC_2SEW:
1342        return extract64(imm, 0, s->sew + 4);
1343    default:
1344        g_assert_not_reached();
1345    }
1346}
1347
/*
 * Emit the out-of-line call for a vector-immediate (OPIVI) operation.
 * Identical to opivx_trans() except the scalar operand is a constant
 * produced by extract_imm() instead of being read from a GPR.
 */
static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm,
                        gen_helper_opivx *fn, DisasContext *s,
                        imm_mode_t imm_mode)
{
    TCGv_ptr dest, src2, mask;
    TCGv src1;
    TCGv_i32 desc;
    uint32_t data = 0;

    /* Skip the whole operation (including vs-dirty marking) when vl == 0. */
    TCGLabel *over = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);

    dest = tcg_temp_new_ptr();
    mask = tcg_temp_new_ptr();
    src2 = tcg_temp_new_ptr();
    src1 = tcg_constant_tl(extract_imm(s, imm, imm_mode));

    data = FIELD_DP32(data, VDATA, VM, vm);
    data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
    desc = tcg_constant_i32(simd_desc(s->vlen / 8, s->vlen / 8, data));

    tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd));
    tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, vs2));
    tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));

    fn(dest, mask, src1, src2, cpu_env, desc);

    tcg_temp_free_ptr(dest);
    tcg_temp_free_ptr(mask);
    tcg_temp_free_ptr(src2);
    mark_vs_dirty(s);
    gen_set_label(over);
    return true;
}
1382
/* GVEC "vector op immediate(i64)" expander signature. */
typedef void GVecGen2iFn(unsigned, uint32_t, uint32_t, int64_t,
                         uint32_t, uint32_t);

/*
 * Translate an OPIVI instruction: GVEC fast path with the decoded
 * immediate when unmasked and vl == vlmax, else opivi_trans().
 * Legality rules are the same as OPIVX, hence opivx_check().
 */
static inline bool
do_opivi_gvec(DisasContext *s, arg_rmrr *a, GVecGen2iFn *gvec_fn,
              gen_helper_opivx *fn, imm_mode_t imm_mode)
{
    if (!opivx_check(s, a)) {
        return false;
    }

    if (a->vm && s->vl_eq_vlmax) {
        gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2),
                extract_imm(s, a->rs1, imm_mode), MAXSZ(s), MAXSZ(s));
        mark_vs_dirty(s);
        return true;
    }
    return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s, imm_mode);
}
1402
1403/* OPIVI with GVEC IR */
/* Expand trans_##NAME reusing the OPIVX helpers with immediate mode. */
#define GEN_OPIVI_GVEC_TRANS(NAME, IMM_MODE, OPIVX, SUF) \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
{                                                                  \
    static gen_helper_opivx * const fns[4] = {                     \
        gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h,            \
        gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d,            \
    };                                                             \
    return do_opivi_gvec(s, a, tcg_gen_gvec_##SUF,                 \
                         fns[s->sew], IMM_MODE);                   \
}

/* vadd.vi shares helpers with vadd.vx. */
GEN_OPIVI_GVEC_TRANS(vadd_vi, IMM_SX, vadd_vx, addi)
1416
1417static void tcg_gen_gvec_rsubi(unsigned vece, uint32_t dofs, uint32_t aofs,
1418                               int64_t c, uint32_t oprsz, uint32_t maxsz)
1419{
1420    TCGv_i64 tmp = tcg_constant_i64(c);
1421    tcg_gen_gvec_rsubs(vece, dofs, aofs, tmp, oprsz, maxsz);
1422}
1423
/* vrsub.vi: vd[i] = imm - vs2[i], sharing helpers with vrsub.vx. */
GEN_OPIVI_GVEC_TRANS(vrsub_vi, IMM_SX, vrsub_vx, rsubi)
1425
1426/* Vector Widening Integer Add/Subtract */
1427
1428/* OPIVV with WIDEN */
1429static bool opivv_widen_check(DisasContext *s, arg_rmrr *a)
1430{
1431    return require_rvv(s) &&
1432           vext_check_isa_ill(s) &&
1433           vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm);
1434}
1435
1436static bool do_opivv_widen(DisasContext *s, arg_rmrr *a,
1437                           gen_helper_gvec_4_ptr *fn,
1438                           bool (*checkfn)(DisasContext *, arg_rmrr *))
1439{
1440    if (checkfn(s, a)) {
1441        uint32_t data = 0;
1442        TCGLabel *over = gen_new_label();
1443        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
1444
1445        data = FIELD_DP32(data, VDATA, VM, a->vm);
1446        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
1447        tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
1448                           vreg_ofs(s, a->rs1),
1449                           vreg_ofs(s, a->rs2),
1450                           cpu_env, s->vlen / 8, s->vlen / 8,
1451                           data, fn);
1452        mark_vs_dirty(s);
1453        gen_set_label(over);
1454        return true;
1455    }
1456    return false;
1457}
1458
/* Only 3 helpers: a double-SEW result rules out SEW = 64. */
#define GEN_OPIVV_WIDEN_TRANS(NAME, CHECK) \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a)       \
{                                                            \
    static gen_helper_gvec_4_ptr * const fns[3] = {          \
        gen_helper_##NAME##_b,                               \
        gen_helper_##NAME##_h,                               \
        gen_helper_##NAME##_w                                \
    };                                                       \
    return do_opivv_widen(s, a, fns[s->sew], CHECK);         \
}

/* Vector widening integer add/subtract (vector-vector). */
GEN_OPIVV_WIDEN_TRANS(vwaddu_vv, opivv_widen_check)
GEN_OPIVV_WIDEN_TRANS(vwadd_vv, opivv_widen_check)
GEN_OPIVV_WIDEN_TRANS(vwsubu_vv, opivv_widen_check)
GEN_OPIVV_WIDEN_TRANS(vwsub_vv, opivv_widen_check)
1474
1475/* OPIVX with WIDEN */
1476static bool opivx_widen_check(DisasContext *s, arg_rmrr *a)
1477{
1478    return require_rvv(s) &&
1479           vext_check_isa_ill(s) &&
1480           vext_check_ds(s, a->rd, a->rs2, a->vm);
1481}
1482
1483static bool do_opivx_widen(DisasContext *s, arg_rmrr *a,
1484                           gen_helper_opivx *fn)
1485{
1486    if (opivx_widen_check(s, a)) {
1487        return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s);
1488    }
1489    return false;
1490}
1491
/* Only 3 helpers: a double-SEW result rules out SEW = 64. */
#define GEN_OPIVX_WIDEN_TRANS(NAME) \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a)       \
{                                                            \
    static gen_helper_opivx * const fns[3] = {               \
        gen_helper_##NAME##_b,                               \
        gen_helper_##NAME##_h,                               \
        gen_helper_##NAME##_w                                \
    };                                                       \
    return do_opivx_widen(s, a, fns[s->sew]);                \
}

/* Vector widening integer add/subtract (vector-scalar). */
GEN_OPIVX_WIDEN_TRANS(vwaddu_vx)
GEN_OPIVX_WIDEN_TRANS(vwadd_vx)
GEN_OPIVX_WIDEN_TRANS(vwsubu_vx)
GEN_OPIVX_WIDEN_TRANS(vwsub_vx)
1507
1508/* WIDEN OPIVV with WIDEN */
1509static bool opiwv_widen_check(DisasContext *s, arg_rmrr *a)
1510{
1511    return require_rvv(s) &&
1512           vext_check_isa_ill(s) &&
1513           vext_check_dds(s, a->rd, a->rs1, a->rs2, a->vm);
1514}
1515
1516static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a,
1517                           gen_helper_gvec_4_ptr *fn)
1518{
1519    if (opiwv_widen_check(s, a)) {
1520        uint32_t data = 0;
1521        TCGLabel *over = gen_new_label();
1522        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
1523
1524        data = FIELD_DP32(data, VDATA, VM, a->vm);
1525        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
1526        tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
1527                           vreg_ofs(s, a->rs1),
1528                           vreg_ofs(s, a->rs2),
1529                           cpu_env, s->vlen / 8, s->vlen / 8, data, fn);
1530        mark_vs_dirty(s);
1531        gen_set_label(over);
1532        return true;
1533    }
1534    return false;
1535}
1536
/* Only 3 helpers: a double-SEW result rules out SEW = 64. */
#define GEN_OPIWV_WIDEN_TRANS(NAME) \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a)       \
{                                                            \
    static gen_helper_gvec_4_ptr * const fns[3] = {          \
        gen_helper_##NAME##_b,                               \
        gen_helper_##NAME##_h,                               \
        gen_helper_##NAME##_w                                \
    };                                                       \
    return do_opiwv_widen(s, a, fns[s->sew]);                \
}

/* Vector widening add/subtract with wide first operand (wv forms). */
GEN_OPIWV_WIDEN_TRANS(vwaddu_wv)
GEN_OPIWV_WIDEN_TRANS(vwadd_wv)
GEN_OPIWV_WIDEN_TRANS(vwsubu_wv)
GEN_OPIWV_WIDEN_TRANS(vwsub_wv)
1552
1553/* WIDEN OPIVX with WIDEN */
1554static bool opiwx_widen_check(DisasContext *s, arg_rmrr *a)
1555{
1556    return require_rvv(s) &&
1557           vext_check_isa_ill(s) &&
1558           vext_check_dd(s, a->rd, a->rs2, a->vm);
1559}
1560
1561static bool do_opiwx_widen(DisasContext *s, arg_rmrr *a,
1562                           gen_helper_opivx *fn)
1563{
1564    if (opiwx_widen_check(s, a)) {
1565        return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s);
1566    }
1567    return false;
1568}
1569
/* Only 3 helpers: a double-SEW result rules out SEW = 64. */
#define GEN_OPIWX_WIDEN_TRANS(NAME) \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a)       \
{                                                            \
    static gen_helper_opivx * const fns[3] = {               \
        gen_helper_##NAME##_b,                               \
        gen_helper_##NAME##_h,                               \
        gen_helper_##NAME##_w                                \
    };                                                       \
    return do_opiwx_widen(s, a, fns[s->sew]);                \
}

/* Vector widening add/subtract with wide first operand (wx forms). */
GEN_OPIWX_WIDEN_TRANS(vwaddu_wx)
GEN_OPIWX_WIDEN_TRANS(vwadd_wx)
GEN_OPIWX_WIDEN_TRANS(vwsubu_wx)
GEN_OPIWX_WIDEN_TRANS(vwsub_wx)
1585
1586/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
1587/* OPIVV without GVEC IR */
/*
 * Expand trans_##NAME through the per-SEW helper table only (no inline
 * GVEC fast path); CHECK supplies the instruction-specific legality test.
 */
#define GEN_OPIVV_TRANS(NAME, CHECK)                               \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
{                                                                  \
    if (CHECK(s, a)) {                                             \
        uint32_t data = 0;                                         \
        static gen_helper_gvec_4_ptr * const fns[4] = {            \
            gen_helper_##NAME##_b, gen_helper_##NAME##_h,          \
            gen_helper_##NAME##_w, gen_helper_##NAME##_d,          \
        };                                                         \
        TCGLabel *over = gen_new_label();                          \
        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);          \
                                                                   \
        data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
        tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
                           vreg_ofs(s, a->rs1),                    \
                           vreg_ofs(s, a->rs2), cpu_env,           \
                           s->vlen / 8, s->vlen / 8, data,         \
                           fns[s->sew]);                           \
        mark_vs_dirty(s);                                          \
        gen_set_label(over);                                       \
        return true;                                               \
    }                                                              \
    return false;                                                  \
}
1613
1614/*
1615 * For vadc and vsbc, an illegal instruction exception is raised if the
1616 * destination vector register is v0 and LMUL > 1. (Section 12.4)
1617 */
1618static bool opivv_vadc_check(DisasContext *s, arg_rmrr *a)
1619{
1620    return require_rvv(s) &&
1621           vext_check_isa_ill(s) &&
1622           (a->rd != 0) &&
1623           vext_check_sss(s, a->rd, a->rs1, a->rs2, a->vm);
1624}
1625
/* Add/subtract with carry/borrow in from the v0 mask (vector-vector). */
GEN_OPIVV_TRANS(vadc_vvm, opivv_vadc_check)
GEN_OPIVV_TRANS(vsbc_vvm, opivv_vadc_check)
1628
1629/*
1630 * For vmadc and vmsbc, an illegal instruction exception is raised if the
1631 * destination vector register overlaps a source vector register group.
1632 */
1633static bool opivv_vmadc_check(DisasContext *s, arg_rmrr *a)
1634{
1635    return require_rvv(s) &&
1636           vext_check_isa_ill(s) &&
1637           vext_check_mss(s, a->rd, a->rs1, a->rs2);
1638}
1639
/* Carry/borrow-out mask producers (vector-vector). */
GEN_OPIVV_TRANS(vmadc_vvm, opivv_vmadc_check)
GEN_OPIVV_TRANS(vmsbc_vvm, opivv_vmadc_check)
1642
1643static bool opivx_vadc_check(DisasContext *s, arg_rmrr *a)
1644{
1645    return require_rvv(s) &&
1646           vext_check_isa_ill(s) &&
1647           (a->rd != 0) &&
1648           vext_check_ss(s, a->rd, a->rs2, a->vm);
1649}
1650
1651/* OPIVX without GVEC IR */
/* Expand trans_##NAME through opivx_trans only (no GVEC fast path). */
#define GEN_OPIVX_TRANS(NAME, CHECK)                                     \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a)                   \
{                                                                        \
    if (CHECK(s, a)) {                                                   \
        static gen_helper_opivx * const fns[4] = {                       \
            gen_helper_##NAME##_b, gen_helper_##NAME##_h,                \
            gen_helper_##NAME##_w, gen_helper_##NAME##_d,                \
        };                                                               \
                                                                         \
        return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);\
    }                                                                    \
    return false;                                                        \
}
1665
/* Add/subtract with carry/borrow in from the v0 mask (vector-scalar). */
GEN_OPIVX_TRANS(vadc_vxm, opivx_vadc_check)
GEN_OPIVX_TRANS(vsbc_vxm, opivx_vadc_check)
1668
1669static bool opivx_vmadc_check(DisasContext *s, arg_rmrr *a)
1670{
1671    return require_rvv(s) &&
1672           vext_check_isa_ill(s) &&
1673           vext_check_ms(s, a->rd, a->rs2);
1674}
1675
/* Carry/borrow-out mask producers (vector-scalar). */
GEN_OPIVX_TRANS(vmadc_vxm, opivx_vmadc_check)
GEN_OPIVX_TRANS(vmsbc_vxm, opivx_vmadc_check)
1678
1679/* OPIVI without GVEC IR */
/* Expand trans_##NAME via opivi_trans, reusing the OPIVX helper table. */
#define GEN_OPIVI_TRANS(NAME, IMM_MODE, OPIVX, CHECK)                    \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a)                   \
{                                                                        \
    if (CHECK(s, a)) {                                                   \
        static gen_helper_opivx * const fns[4] = {                       \
            gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h,              \
            gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d,              \
        };                                                               \
        return opivi_trans(a->rd, a->rs1, a->rs2, a->vm,                 \
                           fns[s->sew], s, IMM_MODE);                    \
    }                                                                    \
    return false;                                                        \
}
1693
/* Immediate forms of the carry add instructions. */
GEN_OPIVI_TRANS(vadc_vim, IMM_SX, vadc_vxm, opivx_vadc_check)
GEN_OPIVI_TRANS(vmadc_vim, IMM_SX, vmadc_vxm, opivx_vmadc_check)
1696
1697/* Vector Bitwise Logical Instructions */
/* and/or/xor in vector-vector, vector-scalar and vector-immediate forms. */
GEN_OPIVV_GVEC_TRANS(vand_vv, and)
GEN_OPIVV_GVEC_TRANS(vor_vv,  or)
GEN_OPIVV_GVEC_TRANS(vxor_vv, xor)
GEN_OPIVX_GVEC_TRANS(vand_vx, ands)
GEN_OPIVX_GVEC_TRANS(vor_vx,  ors)
GEN_OPIVX_GVEC_TRANS(vxor_vx, xors)
GEN_OPIVI_GVEC_TRANS(vand_vi, IMM_SX, vand_vx, andi)
GEN_OPIVI_GVEC_TRANS(vor_vi, IMM_SX, vor_vx,  ori)
GEN_OPIVI_GVEC_TRANS(vxor_vi, IMM_SX, vxor_vx, xori)
1707
1708/* Vector Single-Width Bit Shift Instructions */
/* Per-element variable shifts (vector-vector). */
GEN_OPIVV_GVEC_TRANS(vsll_vv,  shlv)
GEN_OPIVV_GVEC_TRANS(vsrl_vv,  shrv)
GEN_OPIVV_GVEC_TRANS(vsra_vv,  sarv)
1712
/* GVEC "vector shift by i32 scalar" expander signature. */
typedef void GVecGen2sFn32(unsigned, uint32_t, uint32_t, TCGv_i32,
                           uint32_t, uint32_t);

/*
 * Translate a shift-by-scalar OPIVX instruction.  Like do_opivx_gvec()
 * but the shift amount is truncated to i32 and masked to log2(SEW in
 * bits) = s->sew + 3 low bits before the GVEC expansion.
 */
static inline bool
do_opivx_gvec_shift(DisasContext *s, arg_rmrr *a, GVecGen2sFn32 *gvec_fn,
                    gen_helper_opivx *fn)
{
    if (!opivx_check(s, a)) {
        return false;
    }

    if (a->vm && s->vl_eq_vlmax) {
        TCGv_i32 src1 = tcg_temp_new_i32();

        tcg_gen_trunc_tl_i32(src1, get_gpr(s, a->rs1, EXT_NONE));
        tcg_gen_extract_i32(src1, src1, 0, s->sew + 3);
        gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2),
                src1, MAXSZ(s), MAXSZ(s));

        tcg_temp_free_i32(src1);
        mark_vs_dirty(s);
        return true;
    }
    return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s);
}
1738
/* Expand trans_##NAME for shift-by-scalar via do_opivx_gvec_shift. */
#define GEN_OPIVX_GVEC_SHIFT_TRANS(NAME, SUF) \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a)                    \
{                                                                         \
    static gen_helper_opivx * const fns[4] = {                            \
        gen_helper_##NAME##_b, gen_helper_##NAME##_h,                     \
        gen_helper_##NAME##_w, gen_helper_##NAME##_d,                     \
    };                                                                    \
                                                                          \
    return do_opivx_gvec_shift(s, a, tcg_gen_gvec_##SUF, fns[s->sew]);    \
}

/* Shifts by a scalar register amount. */
GEN_OPIVX_GVEC_SHIFT_TRANS(vsll_vx,  shls)
GEN_OPIVX_GVEC_SHIFT_TRANS(vsrl_vx,  shrs)
GEN_OPIVX_GVEC_SHIFT_TRANS(vsra_vx,  sars)

/* Shifts by an immediate, truncated to the SEW-dependent width. */
GEN_OPIVI_GVEC_TRANS(vsll_vi, IMM_TRUNC_SEW, vsll_vx, shli)
GEN_OPIVI_GVEC_TRANS(vsrl_vi, IMM_TRUNC_SEW, vsrl_vx, shri)
GEN_OPIVI_GVEC_TRANS(vsra_vi, IMM_TRUNC_SEW, vsra_vx, sari)
1757
1758/* Vector Narrowing Integer Right Shift Instructions */
1759static bool opiwv_narrow_check(DisasContext *s, arg_rmrr *a)
1760{
1761    return require_rvv(s) &&
1762           vext_check_isa_ill(s) &&
1763           vext_check_sds(s, a->rd, a->rs1, a->rs2, a->vm);
1764}
1765
/*
 * OPIVV with NARROW: vd is SEW wide while vs2 is 2*SEW wide.  Only
 * byte/half/word destinations exist, hence the 3-entry fns[] table
 * indexed directly by s->sew.
 */
#define GEN_OPIWV_NARROW_TRANS(NAME)                               \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
{                                                                  \
    if (opiwv_narrow_check(s, a)) {                                \
        uint32_t data = 0;                                         \
        static gen_helper_gvec_4_ptr * const fns[3] = {            \
            gen_helper_##NAME##_b,                                 \
            gen_helper_##NAME##_h,                                 \
            gen_helper_##NAME##_w,                                 \
        };                                                         \
        TCGLabel *over = gen_new_label();                          \
        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);          \
                                                                   \
        data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
        tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
                           vreg_ofs(s, a->rs1),                    \
                           vreg_ofs(s, a->rs2), cpu_env,           \
                           s->vlen / 8, s->vlen / 8, data,         \
                           fns[s->sew]);                           \
        mark_vs_dirty(s);                                          \
        gen_set_label(over);                                       \
        return true;                                               \
    }                                                              \
    return false;                                                  \
}
GEN_OPIWV_NARROW_TRANS(vnsra_wv)
GEN_OPIWV_NARROW_TRANS(vnsrl_wv)
1795
1796static bool opiwx_narrow_check(DisasContext *s, arg_rmrr *a)
1797{
1798    return require_rvv(s) &&
1799           vext_check_isa_ill(s) &&
1800           vext_check_sd(s, a->rd, a->rs2, a->vm);
1801}
1802
/*
 * OPIVX with NARROW: scalar rs1 operand, 2*SEW vs2, SEW destination.
 * Always goes through the out-of-line helpers via opivx_trans().
 */
#define GEN_OPIWX_NARROW_TRANS(NAME)                                     \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a)                   \
{                                                                        \
    if (opiwx_narrow_check(s, a)) {                                      \
        static gen_helper_opivx * const fns[3] = {                       \
            gen_helper_##NAME##_b,                                       \
            gen_helper_##NAME##_h,                                       \
            gen_helper_##NAME##_w,                                       \
        };                                                               \
        return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);\
    }                                                                    \
    return false;                                                        \
}

GEN_OPIWX_NARROW_TRANS(vnsra_wx)
GEN_OPIWX_NARROW_TRANS(vnsrl_wx)
1820
/*
 * OPIWI with NARROW: immediate shift amount, reusing the OPIVX helpers.
 * IMM_MODE selects how the 5-bit immediate field is extended.
 */
#define GEN_OPIWI_NARROW_TRANS(NAME, IMM_MODE, OPIVX)                    \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a)                   \
{                                                                        \
    if (opiwx_narrow_check(s, a)) {                                      \
        static gen_helper_opivx * const fns[3] = {                       \
            gen_helper_##OPIVX##_b,                                      \
            gen_helper_##OPIVX##_h,                                      \
            gen_helper_##OPIVX##_w,                                      \
        };                                                               \
        return opivi_trans(a->rd, a->rs1, a->rs2, a->vm,                 \
                           fns[s->sew], s, IMM_MODE);                    \
    }                                                                    \
    return false;                                                        \
}

GEN_OPIWI_NARROW_TRANS(vnsra_wi, IMM_ZX, vnsra_wx)
GEN_OPIWI_NARROW_TRANS(vnsrl_wi, IMM_ZX, vnsrl_wx)
1839
1840/* Vector Integer Comparison Instructions */
1841/*
1842 * For all comparison instructions, an illegal instruction exception is raised
1843 * if the destination vector register overlaps a source vector register group
1844 * and LMUL > 1.
1845 */
1846static bool opivv_cmp_check(DisasContext *s, arg_rmrr *a)
1847{
1848    return require_rvv(s) &&
1849           vext_check_isa_ill(s) &&
1850           vext_check_mss(s, a->rd, a->rs1, a->rs2);
1851}
1852
/* Vector-vector integer compares; results land in a mask register. */
GEN_OPIVV_TRANS(vmseq_vv, opivv_cmp_check)
GEN_OPIVV_TRANS(vmsne_vv, opivv_cmp_check)
GEN_OPIVV_TRANS(vmsltu_vv, opivv_cmp_check)
GEN_OPIVV_TRANS(vmslt_vv, opivv_cmp_check)
GEN_OPIVV_TRANS(vmsleu_vv, opivv_cmp_check)
GEN_OPIVV_TRANS(vmsle_vv, opivv_cmp_check)
1859
1860static bool opivx_cmp_check(DisasContext *s, arg_rmrr *a)
1861{
1862    return require_rvv(s) &&
1863           vext_check_isa_ill(s) &&
1864           vext_check_ms(s, a->rd, a->rs2);
1865}
1866
/* Vector-scalar and vector-immediate integer compares. */
GEN_OPIVX_TRANS(vmseq_vx, opivx_cmp_check)
GEN_OPIVX_TRANS(vmsne_vx, opivx_cmp_check)
GEN_OPIVX_TRANS(vmsltu_vx, opivx_cmp_check)
GEN_OPIVX_TRANS(vmslt_vx, opivx_cmp_check)
GEN_OPIVX_TRANS(vmsleu_vx, opivx_cmp_check)
GEN_OPIVX_TRANS(vmsle_vx, opivx_cmp_check)
GEN_OPIVX_TRANS(vmsgtu_vx, opivx_cmp_check)
GEN_OPIVX_TRANS(vmsgt_vx, opivx_cmp_check)

GEN_OPIVI_TRANS(vmseq_vi, IMM_SX, vmseq_vx, opivx_cmp_check)
GEN_OPIVI_TRANS(vmsne_vi, IMM_SX, vmsne_vx, opivx_cmp_check)
GEN_OPIVI_TRANS(vmsleu_vi, IMM_SX, vmsleu_vx, opivx_cmp_check)
GEN_OPIVI_TRANS(vmsle_vi, IMM_SX, vmsle_vx, opivx_cmp_check)
GEN_OPIVI_TRANS(vmsgtu_vi, IMM_SX, vmsgtu_vx, opivx_cmp_check)
GEN_OPIVI_TRANS(vmsgt_vi, IMM_SX, vmsgt_vx, opivx_cmp_check)
1882
/* Vector Integer Min/Max Instructions */
GEN_OPIVV_GVEC_TRANS(vminu_vv, umin)
GEN_OPIVV_GVEC_TRANS(vmin_vv,  smin)
GEN_OPIVV_GVEC_TRANS(vmaxu_vv, umax)
GEN_OPIVV_GVEC_TRANS(vmax_vv,  smax)
GEN_OPIVX_TRANS(vminu_vx, opivx_check)
GEN_OPIVX_TRANS(vmin_vx,  opivx_check)
GEN_OPIVX_TRANS(vmaxu_vx, opivx_check)
GEN_OPIVX_TRANS(vmax_vx,  opivx_check)

/* Vector Single-Width Integer Multiply Instructions */
/*
 * Only the low-half multiply has a direct gvec equivalent; the
 * high-half variants (vmulh*) always go through helpers.
 */
GEN_OPIVV_GVEC_TRANS(vmul_vv,  mul)
GEN_OPIVV_TRANS(vmulh_vv, opivv_check)
GEN_OPIVV_TRANS(vmulhu_vv, opivv_check)
GEN_OPIVV_TRANS(vmulhsu_vv, opivv_check)
GEN_OPIVX_GVEC_TRANS(vmul_vx,  muls)
GEN_OPIVX_TRANS(vmulh_vx, opivx_check)
GEN_OPIVX_TRANS(vmulhu_vx, opivx_check)
GEN_OPIVX_TRANS(vmulhsu_vx, opivx_check)

/* Vector Integer Divide Instructions */
GEN_OPIVV_TRANS(vdivu_vv, opivv_check)
GEN_OPIVV_TRANS(vdiv_vv, opivv_check)
GEN_OPIVV_TRANS(vremu_vv, opivv_check)
GEN_OPIVV_TRANS(vrem_vv, opivv_check)
GEN_OPIVX_TRANS(vdivu_vx, opivx_check)
GEN_OPIVX_TRANS(vdiv_vx, opivx_check)
GEN_OPIVX_TRANS(vremu_vx, opivx_check)
GEN_OPIVX_TRANS(vrem_vx, opivx_check)

/* Vector Widening Integer Multiply Instructions */
GEN_OPIVV_WIDEN_TRANS(vwmul_vv, opivv_widen_check)
GEN_OPIVV_WIDEN_TRANS(vwmulu_vv, opivv_widen_check)
GEN_OPIVV_WIDEN_TRANS(vwmulsu_vv, opivv_widen_check)
GEN_OPIVX_WIDEN_TRANS(vwmul_vx)
GEN_OPIVX_WIDEN_TRANS(vwmulu_vx)
GEN_OPIVX_WIDEN_TRANS(vwmulsu_vx)

/* Vector Single-Width Integer Multiply-Add Instructions */
GEN_OPIVV_TRANS(vmacc_vv, opivv_check)
GEN_OPIVV_TRANS(vnmsac_vv, opivv_check)
GEN_OPIVV_TRANS(vmadd_vv, opivv_check)
GEN_OPIVV_TRANS(vnmsub_vv, opivv_check)
GEN_OPIVX_TRANS(vmacc_vx, opivx_check)
GEN_OPIVX_TRANS(vnmsac_vx, opivx_check)
GEN_OPIVX_TRANS(vmadd_vx, opivx_check)
GEN_OPIVX_TRANS(vnmsub_vx, opivx_check)

/* Vector Widening Integer Multiply-Add Instructions */
GEN_OPIVV_WIDEN_TRANS(vwmaccu_vv, opivv_widen_check)
GEN_OPIVV_WIDEN_TRANS(vwmacc_vv, opivv_widen_check)
GEN_OPIVV_WIDEN_TRANS(vwmaccsu_vv, opivv_widen_check)
GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx)
GEN_OPIVX_WIDEN_TRANS(vwmacc_vx)
GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx)
GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx)
1939
1940/* Vector Integer Merge and Move Instructions */
/*
 * vmv.v.v: copy vector register group vs1 into vd, unmasked.
 *
 * Fast path: when vl == vlmax the whole group is copied with inline
 * gvec mov.  Otherwise a per-SEW out-of-line helper is used, skipped
 * entirely at runtime when vl == 0.
 */
static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a)
{
    if (require_rvv(s) &&
        vext_check_isa_ill(s) &&
        /* vmv.v.v has rs2 = 0 and vm = 1 */
        vext_check_sss(s, a->rd, a->rs1, 0, 1)) {
        if (s->vl_eq_vlmax) {
            tcg_gen_gvec_mov(s->sew, vreg_ofs(s, a->rd),
                             vreg_ofs(s, a->rs1),
                             MAXSZ(s), MAXSZ(s));
        } else {
            uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
            static gen_helper_gvec_2_ptr * const fns[4] = {
                gen_helper_vmv_v_v_b, gen_helper_vmv_v_v_h,
                gen_helper_vmv_v_v_w, gen_helper_vmv_v_v_d,
            };
            TCGLabel *over = gen_new_label();
            tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);

            tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1),
                               cpu_env, s->vlen / 8, s->vlen / 8, data,
                               fns[s->sew]);
            gen_set_label(over);
        }
        mark_vs_dirty(s);
        return true;
    }
    return false;
}
1970
/* Helper signature for scalar-to-vector splats: (vd, scalar, env, desc). */
typedef void gen_helper_vmv_vx(TCGv_ptr, TCGv_i64, TCGv_env, TCGv_i32);

/*
 * vmv.v.x: splat the (sign-extended) scalar rs1 into vd, unmasked.
 * Fast path uses gvec dup when vl == vlmax; otherwise the scalar is
 * widened to i64 and passed to a per-SEW helper.
 */
static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a)
{
    if (require_rvv(s) &&
        vext_check_isa_ill(s) &&
        /* vmv.v.x has rs2 = 0 and vm = 1 */
        vext_check_ss(s, a->rd, 0, 1)) {
        TCGv s1;
        TCGLabel *over = gen_new_label();
        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);

        s1 = get_gpr(s, a->rs1, EXT_SIGN);

        if (s->vl_eq_vlmax) {
            tcg_gen_gvec_dup_tl(s->sew, vreg_ofs(s, a->rd),
                                MAXSZ(s), MAXSZ(s), s1);
        } else {
            TCGv_i32 desc;
            TCGv_i64 s1_i64 = tcg_temp_new_i64();
            TCGv_ptr dest = tcg_temp_new_ptr();
            uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
            static gen_helper_vmv_vx * const fns[4] = {
                gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h,
                gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d,
            };

            tcg_gen_ext_tl_i64(s1_i64, s1);
            desc = tcg_constant_i32(simd_desc(s->vlen / 8, s->vlen / 8, data));
            tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, a->rd));
            fns[s->sew](dest, s1_i64, cpu_env, desc);

            tcg_temp_free_ptr(dest);
            tcg_temp_free_i64(s1_i64);
        }

        mark_vs_dirty(s);
        gen_set_label(over);
        return true;
    }
    return false;
}
2012
/*
 * vmv.v.i: splat a 5-bit sign-extended immediate into vd, unmasked.
 * The immediate is carried in the rs1 field of the decoded arguments.
 */
static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a)
{
    if (require_rvv(s) &&
        vext_check_isa_ill(s) &&
        /* vmv.v.i has rs2 = 0 and vm = 1 */
        vext_check_ss(s, a->rd, 0, 1)) {
        int64_t simm = sextract64(a->rs1, 0, 5);
        if (s->vl_eq_vlmax) {
            tcg_gen_gvec_dup_imm(s->sew, vreg_ofs(s, a->rd),
                                 MAXSZ(s), MAXSZ(s), simm);
            mark_vs_dirty(s);
        } else {
            TCGv_i32 desc;
            TCGv_i64 s1;
            TCGv_ptr dest;
            uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
            static gen_helper_vmv_vx * const fns[4] = {
                gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h,
                gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d,
            };
            TCGLabel *over = gen_new_label();
            tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);

            s1 = tcg_constant_i64(simm);
            dest = tcg_temp_new_ptr();
            desc = tcg_constant_i32(simd_desc(s->vlen / 8, s->vlen / 8, data));
            tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, a->rd));
            fns[s->sew](dest, s1, cpu_env, desc);

            tcg_temp_free_ptr(dest);
            mark_vs_dirty(s);
            gen_set_label(over);
        }
        return true;
    }
    return false;
}
2050
/* Vector merge: vd[i] = v0.mask[i] ? vs1[i]/rs1/imm : vs2[i]. */
GEN_OPIVV_TRANS(vmerge_vvm, opivv_vadc_check)
GEN_OPIVX_TRANS(vmerge_vxm, opivx_vadc_check)
GEN_OPIVI_TRANS(vmerge_vim, IMM_SX, vmerge_vxm, opivx_vadc_check)

/*
 *** Vector Fixed-Point Arithmetic Instructions
 */

/* Vector Single-Width Saturating Add and Subtract */
GEN_OPIVV_TRANS(vsaddu_vv, opivv_check)
GEN_OPIVV_TRANS(vsadd_vv,  opivv_check)
GEN_OPIVV_TRANS(vssubu_vv, opivv_check)
GEN_OPIVV_TRANS(vssub_vv,  opivv_check)
GEN_OPIVX_TRANS(vsaddu_vx,  opivx_check)
GEN_OPIVX_TRANS(vsadd_vx,  opivx_check)
GEN_OPIVX_TRANS(vssubu_vx,  opivx_check)
GEN_OPIVX_TRANS(vssub_vx,  opivx_check)
GEN_OPIVI_TRANS(vsaddu_vi, IMM_SX, vsaddu_vx, opivx_check)
GEN_OPIVI_TRANS(vsadd_vi, IMM_SX, vsadd_vx, opivx_check)

/* Vector Single-Width Averaging Add and Subtract */
GEN_OPIVV_TRANS(vaadd_vv, opivv_check)
GEN_OPIVV_TRANS(vaaddu_vv, opivv_check)
GEN_OPIVV_TRANS(vasub_vv, opivv_check)
GEN_OPIVV_TRANS(vasubu_vv, opivv_check)
GEN_OPIVX_TRANS(vaadd_vx,  opivx_check)
GEN_OPIVX_TRANS(vaaddu_vx,  opivx_check)
GEN_OPIVX_TRANS(vasub_vx,  opivx_check)
GEN_OPIVX_TRANS(vasubu_vx,  opivx_check)

/* Vector Single-Width Fractional Multiply with Rounding and Saturation */
GEN_OPIVV_TRANS(vsmul_vv, opivv_check)
GEN_OPIVX_TRANS(vsmul_vx,  opivx_check)

/* Vector Single-Width Scaling Shift Instructions */
GEN_OPIVV_TRANS(vssrl_vv, opivv_check)
GEN_OPIVV_TRANS(vssra_vv, opivv_check)
GEN_OPIVX_TRANS(vssrl_vx,  opivx_check)
GEN_OPIVX_TRANS(vssra_vx,  opivx_check)
GEN_OPIVI_TRANS(vssrl_vi, IMM_TRUNC_SEW, vssrl_vx, opivx_check)
GEN_OPIVI_TRANS(vssra_vi, IMM_TRUNC_SEW, vssra_vx, opivx_check)

/* Vector Narrowing Fixed-Point Clip Instructions */
GEN_OPIWV_NARROW_TRANS(vnclipu_wv)
GEN_OPIWV_NARROW_TRANS(vnclip_wv)
GEN_OPIWX_NARROW_TRANS(vnclipu_wx)
GEN_OPIWX_NARROW_TRANS(vnclip_wx)
GEN_OPIWI_NARROW_TRANS(vnclipu_wi, IMM_ZX, vnclipu_wx)
GEN_OPIWI_NARROW_TRANS(vnclip_wi, IMM_ZX, vnclip_wx)
2100
2101/*
2102 *** Vector Float Point Arithmetic Instructions
2103 */
2104
2105/*
2106 * As RVF-only cpus always have values NaN-boxed to 64-bits,
2107 * RVF and RVD can be treated equally.
2108 * We don't have to deal with the cases of: SEW > FLEN.
2109 *
2110 * If SEW < FLEN, check whether input fp register is a valid
2111 * NaN-boxed value, in which case the least-significant SEW bits
2112 * of the f regsiter are used, else the canonical NaN value is used.
2113 */
2114static void do_nanbox(DisasContext *s, TCGv_i64 out, TCGv_i64 in)
2115{
2116    switch (s->sew) {
2117    case 1:
2118        gen_check_nanbox_h(out, in);
2119        break;
2120    case 2:
2121        gen_check_nanbox_s(out, in);
2122        break;
2123    case 3:
2124        tcg_gen_mov_i64(out, in);
2125        break;
2126    default:
2127        g_assert_not_reached();
2128    }
2129}
2130
2131/* Vector Single-Width Floating-Point Add/Subtract Instructions */
2132
2133/*
2134 * If the current SEW does not correspond to a supported IEEE floating-point
2135 * type, an illegal instruction exception is raised.
2136 */
2137static bool opfvv_check(DisasContext *s, arg_rmrr *a)
2138{
2139    return require_rvv(s) &&
2140           require_rvf(s) &&
2141           vext_check_isa_ill(s) &&
2142           vext_check_sss(s, a->rd, a->rs1, a->rs2, a->vm);
2143}
2144
/*
 * OPFVV without GVEC IR: fp vector-vector ops always use helpers.
 * fns[] holds the h/w/d variants, indexed by s->sew - 1 since fp SEW
 * starts at MO_16.  The dynamic rounding mode is installed up front.
 */
#define GEN_OPFVV_TRANS(NAME, CHECK)                               \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
{                                                                  \
    if (CHECK(s, a)) {                                             \
        uint32_t data = 0;                                         \
        static gen_helper_gvec_4_ptr * const fns[3] = {            \
            gen_helper_##NAME##_h,                                 \
            gen_helper_##NAME##_w,                                 \
            gen_helper_##NAME##_d,                                 \
        };                                                         \
        TCGLabel *over = gen_new_label();                          \
        gen_set_rm(s, RISCV_FRM_DYN);                              \
        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);          \
                                                                   \
        data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
        tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
                           vreg_ofs(s, a->rs1),                    \
                           vreg_ofs(s, a->rs2), cpu_env,           \
                           s->vlen / 8, s->vlen / 8, data,         \
                           fns[s->sew - 1]);                       \
        mark_vs_dirty(s);                                          \
        gen_set_label(over);                                       \
        return true;                                               \
    }                                                              \
    return false;                                                  \
}
GEN_OPFVV_TRANS(vfadd_vv, opfvv_check)
GEN_OPFVV_TRANS(vfsub_vv, opfvv_check)
2175
/* Helper signature for fp vector-scalar ops: (vd, v0, f[rs1], vs2, env, desc). */
typedef void gen_helper_opfvf(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_ptr,
                              TCGv_env, TCGv_i32);

/*
 * Common expansion for fp vector-scalar ops: NaN-box the scalar fp
 * source for the current SEW and call the out-of-line helper.  The
 * whole operation is skipped at runtime when vl == 0.
 */
static bool opfvf_trans(uint32_t vd, uint32_t rs1, uint32_t vs2,
                        uint32_t data, gen_helper_opfvf *fn, DisasContext *s)
{
    TCGv_ptr dest, src2, mask;
    TCGv_i32 desc;
    TCGv_i64 t1;

    TCGLabel *over = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);

    dest = tcg_temp_new_ptr();
    mask = tcg_temp_new_ptr();
    src2 = tcg_temp_new_ptr();
    desc = tcg_constant_i32(simd_desc(s->vlen / 8, s->vlen / 8, data));

    tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd));
    tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, vs2));
    tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));

    /* NaN-box f[rs1] */
    t1 = tcg_temp_new_i64();
    do_nanbox(s, t1, cpu_fpr[rs1]);

    fn(dest, mask, t1, src2, cpu_env, desc);

    tcg_temp_free_ptr(dest);
    tcg_temp_free_ptr(mask);
    tcg_temp_free_ptr(src2);
    tcg_temp_free_i64(t1);
    mark_vs_dirty(s);
    gen_set_label(over);
    return true;
}
2212
2213/*
2214 * If the current SEW does not correspond to a supported IEEE floating-point
2215 * type, an illegal instruction exception is raised
2216 */
2217static bool opfvf_check(DisasContext *s, arg_rmrr *a)
2218{
2219    return require_rvv(s) &&
2220           require_rvf(s) &&
2221           vext_check_isa_ill(s) &&
2222           vext_check_ss(s, a->rd, a->rs2, a->vm);
2223}
2224
/*
 * OPFVF without GVEC IR: fp vector-scalar ops, expanded via
 * opfvf_trans() with the h/w/d helper selected by s->sew - 1.
 */
#define GEN_OPFVF_TRANS(NAME, CHECK)                              \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a)            \
{                                                                 \
    if (CHECK(s, a)) {                                            \
        uint32_t data = 0;                                        \
        static gen_helper_opfvf *const fns[3] = {                 \
            gen_helper_##NAME##_h,                                \
            gen_helper_##NAME##_w,                                \
            gen_helper_##NAME##_d,                                \
        };                                                        \
        gen_set_rm(s, RISCV_FRM_DYN);                             \
        data = FIELD_DP32(data, VDATA, VM, a->vm);                \
        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);            \
        return opfvf_trans(a->rd, a->rs1, a->rs2, data,           \
                           fns[s->sew - 1], s);                   \
    }                                                             \
    return false;                                                 \
}

GEN_OPFVF_TRANS(vfadd_vf,  opfvf_check)
GEN_OPFVF_TRANS(vfsub_vf,  opfvf_check)
GEN_OPFVF_TRANS(vfrsub_vf,  opfvf_check)
2248
2249/* Vector Widening Floating-Point Add/Subtract Instructions */
2250static bool opfvv_widen_check(DisasContext *s, arg_rmrr *a)
2251{
2252    return require_rvv(s) &&
2253           require_rvf(s) &&
2254           vext_check_isa_ill(s) &&
2255           vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm);
2256}
2257
/*
 * OPFVV with WIDEN: fp vector-vector ops producing 2*SEW results.
 * Only half and single sources exist, hence the 2-entry fns[] table.
 */
#define GEN_OPFVV_WIDEN_TRANS(NAME, CHECK)                       \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a)           \
{                                                                \
    if (CHECK(s, a)) {                                           \
        uint32_t data = 0;                                       \
        static gen_helper_gvec_4_ptr * const fns[2] = {          \
            gen_helper_##NAME##_h, gen_helper_##NAME##_w,        \
        };                                                       \
        TCGLabel *over = gen_new_label();                        \
        gen_set_rm(s, RISCV_FRM_DYN);                            \
        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);        \
                                                                 \
        data = FIELD_DP32(data, VDATA, VM, a->vm);               \
        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);           \
        tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),   \
                           vreg_ofs(s, a->rs1),                  \
                           vreg_ofs(s, a->rs2), cpu_env,         \
                           s->vlen / 8, s->vlen / 8, data,       \
                           fns[s->sew - 1]);                     \
        mark_vs_dirty(s);                                        \
        gen_set_label(over);                                     \
        return true;                                             \
    }                                                            \
    return false;                                                \
}

GEN_OPFVV_WIDEN_TRANS(vfwadd_vv, opfvv_widen_check)
GEN_OPFVV_WIDEN_TRANS(vfwsub_vv, opfvv_widen_check)
2287
2288static bool opfvf_widen_check(DisasContext *s, arg_rmrr *a)
2289{
2290    return require_rvv(s) &&
2291           require_rvf(s) &&
2292           vext_check_isa_ill(s) &&
2293           vext_check_ds(s, a->rd, a->rs2, a->vm);
2294}
2295
/*
 * OPFVF with WIDEN: fp vector-scalar ops producing 2*SEW results,
 * expanded via opfvf_trans() with h/w helpers.
 */
#define GEN_OPFVF_WIDEN_TRANS(NAME)                              \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a)           \
{                                                                \
    if (opfvf_widen_check(s, a)) {                               \
        uint32_t data = 0;                                       \
        static gen_helper_opfvf *const fns[2] = {                \
            gen_helper_##NAME##_h, gen_helper_##NAME##_w,        \
        };                                                       \
        gen_set_rm(s, RISCV_FRM_DYN);                            \
        data = FIELD_DP32(data, VDATA, VM, a->vm);               \
        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);           \
        return opfvf_trans(a->rd, a->rs1, a->rs2, data,          \
                           fns[s->sew - 1], s);                  \
    }                                                            \
    return false;                                                \
}

GEN_OPFVF_WIDEN_TRANS(vfwadd_vf)
GEN_OPFVF_WIDEN_TRANS(vfwsub_vf)
2316
2317static bool opfwv_widen_check(DisasContext *s, arg_rmrr *a)
2318{
2319    return require_rvv(s) &&
2320           require_rvf(s) &&
2321           vext_check_isa_ill(s) &&
2322           vext_check_dds(s, a->rd, a->rs1, a->rs2, a->vm);
2323}
2324
/*
 * WIDEN OPFVV with WIDEN: wide (2*SEW) destination and wide vs2,
 * narrow vs1 (e.g. vfwadd.wv).
 */
#define GEN_OPFWV_WIDEN_TRANS(NAME)                                \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
{                                                                  \
    if (opfwv_widen_check(s, a)) {                                 \
        uint32_t data = 0;                                         \
        static gen_helper_gvec_4_ptr * const fns[2] = {            \
            gen_helper_##NAME##_h, gen_helper_##NAME##_w,          \
        };                                                         \
        TCGLabel *over = gen_new_label();                          \
        gen_set_rm(s, RISCV_FRM_DYN);                              \
        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);          \
                                                                   \
        data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
        tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
                           vreg_ofs(s, a->rs1),                    \
                           vreg_ofs(s, a->rs2), cpu_env,           \
                           s->vlen / 8, s->vlen / 8, data,         \
                           fns[s->sew - 1]);                       \
        mark_vs_dirty(s);                                          \
        gen_set_label(over);                                       \
        return true;                                               \
    }                                                              \
    return false;                                                  \
}

GEN_OPFWV_WIDEN_TRANS(vfwadd_wv)
GEN_OPFWV_WIDEN_TRANS(vfwsub_wv)
2354
2355static bool opfwf_widen_check(DisasContext *s, arg_rmrr *a)
2356{
2357    return require_rvv(s) &&
2358           require_rvf(s) &&
2359           vext_check_isa_ill(s) &&
2360           vext_check_dd(s, a->rd, a->rs2, a->vm);
2361}
2362
/*
 * WIDEN OPFVF with WIDEN: wide (2*SEW) destination and wide vs2,
 * fp scalar rs1 (e.g. vfwadd.wf), expanded via opfvf_trans().
 */
#define GEN_OPFWF_WIDEN_TRANS(NAME)                              \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a)           \
{                                                                \
    if (opfwf_widen_check(s, a)) {                               \
        uint32_t data = 0;                                       \
        static gen_helper_opfvf *const fns[2] = {                \
            gen_helper_##NAME##_h, gen_helper_##NAME##_w,        \
        };                                                       \
        gen_set_rm(s, RISCV_FRM_DYN);                            \
        data = FIELD_DP32(data, VDATA, VM, a->vm);               \
        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);           \
        return opfvf_trans(a->rd, a->rs1, a->rs2, data,          \
                           fns[s->sew - 1], s);                  \
    }                                                            \
    return false;                                                \
}

GEN_OPFWF_WIDEN_TRANS(vfwadd_wf)
GEN_OPFWF_WIDEN_TRANS(vfwsub_wf)
2383
/* Vector Single-Width Floating-Point Multiply/Divide Instructions */
GEN_OPFVV_TRANS(vfmul_vv, opfvv_check)
GEN_OPFVV_TRANS(vfdiv_vv, opfvv_check)
GEN_OPFVF_TRANS(vfmul_vf,  opfvf_check)
GEN_OPFVF_TRANS(vfdiv_vf,  opfvf_check)
GEN_OPFVF_TRANS(vfrdiv_vf,  opfvf_check)

/* Vector Widening Floating-Point Multiply */
GEN_OPFVV_WIDEN_TRANS(vfwmul_vv, opfvv_widen_check)
GEN_OPFVF_WIDEN_TRANS(vfwmul_vf)

/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
GEN_OPFVV_TRANS(vfmacc_vv, opfvv_check)
GEN_OPFVV_TRANS(vfnmacc_vv, opfvv_check)
GEN_OPFVV_TRANS(vfmsac_vv, opfvv_check)
GEN_OPFVV_TRANS(vfnmsac_vv, opfvv_check)
GEN_OPFVV_TRANS(vfmadd_vv, opfvv_check)
GEN_OPFVV_TRANS(vfnmadd_vv, opfvv_check)
GEN_OPFVV_TRANS(vfmsub_vv, opfvv_check)
GEN_OPFVV_TRANS(vfnmsub_vv, opfvv_check)
GEN_OPFVF_TRANS(vfmacc_vf, opfvf_check)
GEN_OPFVF_TRANS(vfnmacc_vf, opfvf_check)
GEN_OPFVF_TRANS(vfmsac_vf, opfvf_check)
GEN_OPFVF_TRANS(vfnmsac_vf, opfvf_check)
GEN_OPFVF_TRANS(vfmadd_vf, opfvf_check)
GEN_OPFVF_TRANS(vfnmadd_vf, opfvf_check)
GEN_OPFVF_TRANS(vfmsub_vf, opfvf_check)
GEN_OPFVF_TRANS(vfnmsub_vf, opfvf_check)

/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
GEN_OPFVV_WIDEN_TRANS(vfwmacc_vv, opfvv_widen_check)
GEN_OPFVV_WIDEN_TRANS(vfwnmacc_vv, opfvv_widen_check)
GEN_OPFVV_WIDEN_TRANS(vfwmsac_vv, opfvv_widen_check)
GEN_OPFVV_WIDEN_TRANS(vfwnmsac_vv, opfvv_widen_check)
GEN_OPFVF_WIDEN_TRANS(vfwmacc_vf)
GEN_OPFVF_WIDEN_TRANS(vfwnmacc_vf)
GEN_OPFVF_WIDEN_TRANS(vfwmsac_vf)
GEN_OPFVF_WIDEN_TRANS(vfwnmsac_vf)
2422
2423/* Vector Floating-Point Square-Root Instruction */
2424
2425/*
2426 * If the current SEW does not correspond to a supported IEEE floating-point
2427 * type, an illegal instruction exception is raised
2428 */
2429static bool opfv_check(DisasContext *s, arg_rmr *a)
2430{
2431    return require_rvv(s) &&
2432           require_rvf(s) &&
2433           vext_check_isa_ill(s) &&
2434           /* OPFV instructions ignore vs1 check */
2435           vext_check_ss(s, a->rd, a->rs2, a->vm);
2436}
2437
2438static bool do_opfv(DisasContext *s, arg_rmr *a,
2439                    gen_helper_gvec_3_ptr *fn,
2440                    bool (*checkfn)(DisasContext *, arg_rmr *),
2441                    int rm)
2442{
2443    if (checkfn(s, a)) {
2444        if (rm != RISCV_FRM_DYN) {
2445            gen_set_rm(s, RISCV_FRM_DYN);
2446        }
2447
2448        uint32_t data = 0;
2449        TCGLabel *over = gen_new_label();
2450        gen_set_rm(s, rm);
2451        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
2452
2453        data = FIELD_DP32(data, VDATA, VM, a->vm);
2454        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
2455        tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
2456                           vreg_ofs(s, a->rs2), cpu_env,
2457                           s->vlen / 8, s->vlen / 8, data, fn);
2458        mark_vs_dirty(s);
2459        gen_set_label(over);
2460        return true;
2461    }
2462    return false;
2463}
2464
/*
 * Emit a trans_* entry for a single-operand fp vector op NAME, run
 * with rounding mode FRM; helper selected by s->sew - 1 (h/w/d).
 */
#define GEN_OPFV_TRANS(NAME, CHECK, FRM)               \
static bool trans_##NAME(DisasContext *s, arg_rmr *a)  \
{                                                      \
    static gen_helper_gvec_3_ptr * const fns[3] = {    \
        gen_helper_##NAME##_h,                         \
        gen_helper_##NAME##_w,                         \
        gen_helper_##NAME##_d                          \
    };                                                 \
    return do_opfv(s, a, fns[s->sew - 1], CHECK, FRM); \
}

GEN_OPFV_TRANS(vfsqrt_v, opfv_check, RISCV_FRM_DYN)
GEN_OPFV_TRANS(vfrsqrt7_v, opfv_check, RISCV_FRM_DYN)
GEN_OPFV_TRANS(vfrec7_v, opfv_check, RISCV_FRM_DYN)
2479
/* Vector Floating-Point MIN/MAX Instructions */
GEN_OPFVV_TRANS(vfmin_vv, opfvv_check)
GEN_OPFVV_TRANS(vfmax_vv, opfvv_check)
GEN_OPFVF_TRANS(vfmin_vf, opfvf_check)
GEN_OPFVF_TRANS(vfmax_vf, opfvf_check)

/* Vector Floating-Point Sign-Injection Instructions */
GEN_OPFVV_TRANS(vfsgnj_vv, opfvv_check)
GEN_OPFVV_TRANS(vfsgnjn_vv, opfvv_check)
GEN_OPFVV_TRANS(vfsgnjx_vv, opfvv_check)
GEN_OPFVF_TRANS(vfsgnj_vf, opfvf_check)
GEN_OPFVF_TRANS(vfsgnjn_vf, opfvf_check)
GEN_OPFVF_TRANS(vfsgnjx_vf, opfvf_check)
2493
2494/* Vector Floating-Point Compare Instructions */
2495static bool opfvv_cmp_check(DisasContext *s, arg_rmrr *a)
2496{
2497    return require_rvv(s) &&
2498           require_rvf(s) &&
2499           vext_check_isa_ill(s) &&
2500           vext_check_mss(s, a->rd, a->rs1, a->rs2);
2501}
2502
/* Compares produce a mask result in vd (see opfvv_cmp_check). */
GEN_OPFVV_TRANS(vmfeq_vv, opfvv_cmp_check)
GEN_OPFVV_TRANS(vmfne_vv, opfvv_cmp_check)
GEN_OPFVV_TRANS(vmflt_vv, opfvv_cmp_check)
GEN_OPFVV_TRANS(vmfle_vv, opfvv_cmp_check)
2507
2508static bool opfvf_cmp_check(DisasContext *s, arg_rmrr *a)
2509{
2510    return require_rvv(s) &&
2511           require_rvf(s) &&
2512           vext_check_isa_ill(s) &&
2513           vext_check_ms(s, a->rd, a->rs2);
2514}
2515
/* Note: vmfgt/vmfge are only generated in the .vf form here. */
GEN_OPFVF_TRANS(vmfeq_vf, opfvf_cmp_check)
GEN_OPFVF_TRANS(vmfne_vf, opfvf_cmp_check)
GEN_OPFVF_TRANS(vmflt_vf, opfvf_cmp_check)
GEN_OPFVF_TRANS(vmfle_vf, opfvf_cmp_check)
GEN_OPFVF_TRANS(vmfgt_vf, opfvf_cmp_check)
GEN_OPFVF_TRANS(vmfge_vf, opfvf_cmp_check)

/* Vector Floating-Point Classify Instruction */
GEN_OPFV_TRANS(vfclass_v, opfv_check, RISCV_FRM_DYN)

/* Vector Floating-Point Merge Instruction */
GEN_OPFVF_TRANS(vfmerge_vfm,  opfvf_check)
2528
/* vfmv.v.f vd, rs1 # vd[i] = f[rs1], NaN-boxed up to SEW */
static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a)
{
    if (require_rvv(s) &&
        require_rvf(s) &&
        vext_check_isa_ill(s) &&
        require_align(a->rd, s->lmul)) {
        gen_set_rm(s, RISCV_FRM_DYN);

        TCGv_i64 t1;

        if (s->vl_eq_vlmax) {
            /* Fast path: vl covers the whole group, splat via gvec dup. */
            t1 = tcg_temp_new_i64();
            /* NaN-box f[rs1] */
            do_nanbox(s, t1, cpu_fpr[a->rs1]);

            tcg_gen_gvec_dup_i64(s->sew, vreg_ofs(s, a->rd),
                                 MAXSZ(s), MAXSZ(s), t1);
            mark_vs_dirty(s);
        } else {
            /* Slow path: out-of-line helper honouring vl/tail policy. */
            TCGv_ptr dest;
            TCGv_i32 desc;
            uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
            /* Reuse the integer vmv.v.x helpers; indexed by s->sew - 1. */
            static gen_helper_vmv_vx * const fns[3] = {
                gen_helper_vmv_v_x_h,
                gen_helper_vmv_v_x_w,
                gen_helper_vmv_v_x_d,
            };
            TCGLabel *over = gen_new_label();
            /* Nothing to do when vl == 0. */
            tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);

            t1 = tcg_temp_new_i64();
            /* NaN-box f[rs1] */
            do_nanbox(s, t1, cpu_fpr[a->rs1]);

            dest = tcg_temp_new_ptr();
            desc = tcg_constant_i32(simd_desc(s->vlen / 8, s->vlen / 8, data));
            tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, a->rd));

            fns[s->sew - 1](dest, t1, cpu_env, desc);

            tcg_temp_free_ptr(dest);
            mark_vs_dirty(s);
            gen_set_label(over);
        }
        /* t1 was allocated on both paths above. */
        tcg_temp_free_i64(t1);
        return true;
    }
    return false;
}
2578
2579/* Single-Width Floating-Point/Integer Type-Convert Instructions */
/*
 * Generate trans_* for a single-width FP/integer convert. HELPER may
 * differ from NAME so the _rtz_ forms can reuse the base helpers with a
 * static RTZ rounding mode. fns[] is indexed by s->sew - 1.
 */
#define GEN_OPFV_CVT_TRANS(NAME, HELPER, FRM)               \
static bool trans_##NAME(DisasContext *s, arg_rmr *a)       \
{                                                           \
    static gen_helper_gvec_3_ptr * const fns[3] = {         \
        gen_helper_##HELPER##_h,                            \
        gen_helper_##HELPER##_w,                            \
        gen_helper_##HELPER##_d                             \
    };                                                      \
    return do_opfv(s, a, fns[s->sew - 1], opfv_check, FRM); \
}

GEN_OPFV_CVT_TRANS(vfcvt_xu_f_v, vfcvt_xu_f_v, RISCV_FRM_DYN)
GEN_OPFV_CVT_TRANS(vfcvt_x_f_v, vfcvt_x_f_v, RISCV_FRM_DYN)
GEN_OPFV_CVT_TRANS(vfcvt_f_xu_v, vfcvt_f_xu_v, RISCV_FRM_DYN)
GEN_OPFV_CVT_TRANS(vfcvt_f_x_v, vfcvt_f_x_v, RISCV_FRM_DYN)
/* Reuse the helper functions from vfcvt.xu.f.v and vfcvt.x.f.v */
GEN_OPFV_CVT_TRANS(vfcvt_rtz_xu_f_v, vfcvt_xu_f_v, RISCV_FRM_RTZ)
GEN_OPFV_CVT_TRANS(vfcvt_rtz_x_f_v, vfcvt_x_f_v, RISCV_FRM_RTZ)
2598
2599/* Widening Floating-Point/Integer Type-Convert Instructions */
2600
2601/*
2602 * If the current SEW does not correspond to a supported IEEE floating-point
2603 * type, an illegal instruction exception is raised
2604 */
2605static bool opfv_widen_check(DisasContext *s, arg_rmr *a)
2606{
2607    return require_rvv(s) &&
2608           require_scale_rvf(s) &&
2609           (s->sew != MO_8) &&
2610           vext_check_isa_ill(s) &&
2611           vext_check_ds(s, a->rd, a->rs2, a->vm);
2612}
2613
/*
 * Generate trans_* for a widening FP convert (SEW source, 2*SEW
 * destination). fns[] is indexed by s->sew - 1 (MO_16 -> _h,
 * MO_32 -> _w). A static FRM transitions through FRM_DYN first,
 * mirroring do_opfv().
 */
#define GEN_OPFV_WIDEN_TRANS(NAME, HELPER, FRM)                    \
static bool trans_##NAME(DisasContext *s, arg_rmr *a)              \
{                                                                  \
    if (opfv_widen_check(s, a)) {                                  \
        if (FRM != RISCV_FRM_DYN) {                                \
            gen_set_rm(s, RISCV_FRM_DYN);                          \
        }                                                          \
                                                                   \
        uint32_t data = 0;                                         \
        static gen_helper_gvec_3_ptr * const fns[2] = {            \
            gen_helper_##HELPER##_h,                               \
            gen_helper_##HELPER##_w,                               \
        };                                                         \
        TCGLabel *over = gen_new_label();                          \
        gen_set_rm(s, FRM);                                        \
        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);          \
                                                                   \
        data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
        tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
                           vreg_ofs(s, a->rs2), cpu_env,           \
                           s->vlen / 8, s->vlen / 8, data,         \
                           fns[s->sew - 1]);                       \
        mark_vs_dirty(s);                                          \
        gen_set_label(over);                                       \
        return true;                                               \
    }                                                              \
    return false;                                                  \
}

GEN_OPFV_WIDEN_TRANS(vfwcvt_xu_f_v, vfwcvt_xu_f_v, RISCV_FRM_DYN)
GEN_OPFV_WIDEN_TRANS(vfwcvt_x_f_v, vfwcvt_x_f_v, RISCV_FRM_DYN)
GEN_OPFV_WIDEN_TRANS(vfwcvt_f_f_v, vfwcvt_f_f_v, RISCV_FRM_DYN)
/* Reuse the helper functions from vfwcvt.xu.f.v and vfwcvt.x.f.v */
GEN_OPFV_WIDEN_TRANS(vfwcvt_rtz_xu_f_v, vfwcvt_xu_f_v, RISCV_FRM_RTZ)
GEN_OPFV_WIDEN_TRANS(vfwcvt_rtz_x_f_v, vfwcvt_x_f_v, RISCV_FRM_RTZ)
2650
2651static bool opfxv_widen_check(DisasContext *s, arg_rmr *a)
2652{
2653    return require_rvv(s) &&
2654           require_scale_rvf(s) &&
2655           vext_check_isa_ill(s) &&
2656           /* OPFV widening instructions ignore vs1 check */
2657           vext_check_ds(s, a->rd, a->rs2, a->vm);
2658}
2659
/*
 * Generate trans_* for integer -> widened-FP converts. fns[] is indexed
 * by s->sew directly (MO_8 source is legal here), and the rounding mode
 * is always dynamic.
 */
#define GEN_OPFXV_WIDEN_TRANS(NAME)                                \
static bool trans_##NAME(DisasContext *s, arg_rmr *a)              \
{                                                                  \
    if (opfxv_widen_check(s, a)) {                                 \
        uint32_t data = 0;                                         \
        static gen_helper_gvec_3_ptr * const fns[3] = {            \
            gen_helper_##NAME##_b,                                 \
            gen_helper_##NAME##_h,                                 \
            gen_helper_##NAME##_w,                                 \
        };                                                         \
        TCGLabel *over = gen_new_label();                          \
        gen_set_rm(s, RISCV_FRM_DYN);                              \
        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);          \
                                                                   \
        data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
        tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
                           vreg_ofs(s, a->rs2), cpu_env,           \
                           s->vlen / 8, s->vlen / 8, data,         \
                           fns[s->sew]);                           \
        mark_vs_dirty(s);                                          \
        gen_set_label(over);                                       \
        return true;                                               \
    }                                                              \
    return false;                                                  \
}

GEN_OPFXV_WIDEN_TRANS(vfwcvt_f_xu_v)
GEN_OPFXV_WIDEN_TRANS(vfwcvt_f_x_v)
2688
2689/* Narrowing Floating-Point/Integer Type-Convert Instructions */
2690
2691/*
2692 * If the current SEW does not correspond to a supported IEEE floating-point
2693 * type, an illegal instruction exception is raised
2694 */
2695static bool opfv_narrow_check(DisasContext *s, arg_rmr *a)
2696{
2697    return require_rvv(s) &&
2698           require_rvf(s) &&
2699           (s->sew != MO_64) &&
2700           vext_check_isa_ill(s) &&
2701           /* OPFV narrowing instructions ignore vs1 check */
2702           vext_check_sd(s, a->rd, a->rs2, a->vm);
2703}
2704
/*
 * Generate trans_* for narrowing FP converts (2*SEW source, SEW
 * destination). fns[] is indexed by s->sew - 1. A static FRM
 * transitions through FRM_DYN first, mirroring do_opfv().
 */
#define GEN_OPFV_NARROW_TRANS(NAME, HELPER, FRM)                   \
static bool trans_##NAME(DisasContext *s, arg_rmr *a)              \
{                                                                  \
    if (opfv_narrow_check(s, a)) {                                 \
        if (FRM != RISCV_FRM_DYN) {                                \
            gen_set_rm(s, RISCV_FRM_DYN);                          \
        }                                                          \
                                                                   \
        uint32_t data = 0;                                         \
        static gen_helper_gvec_3_ptr * const fns[2] = {            \
            gen_helper_##HELPER##_h,                               \
            gen_helper_##HELPER##_w,                               \
        };                                                         \
        TCGLabel *over = gen_new_label();                          \
        gen_set_rm(s, FRM);                                        \
        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);          \
                                                                   \
        data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
        tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
                           vreg_ofs(s, a->rs2), cpu_env,           \
                           s->vlen / 8, s->vlen / 8, data,         \
                           fns[s->sew - 1]);                       \
        mark_vs_dirty(s);                                          \
        gen_set_label(over);                                       \
        return true;                                               \
    }                                                              \
    return false;                                                  \
}

GEN_OPFV_NARROW_TRANS(vfncvt_f_xu_w, vfncvt_f_xu_w, RISCV_FRM_DYN)
GEN_OPFV_NARROW_TRANS(vfncvt_f_x_w, vfncvt_f_x_w, RISCV_FRM_DYN)
GEN_OPFV_NARROW_TRANS(vfncvt_f_f_w, vfncvt_f_f_w, RISCV_FRM_DYN)
/* Reuse the helper function from vfncvt.f.f.w */
GEN_OPFV_NARROW_TRANS(vfncvt_rod_f_f_w, vfncvt_f_f_w, RISCV_FRM_ROD)
2740
2741static bool opxfv_narrow_check(DisasContext *s, arg_rmr *a)
2742{
2743    return require_rvv(s) &&
2744           require_scale_rvf(s) &&
2745           vext_check_isa_ill(s) &&
2746           /* OPFV narrowing instructions ignore vs1 check */
2747           vext_check_sd(s, a->rd, a->rs2, a->vm);
2748}
2749
/*
 * Generate trans_* for FP -> narrowed-integer converts. fns[] is
 * indexed by s->sew directly (MO_8 destination is legal here). A
 * static FRM transitions through FRM_DYN first, mirroring do_opfv().
 */
#define GEN_OPXFV_NARROW_TRANS(NAME, HELPER, FRM)                  \
static bool trans_##NAME(DisasContext *s, arg_rmr *a)              \
{                                                                  \
    if (opxfv_narrow_check(s, a)) {                                \
        if (FRM != RISCV_FRM_DYN) {                                \
            gen_set_rm(s, RISCV_FRM_DYN);                          \
        }                                                          \
                                                                   \
        uint32_t data = 0;                                         \
        static gen_helper_gvec_3_ptr * const fns[3] = {            \
            gen_helper_##HELPER##_b,                               \
            gen_helper_##HELPER##_h,                               \
            gen_helper_##HELPER##_w,                               \
        };                                                         \
        TCGLabel *over = gen_new_label();                          \
        gen_set_rm(s, FRM);                                        \
        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);          \
                                                                   \
        data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
        tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
                           vreg_ofs(s, a->rs2), cpu_env,           \
                           s->vlen / 8, s->vlen / 8, data,         \
                           fns[s->sew]);                           \
        mark_vs_dirty(s);                                          \
        gen_set_label(over);                                       \
        return true;                                               \
    }                                                              \
    return false;                                                  \
}

GEN_OPXFV_NARROW_TRANS(vfncvt_xu_f_w, vfncvt_xu_f_w, RISCV_FRM_DYN)
GEN_OPXFV_NARROW_TRANS(vfncvt_x_f_w, vfncvt_x_f_w, RISCV_FRM_DYN)
/* Reuse the helper functions from vfncvt.xu.f.w and vfncvt.x.f.w */
GEN_OPXFV_NARROW_TRANS(vfncvt_rtz_xu_f_w, vfncvt_xu_f_w, RISCV_FRM_RTZ)
GEN_OPXFV_NARROW_TRANS(vfncvt_rtz_x_f_w, vfncvt_x_f_w, RISCV_FRM_RTZ)
2785
2786/*
2787 *** Vector Reduction Operations
2788 */
2789/* Vector Single-Width Integer Reduction Instructions */
2790static bool reduction_check(DisasContext *s, arg_rmrr *a)
2791{
2792    return require_rvv(s) &&
2793           vext_check_isa_ill(s) &&
2794           vext_check_reduction(s, a->rs2);
2795}
2796
/* Single-width integer reductions, all sharing reduction_check. */
GEN_OPIVV_TRANS(vredsum_vs, reduction_check)
GEN_OPIVV_TRANS(vredmaxu_vs, reduction_check)
GEN_OPIVV_TRANS(vredmax_vs, reduction_check)
GEN_OPIVV_TRANS(vredminu_vs, reduction_check)
GEN_OPIVV_TRANS(vredmin_vs, reduction_check)
GEN_OPIVV_TRANS(vredand_vs, reduction_check)
GEN_OPIVV_TRANS(vredor_vs, reduction_check)
GEN_OPIVV_TRANS(vredxor_vs, reduction_check)
2805
2806/* Vector Widening Integer Reduction Instructions */
2807static bool reduction_widen_check(DisasContext *s, arg_rmrr *a)
2808{
2809    return reduction_check(s, a) && (s->sew < MO_64);
2810}
2811
/* Widening integer reductions: sum into a 2*SEW accumulator. */
GEN_OPIVV_WIDEN_TRANS(vwredsum_vs, reduction_widen_check)
GEN_OPIVV_WIDEN_TRANS(vwredsumu_vs, reduction_widen_check)
2814
2815/* Vector Single-Width Floating-Point Reduction Instructions */
2816static bool freduction_check(DisasContext *s, arg_rmrr *a)
2817{
2818    return reduction_check(s, a) &&
2819           require_rvf(s);
2820}
2821
/* Single-width FP reductions. */
GEN_OPFVV_TRANS(vfredsum_vs, freduction_check)
GEN_OPFVV_TRANS(vfredmax_vs, freduction_check)
GEN_OPFVV_TRANS(vfredmin_vs, freduction_check)
2825
2826/* Vector Widening Floating-Point Reduction Instructions */
2827static bool freduction_widen_check(DisasContext *s, arg_rmrr *a)
2828{
2829    return reduction_widen_check(s, a) &&
2830           require_scale_rvf(s) &&
2831           (s->sew != MO_8);
2832}
2833
/* Widening FP reduction: sum into a 2*SEW FP accumulator. */
GEN_OPFVV_WIDEN_TRANS(vfwredsum_vs, freduction_widen_check)
2835
2836/*
2837 *** Vector Mask Operations
2838 */
2839
2840/* Vector Mask-Register Logical Instructions */
/*
 * Generate trans_* for a mask-register logical op (vd = op(vs2, vs1),
 * all operands mask registers). Only the vector unit and a legal vtype
 * are required; no overlap/alignment checks apply to mask registers.
 */
#define GEN_MM_TRANS(NAME)                                         \
static bool trans_##NAME(DisasContext *s, arg_r *a)                \
{                                                                  \
    if (require_rvv(s) &&                                          \
        vext_check_isa_ill(s)) {                                   \
        uint32_t data = 0;                                         \
        gen_helper_gvec_4_ptr *fn = gen_helper_##NAME;             \
        TCGLabel *over = gen_new_label();                          \
        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);          \
                                                                   \
        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
        tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
                           vreg_ofs(s, a->rs1),                    \
                           vreg_ofs(s, a->rs2), cpu_env,           \
                           s->vlen / 8, s->vlen / 8, data, fn);    \
        mark_vs_dirty(s);                                          \
        gen_set_label(over);                                       \
        return true;                                               \
    }                                                              \
    return false;                                                  \
}

GEN_MM_TRANS(vmand_mm)
GEN_MM_TRANS(vmnand_mm)
GEN_MM_TRANS(vmandnot_mm)
GEN_MM_TRANS(vmxor_mm)
GEN_MM_TRANS(vmor_mm)
GEN_MM_TRANS(vmnor_mm)
GEN_MM_TRANS(vmornot_mm)
GEN_MM_TRANS(vmxnor_mm)
2871
2872/* Vector count population in mask vcpop */
2873static bool trans_vcpop_m(DisasContext *s, arg_rmr *a)
2874{
2875    if (require_rvv(s) &&
2876        vext_check_isa_ill(s) &&
2877        s->vstart == 0) {
2878        TCGv_ptr src2, mask;
2879        TCGv dst;
2880        TCGv_i32 desc;
2881        uint32_t data = 0;
2882        data = FIELD_DP32(data, VDATA, VM, a->vm);
2883        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
2884
2885        mask = tcg_temp_new_ptr();
2886        src2 = tcg_temp_new_ptr();
2887        dst = dest_gpr(s, a->rd);
2888        desc = tcg_constant_i32(simd_desc(s->vlen / 8, s->vlen / 8, data));
2889
2890        tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, a->rs2));
2891        tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));
2892
2893        gen_helper_vcpop_m(dst, mask, src2, cpu_env, desc);
2894        gen_set_gpr(s, a->rd, dst);
2895
2896        tcg_temp_free_ptr(mask);
2897        tcg_temp_free_ptr(src2);
2898
2899        return true;
2900    }
2901    return false;
2902}
2903
2904/* vmfirst find-first-set mask bit */
2905static bool trans_vfirst_m(DisasContext *s, arg_rmr *a)
2906{
2907    if (require_rvv(s) &&
2908        vext_check_isa_ill(s) &&
2909        s->vstart == 0) {
2910        TCGv_ptr src2, mask;
2911        TCGv dst;
2912        TCGv_i32 desc;
2913        uint32_t data = 0;
2914        data = FIELD_DP32(data, VDATA, VM, a->vm);
2915        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
2916
2917        mask = tcg_temp_new_ptr();
2918        src2 = tcg_temp_new_ptr();
2919        dst = dest_gpr(s, a->rd);
2920        desc = tcg_constant_i32(simd_desc(s->vlen / 8, s->vlen / 8, data));
2921
2922        tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, a->rs2));
2923        tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));
2924
2925        gen_helper_vfirst_m(dst, mask, src2, cpu_env, desc);
2926        gen_set_gpr(s, a->rd, dst);
2927
2928        tcg_temp_free_ptr(mask);
2929        tcg_temp_free_ptr(src2);
2930        return true;
2931    }
2932    return false;
2933}
2934
/* vmsbf.m set-before-first mask bit */
/* vmsif.m set-including-first mask bit */
/* vmsof.m set-only-first mask bit */
/*
 * Generate trans_* for the set-before/including/only-first mask ops.
 * These require vd != vs2, vd not overlapping v0 when masked, and
 * vstart == 0 (illegal otherwise).
 */
#define GEN_M_TRANS(NAME)                                          \
static bool trans_##NAME(DisasContext *s, arg_rmr *a)              \
{                                                                  \
    if (require_rvv(s) &&                                          \
        vext_check_isa_ill(s) &&                                   \
        require_vm(a->vm, a->rd) &&                                \
        (a->rd != a->rs2) &&                                       \
        (s->vstart == 0)) {                                        \
        uint32_t data = 0;                                         \
        gen_helper_gvec_3_ptr *fn = gen_helper_##NAME;             \
        TCGLabel *over = gen_new_label();                          \
        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);          \
                                                                   \
        data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
        tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd),                     \
                           vreg_ofs(s, 0), vreg_ofs(s, a->rs2),    \
                           cpu_env, s->vlen / 8, s->vlen / 8,      \
                           data, fn);                              \
        mark_vs_dirty(s);                                          \
        gen_set_label(over);                                       \
        return true;                                               \
    }                                                              \
    return false;                                                  \
}

GEN_M_TRANS(vmsbf_m)
GEN_M_TRANS(vmsif_m)
GEN_M_TRANS(vmsof_m)
2967
2968/*
2969 * Vector Iota Instruction
2970 *
2971 * 1. The destination register cannot overlap the source register.
2972 * 2. If masked, cannot overlap the mask register ('v0').
2973 * 3. An illegal instruction exception is raised if vstart is non-zero.
2974 */
static bool trans_viota_m(DisasContext *s, arg_viota_m *a)
{
    if (require_rvv(s) &&
        vext_check_isa_ill(s) &&
        /* vd (a full LMUL group) must not overlap the 1-register mask vs2 */
        !is_overlapped(a->rd, 1 << MAX(s->lmul, 0), a->rs2, 1) &&
        require_vm(a->vm, a->rd) &&
        require_align(a->rd, s->lmul) &&
        (s->vstart == 0)) {
        uint32_t data = 0;
        TCGLabel *over = gen_new_label();
        /* Nothing to do when vl == 0. */
        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);

        data = FIELD_DP32(data, VDATA, VM, a->vm);
        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
        /* Indexed directly by SEW: MO_8 .. MO_64. */
        static gen_helper_gvec_3_ptr * const fns[4] = {
            gen_helper_viota_m_b, gen_helper_viota_m_h,
            gen_helper_viota_m_w, gen_helper_viota_m_d,
        };
        tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
                           vreg_ofs(s, a->rs2), cpu_env,
                           s->vlen / 8, s->vlen / 8, data, fns[s->sew]);
        mark_vs_dirty(s);
        gen_set_label(over);
        return true;
    }
    return false;
}
3002
3003/* Vector Element Index Instruction */
3004static bool trans_vid_v(DisasContext *s, arg_vid_v *a)
3005{
3006    if (require_rvv(s) &&
3007        vext_check_isa_ill(s) &&
3008        require_align(a->rd, s->lmul) &&
3009        require_vm(a->vm, a->rd)) {
3010        uint32_t data = 0;
3011        TCGLabel *over = gen_new_label();
3012        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
3013
3014        data = FIELD_DP32(data, VDATA, VM, a->vm);
3015        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
3016        static gen_helper_gvec_2_ptr * const fns[4] = {
3017            gen_helper_vid_v_b, gen_helper_vid_v_h,
3018            gen_helper_vid_v_w, gen_helper_vid_v_d,
3019        };
3020        tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
3021                           cpu_env, s->vlen / 8, s->vlen / 8,
3022                           data, fns[s->sew]);
3023        mark_vs_dirty(s);
3024        gen_set_label(over);
3025        return true;
3026    }
3027    return false;
3028}
3029
3030/*
3031 *** Vector Permutation Instructions
3032 */
3033
3034static void load_element(TCGv_i64 dest, TCGv_ptr base,
3035                         int ofs, int sew, bool sign)
3036{
3037    switch (sew) {
3038    case MO_8:
3039        if (!sign) {
3040            tcg_gen_ld8u_i64(dest, base, ofs);
3041        } else {
3042            tcg_gen_ld8s_i64(dest, base, ofs);
3043        }
3044        break;
3045    case MO_16:
3046        if (!sign) {
3047            tcg_gen_ld16u_i64(dest, base, ofs);
3048        } else {
3049            tcg_gen_ld16s_i64(dest, base, ofs);
3050        }
3051        break;
3052    case MO_32:
3053        if (!sign) {
3054            tcg_gen_ld32u_i64(dest, base, ofs);
3055        } else {
3056            tcg_gen_ld32s_i64(dest, base, ofs);
3057        }
3058        break;
3059    case MO_64:
3060        tcg_gen_ld_i64(dest, base, ofs);
3061        break;
3062    default:
3063        g_assert_not_reached();
3064        break;
3065    }
3066}
3067
/* offset of the idx element with base register r */
static uint32_t endian_ofs(DisasContext *s, int r, int idx)
{
#ifdef HOST_WORDS_BIGENDIAN
    /*
     * On big-endian hosts the elements within each 8-byte unit are
     * stored in reverse order; xor the index to compensate.
     */
    return vreg_ofs(s, r) + ((idx ^ (7 >> s->sew)) << s->sew);
#else
    return vreg_ofs(s, r) + (idx << s->sew);
#endif
}
3077
3078/* adjust the index according to the endian */
/* adjust the index according to the endian */
static void endian_adjust(TCGv_i32 ofs, int sew)
{
#ifdef HOST_WORDS_BIGENDIAN
    /* Runtime counterpart of the xor in endian_ofs(). */
    tcg_gen_xori_i32(ofs, ofs, 7 >> sew);
#endif
}
3085
3086/* Load idx >= VLMAX ? 0 : vreg[idx] */
static void vec_element_loadx(DisasContext *s, TCGv_i64 dest,
                              int vreg, TCGv idx, int vlmax)
{
    TCGv_i32 ofs = tcg_temp_new_i32();
    TCGv_ptr base = tcg_temp_new_ptr();
    TCGv_i64 t_idx = tcg_temp_new_i64();
    TCGv_i64 t_vlmax, t_zero;

    /*
     * Mask the index to the length so that we do
     * not produce an out-of-range load.
     *
     * NOTE(review): the mask only bounds the index correctly when
     * vlmax is a power of two — confirm this holds at all call sites.
     */
    tcg_gen_trunc_tl_i32(ofs, idx);
    tcg_gen_andi_i32(ofs, ofs, vlmax - 1);

    /* Convert the index to an offset. */
    endian_adjust(ofs, s->sew);
    tcg_gen_shli_i32(ofs, ofs, s->sew);

    /* Convert the index to a pointer. */
    tcg_gen_ext_i32_ptr(base, ofs);
    tcg_gen_add_ptr(base, base, cpu_env);

    /* Perform the load (always zero-extended here). */
    load_element(dest, base,
                 vreg_ofs(s, vreg), s->sew, false);
    tcg_temp_free_ptr(base);
    tcg_temp_free_i32(ofs);

    /* Flush out-of-range indexing to zero.  */
    t_vlmax = tcg_constant_i64(vlmax);
    t_zero = tcg_constant_i64(0);
    tcg_gen_extu_tl_i64(t_idx, idx);

    tcg_gen_movcond_i64(TCG_COND_LTU, dest, t_idx,
                        t_vlmax, dest, t_zero);

    /* t_vlmax/t_zero are constants and must not be freed. */
    tcg_temp_free_i64(t_idx);
}
3126
/* Load vreg[idx] into dest with a compile-time constant index. */
static void vec_element_loadi(DisasContext *s, TCGv_i64 dest,
                              int vreg, int idx, bool sign)
{
    load_element(dest, cpu_env, endian_ofs(s, vreg, idx), s->sew, sign);
}
3132
3133/* Integer Scalar Move Instruction */
3134
3135static void store_element(TCGv_i64 val, TCGv_ptr base,
3136                          int ofs, int sew)
3137{
3138    switch (sew) {
3139    case MO_8:
3140        tcg_gen_st8_i64(val, base, ofs);
3141        break;
3142    case MO_16:
3143        tcg_gen_st16_i64(val, base, ofs);
3144        break;
3145    case MO_32:
3146        tcg_gen_st32_i64(val, base, ofs);
3147        break;
3148    case MO_64:
3149        tcg_gen_st_i64(val, base, ofs);
3150        break;
3151    default:
3152        g_assert_not_reached();
3153        break;
3154    }
3155}
3156
3157/*
3158 * Store vreg[idx] = val.
3159 * The index must be in range of VLMAX.
3160 */
static void vec_element_storei(DisasContext *s, int vreg,
                               int idx, TCGv_i64 val)
{
    /* Truncation to SEW happens inside store_element(). */
    store_element(val, cpu_env, endian_ofs(s, vreg, idx), s->sew);
}
3166
3167/* vmv.x.s rd, vs2 # x[rd] = vs2[0] */
3168static bool trans_vmv_x_s(DisasContext *s, arg_vmv_x_s *a)
3169{
3170    if (require_rvv(s) &&
3171        vext_check_isa_ill(s)) {
3172        TCGv_i64 t1;
3173        TCGv dest;
3174
3175        t1 = tcg_temp_new_i64();
3176        dest = tcg_temp_new();
3177        /*
3178         * load vreg and sign-extend to 64 bits,
3179         * then truncate to XLEN bits before storing to gpr.
3180         */
3181        vec_element_loadi(s, t1, a->rs2, 0, true);
3182        tcg_gen_trunc_i64_tl(dest, t1);
3183        gen_set_gpr(s, a->rd, dest);
3184        tcg_temp_free_i64(t1);
3185        tcg_temp_free(dest);
3186
3187        return true;
3188    }
3189    return false;
3190}
3191
3192/* vmv.s.x vd, rs1 # vd[0] = rs1 */
/* vmv.s.x vd, rs1 # vd[0] = rs1 */
static bool trans_vmv_s_x(DisasContext *s, arg_vmv_s_x *a)
{
    if (require_rvv(s) &&
        vext_check_isa_ill(s)) {
        /* This instruction ignores LMUL and vector register groups */
        TCGv_i64 t1;
        TCGv s1;
        TCGLabel *over = gen_new_label();

        /* Skip the write when vl == 0 or vstart >= vl. */
        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
        tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);

        t1 = tcg_temp_new_i64();

        /*
         * load gpr and sign-extend to 64 bits,
         * then truncate to SEW bits when storing to vreg.
         */
        s1 = get_gpr(s, a->rs1, EXT_NONE);
        tcg_gen_ext_tl_i64(t1, s1);
        vec_element_storei(s, a->rd, 0, t1);
        tcg_temp_free_i64(t1);
        mark_vs_dirty(s);
        gen_set_label(over);
        return true;
    }
    return false;
}
3221
3222/* Floating-Point Scalar Move Instructions */
/* vfmv.f.s rd, vs2 # f[rd] = vs2[0], NaN-boxed up to 64 bits */
static bool trans_vfmv_f_s(DisasContext *s, arg_vfmv_f_s *a)
{
    if (require_rvv(s) &&
        require_rvf(s) &&
        vext_check_isa_ill(s)) {
        gen_set_rm(s, RISCV_FRM_DYN);

        /* ofs = value bits for this SEW; len = remaining NaN-box bits. */
        unsigned int ofs = (8 << s->sew);
        unsigned int len = 64 - ofs;
        TCGv_i64 t_nan;

        vec_element_loadi(s, cpu_fpr[a->rd], a->rs2, 0, false);
        /* NaN-box f[rd] as necessary for SEW */
        if (len) {
            t_nan = tcg_constant_i64(UINT64_MAX);
            tcg_gen_deposit_i64(cpu_fpr[a->rd], cpu_fpr[a->rd],
                                t_nan, ofs, len);
        }

        mark_fs_dirty(s);
        return true;
    }
    return false;
}
3247
3248/* vfmv.s.f vd, rs1 # vd[0] = rs1 (vs2=0) */
3249static bool trans_vfmv_s_f(DisasContext *s, arg_vfmv_s_f *a)
3250{
3251    if (require_rvv(s) &&
3252        require_rvf(s) &&
3253        vext_check_isa_ill(s)) {
3254        gen_set_rm(s, RISCV_FRM_DYN);
3255
3256        /* The instructions ignore LMUL and vector register group. */
3257        TCGv_i64 t1;
3258        TCGLabel *over = gen_new_label();
3259
3260        /* if vl == 0 or vstart >= vl, skip vector register write back */
3261        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
3262        tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
3263
3264        /* NaN-box f[rs1] */
3265        t1 = tcg_temp_new_i64();
3266        do_nanbox(s, t1, cpu_fpr[a->rs1]);
3267
3268        vec_element_storei(s, a->rd, 0, t1);
3269        tcg_temp_free_i64(t1);
3270        mark_vs_dirty(s);
3271        gen_set_label(over);
3272        return true;
3273    }
3274    return false;
3275}
3276
3277/* Vector Slide Instructions */
3278static bool slideup_check(DisasContext *s, arg_rmrr *a)
3279{
3280    return require_rvv(s) &&
3281           vext_check_isa_ill(s) &&
3282           vext_check_slide(s, a->rd, a->rs2, a->vm, true);
3283}
3284
/* vslideup.vx/vi and vslide1up.vx all share the slide-up legality check. */
GEN_OPIVX_TRANS(vslideup_vx, slideup_check)
GEN_OPIVX_TRANS(vslide1up_vx, slideup_check)
GEN_OPIVI_TRANS(vslideup_vi, IMM_ZX, vslideup_vx, slideup_check)
3288
3289static bool slidedown_check(DisasContext *s, arg_rmrr *a)
3290{
3291    return require_rvv(s) &&
3292           vext_check_isa_ill(s) &&
3293           vext_check_slide(s, a->rd, a->rs2, a->vm, false);
3294}
3295
/* vslidedown.vx/vi and vslide1down.vx all share the slide-down check. */
GEN_OPIVX_TRANS(vslidedown_vx, slidedown_check)
GEN_OPIVX_TRANS(vslide1down_vx, slidedown_check)
GEN_OPIVI_TRANS(vslidedown_vi, IMM_ZX, vslidedown_vx, slidedown_check)
3299
3300/* Vector Floating-Point Slide Instructions */
3301static bool fslideup_check(DisasContext *s, arg_rmrr *a)
3302{
3303    return slideup_check(s, a) &&
3304           require_rvf(s);
3305}
3306
3307static bool fslidedown_check(DisasContext *s, arg_rmrr *a)
3308{
3309    return slidedown_check(s, a) &&
3310           require_rvf(s);
3311}
3312
/* FP slide1 variants reuse the integer slide checks plus require_rvf. */
GEN_OPFVF_TRANS(vfslide1up_vf, fslideup_check)
GEN_OPFVF_TRANS(vfslide1down_vf, fslidedown_check)
3315
3316/* Vector Register Gather Instruction */
3317static bool vrgather_vv_check(DisasContext *s, arg_rmrr *a)
3318{
3319    return require_rvv(s) &&
3320           vext_check_isa_ill(s) &&
3321           require_align(a->rd, s->lmul) &&
3322           require_align(a->rs1, s->lmul) &&
3323           require_align(a->rs2, s->lmul) &&
3324           (a->rd != a->rs2 && a->rd != a->rs1) &&
3325           require_vm(a->vm, a->rd);
3326}
3327
static bool vrgatherei16_vv_check(DisasContext *s, arg_rmrr *a)
{
    /*
     * The index operand vs1 uses a fixed EEW of 16, so its effective
     * EMUL (in log2 form) is MO_16 - SEW + LMUL.
     */
    int8_t emul = MO_16 - s->sew + s->lmul;
    return require_rvv(s) &&
           vext_check_isa_ill(s) &&
           /* EMUL must lie in [1/8, 8] (log2 range [-3, 3]). */
           (emul >= -3 && emul <= 3) &&
           require_align(a->rd, s->lmul) &&
           /* vs1 is aligned to its own EMUL, not LMUL. */
           require_align(a->rs1, emul) &&
           require_align(a->rs2, s->lmul) &&
           (a->rd != a->rs2 && a->rd != a->rs1) &&
           /*
            * vd's group must not overlap either source group; vs1 may
            * span a different number of registers than vd (its EMUL).
            */
           !is_overlapped(a->rd, 1 << MAX(s->lmul, 0),
                          a->rs1, 1 << MAX(emul, 0)) &&
           !is_overlapped(a->rd, 1 << MAX(s->lmul, 0),
                          a->rs2, 1 << MAX(s->lmul, 0)) &&
           require_vm(a->vm, a->rd);
}
3344
/* vrgather.vv uses SEW-wide indices; vrgatherei16.vv uses EEW=16 indices. */
GEN_OPIVV_TRANS(vrgather_vv, vrgather_vv_check)
GEN_OPIVV_TRANS(vrgatherei16_vv, vrgatherei16_vv_check)
3347
3348static bool vrgather_vx_check(DisasContext *s, arg_rmrr *a)
3349{
3350    return require_rvv(s) &&
3351           vext_check_isa_ill(s) &&
3352           require_align(a->rd, s->lmul) &&
3353           require_align(a->rs2, s->lmul) &&
3354           (a->rd != a->rs2) &&
3355           require_vm(a->vm, a->rd);
3356}
3357
3358/* vrgather.vx vd, vs2, rs1, vm # vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */
/* vrgather.vx vd, vs2, rs1, vm # vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */
static bool trans_vrgather_vx(DisasContext *s, arg_rmrr *a)
{
    if (!vrgather_vx_check(s, a)) {
        return false;
    }

    if (a->vm && s->vl_eq_vlmax) {
        /*
         * Unmasked with vl == VLMAX: every destination element gets the
         * same value vs2[x[rs1]], so emit an inline gvec dup instead of
         * calling the helper.
         * vlmax = vlen * LMUL / (8 << sew); scale holds the log2 shift.
         */
        int scale = s->lmul - (s->sew + 3);
        int vlmax = scale < 0 ? s->vlen >> -scale : s->vlen << scale;
        TCGv_i64 dest = tcg_temp_new_i64();

        if (a->rs1 == 0) {
            /* x0 reads as 0, so the index is statically element 0. */
            vec_element_loadi(s, dest, a->rs2, 0, false);
        } else {
            /*
             * vlmax bounds the runtime index; per the instruction
             * semantics above, out-of-range indices yield 0 (presumably
             * handled inside vec_element_loadx — confirm there).
             */
            vec_element_loadx(s, dest, a->rs2, cpu_gpr[a->rs1], vlmax);
        }

        tcg_gen_gvec_dup_i64(s->sew, vreg_ofs(s, a->rd),
                             MAXSZ(s), MAXSZ(s), dest);
        tcg_temp_free_i64(dest);
        mark_vs_dirty(s);
    } else {
        /* General (masked or vl < VLMAX) case: per-SEW helper. */
        static gen_helper_opivx * const fns[4] = {
            gen_helper_vrgather_vx_b, gen_helper_vrgather_vx_h,
            gen_helper_vrgather_vx_w, gen_helper_vrgather_vx_d
        };
        return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);
    }
    return true;
}
3389
3390/* vrgather.vi vd, vs2, imm, vm # vd[i] = (imm >= VLMAX) ? 0 : vs2[imm] */
3391static bool trans_vrgather_vi(DisasContext *s, arg_rmrr *a)
3392{
3393    if (!vrgather_vx_check(s, a)) {
3394        return false;
3395    }
3396
3397    if (a->vm && s->vl_eq_vlmax) {
3398        int scale = s->lmul - (s->sew + 3);
3399        int vlmax = scale < 0 ? s->vlen >> -scale : s->vlen << scale;
3400        if (a->rs1 >= vlmax) {
3401            tcg_gen_gvec_dup_imm(MO_64, vreg_ofs(s, a->rd),
3402                                 MAXSZ(s), MAXSZ(s), 0);
3403        } else {
3404            tcg_gen_gvec_dup_mem(s->sew, vreg_ofs(s, a->rd),
3405                                 endian_ofs(s, a->rs2, a->rs1),
3406                                 MAXSZ(s), MAXSZ(s));
3407        }
3408        mark_vs_dirty(s);
3409    } else {
3410        static gen_helper_opivx * const fns[4] = {
3411            gen_helper_vrgather_vx_b, gen_helper_vrgather_vx_h,
3412            gen_helper_vrgather_vx_w, gen_helper_vrgather_vx_d
3413        };
3414        return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew],
3415                           s, IMM_ZX);
3416    }
3417    return true;
3418}
3419
3420/*
3421 * Vector Compress Instruction
3422 *
3423 * The destination vector register group cannot overlap the
3424 * source vector register group or the source mask register.
3425 */
3426static bool vcompress_vm_check(DisasContext *s, arg_r *a)
3427{
3428    return require_rvv(s) &&
3429           vext_check_isa_ill(s) &&
3430           require_align(a->rd, s->lmul) &&
3431           require_align(a->rs2, s->lmul) &&
3432           (a->rd != a->rs2) &&
3433           !is_overlapped(a->rd, 1 << MAX(s->lmul, 0), a->rs1, 1) &&
3434           (s->vstart == 0);
3435}
3436
static bool trans_vcompress_vm(DisasContext *s, arg_r *a)
{
    if (vcompress_vm_check(s, a)) {
        uint32_t data = 0;
        /* Per-SEW helper table, indexed by s->sew (b/h/w/d). */
        static gen_helper_gvec_4_ptr * const fns[4] = {
            gen_helper_vcompress_vm_b, gen_helper_vcompress_vm_h,
            gen_helper_vcompress_vm_w, gen_helper_vcompress_vm_d,
        };
        TCGLabel *over = gen_new_label();
        /* Skip the whole operation when vl == 0. */
        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);

        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
        /* args: vd, v0 (mask base), vs1 (compress mask), vs2 (source) */
        tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
                           vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2),
                           cpu_env, s->vlen / 8, s->vlen / 8, data,
                           fns[s->sew]);
        mark_vs_dirty(s);
        gen_set_label(over);
        return true;
    }
    return false;
}
3459
3460/*
3461 * Whole Vector Register Move Instructions ignore vtype and vl setting.
3462 * Thus, we don't need to check vill bit. (Section 16.6)
3463 */
/*
 * NAME: the trans_* function to generate; LEN: number of whole registers
 * moved (also the required alignment of vd/vs2); SEQ: index into the
 * vmv<N>r helper table.
 *
 * Fast path: when vstart == 0 the move is a plain gvec copy with EEW=8.
 * Otherwise a helper performs the partial copy, skipped entirely when
 * vstart is already at or past the end of the register group.
 */
#define GEN_VMV_WHOLE_TRANS(NAME, LEN, SEQ)                             \
static bool trans_##NAME(DisasContext *s, arg_##NAME * a)               \
{                                                                       \
    if (require_rvv(s) &&                                               \
        QEMU_IS_ALIGNED(a->rd, LEN) &&                                  \
        QEMU_IS_ALIGNED(a->rs2, LEN)) {                                 \
        uint32_t maxsz = (s->vlen >> 3) * LEN;                          \
        if (s->vstart == 0) {                                           \
            /* EEW = 8 */                                               \
            tcg_gen_gvec_mov(MO_8, vreg_ofs(s, a->rd),                  \
                             vreg_ofs(s, a->rs2), maxsz, maxsz);        \
            mark_vs_dirty(s);                                           \
        } else {                                                        \
            TCGLabel *over = gen_new_label();                           \
            tcg_gen_brcondi_tl(TCG_COND_GEU, cpu_vstart, maxsz, over);  \
                                                                        \
            static gen_helper_gvec_2_ptr * const fns[4] = {             \
                gen_helper_vmv1r_v, gen_helper_vmv2r_v,                 \
                gen_helper_vmv4r_v, gen_helper_vmv8r_v,                 \
            };                                                          \
            tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), \
                               cpu_env, maxsz, maxsz, 0, fns[SEQ]);     \
            mark_vs_dirty(s);                                           \
            gen_set_label(over);                                        \
        }                                                               \
        return true;                                                    \
    }                                                                   \
    return false;                                                       \
}

/* vmv<N>r.v: copy N whole vector registers, N = 1, 2, 4, 8. */
GEN_VMV_WHOLE_TRANS(vmv1r_v, 1, 0)
GEN_VMV_WHOLE_TRANS(vmv2r_v, 2, 1)
GEN_VMV_WHOLE_TRANS(vmv4r_v, 4, 2)
GEN_VMV_WHOLE_TRANS(vmv8r_v, 8, 3)
3498
3499static bool int_ext_check(DisasContext *s, arg_rmr *a, uint8_t div)
3500{
3501    uint8_t from = (s->sew + 3) - div;
3502    bool ret = require_rvv(s) &&
3503        (from >= 3 && from <= 8) &&
3504        (a->rd != a->rs2) &&
3505        require_align(a->rd, s->lmul) &&
3506        require_align(a->rs2, s->lmul - div) &&
3507        require_vm(a->vm, a->rd) &&
3508        require_noover(a->rd, s->lmul, a->rs2, s->lmul - div);
3509    return ret;
3510}
3511
3512static bool int_ext_op(DisasContext *s, arg_rmr *a, uint8_t seq)
3513{
3514    uint32_t data = 0;
3515    gen_helper_gvec_3_ptr *fn;
3516    TCGLabel *over = gen_new_label();
3517    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
3518
3519    static gen_helper_gvec_3_ptr * const fns[6][4] = {
3520        {
3521            NULL, gen_helper_vzext_vf2_h,
3522            gen_helper_vzext_vf2_w, gen_helper_vzext_vf2_d
3523        },
3524        {
3525            NULL, NULL,
3526            gen_helper_vzext_vf4_w, gen_helper_vzext_vf4_d,
3527        },
3528        {
3529            NULL, NULL,
3530            NULL, gen_helper_vzext_vf8_d
3531        },
3532        {
3533            NULL, gen_helper_vsext_vf2_h,
3534            gen_helper_vsext_vf2_w, gen_helper_vsext_vf2_d
3535        },
3536        {
3537            NULL, NULL,
3538            gen_helper_vsext_vf4_w, gen_helper_vsext_vf4_d,
3539        },
3540        {
3541            NULL, NULL,
3542            NULL, gen_helper_vsext_vf8_d
3543        }
3544    };
3545
3546    fn = fns[seq][s->sew];
3547    if (fn == NULL) {
3548        return false;
3549    }
3550
3551    data = FIELD_DP32(data, VDATA, VM, a->vm);
3552
3553    tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
3554                       vreg_ofs(s, a->rs2), cpu_env,
3555                       s->vlen / 8, s->vlen / 8, data, fn);
3556
3557    mark_vs_dirty(s);
3558    gen_set_label(over);
3559    return true;
3560}
3561
3562/* Vector Integer Extension */
/* Vector Integer Extension */
/*
 * DIV: log2 of SEW/EEW for the source (1, 2, 3 for vf2/vf4/vf8).
 * SEQ: row index into the fns[][] table in int_ext_op
 *      (0-2 zero-extend, 3-5 sign-extend).
 */
#define GEN_INT_EXT_TRANS(NAME, DIV, SEQ)             \
static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
{                                                     \
    if (int_ext_check(s, a, DIV)) {                   \
        return int_ext_op(s, a, SEQ);                 \
    }                                                 \
    return false;                                     \
}

GEN_INT_EXT_TRANS(vzext_vf2, 1, 0)
GEN_INT_EXT_TRANS(vzext_vf4, 2, 1)
GEN_INT_EXT_TRANS(vzext_vf8, 3, 2)
GEN_INT_EXT_TRANS(vsext_vf2, 1, 3)
GEN_INT_EXT_TRANS(vsext_vf4, 2, 4)
GEN_INT_EXT_TRANS(vsext_vf8, 3, 5)
3578