1/*
2 * RISC-V translation routines for the RVV Standard Extension.
3 *
4 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2 or later, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program.  If not, see <http://www.gnu.org/licenses/>.
17 */
18#include "tcg/tcg-op-gvec.h"
19#include "tcg/tcg-gvec-desc.h"
20#include "internals.h"
21
/*
 * Report whether the register ranges [astart, astart + asize) and
 * [bstart, bstart + bsize) intersect.
 *
 * For positive sizes the two ranges intersect exactly when the span that
 * covers both of them is shorter than the sum of their sizes.
 */
static inline bool is_overlapped(const int8_t astart, int8_t asize,
                                 const int8_t bstart, int8_t bsize)
{
    const int8_t a_limit = astart + asize;
    const int8_t b_limit = bstart + bsize;
    const int lowest = MIN(astart, bstart);
    const int highest = MAX(a_limit, b_limit);

    return (highest - lowest) < (asize + bsize);
}
30
31static bool require_rvv(DisasContext *s)
32{
33    return s->mstatus_vs != 0;
34}
35
36static bool require_rvf(DisasContext *s)
37{
38    if (s->mstatus_fs == 0) {
39        return false;
40    }
41
42    switch (s->sew) {
43    case MO_16:
44    case MO_32:
45        return has_ext(s, RVF);
46    case MO_64:
47        return has_ext(s, RVD);
48    default:
49        return false;
50    }
51}
52
53static bool require_scale_rvf(DisasContext *s)
54{
55    if (s->mstatus_fs == 0) {
56        return false;
57    }
58
59    switch (s->sew) {
60    case MO_8:
61    case MO_16:
62        return has_ext(s, RVF);
63    case MO_32:
64        return has_ext(s, RVD);
65    default:
66        return false;
67    }
68}
69
/*
 * A masked instruction (vm == 0) must not use v0 as its destination,
 * because v0 is the source mask register.
 */
static bool require_vm(int vm, int vd)
{
    if (vm == 0 && vd == 0) {
        return false;
    }
    return true;
}
75
/*
 * Segment access limits: nf fields of 2^max(lmul, 0) registers each may
 * occupy at most 8 registers in total, and the last register used must
 * not go past v31.
 */
static bool require_nf(int vd, int nf, int lmul)
{
    const int shift = MAX(lmul, 0);
    const int total_regs = nf << shift;

    if (total_regs > 8) {
        return false;
    }
    return vd + total_regs <= 32;
}
81
/*
 * A vector register number must be aligned to the passed-in LMUL (EMUL),
 * given here as a log2 value: the low 'lmul' bits of 'val' must be zero.
 * When lmul <= 0 (LMUL of 1 or a fractional LMUL) any register is allowed.
 */
static bool require_align(const int8_t val, const int8_t lmul)
{
    if (lmul <= 0) {
        return true;
    }
    return extract32(val, 0, lmul) == 0;
}
90
/*
 * A destination vector register group may overlap a source vector
 * register group only if one of the following holds (Section 5.2):
 *  1. The destination EEW equals the source EEW.
 *  2. The destination EEW is smaller than the source EEW and the overlap
 *     is in the lowest-numbered part of the source register group.
 *  3. The destination EEW is greater than the source EEW, the source EMUL
 *     is at least 1, and the overlap is in the highest-numbered part of
 *     the destination register group.
 *
 * This function returns true when either:
 *  * the destination group does not overlap the source group at all, or
 *  * the groups overlap in a way permitted by rule 3.
 *
 * Rule 1 always permits overlap, so callers do not call this function
 * in that case.  Rule 2 corresponds to (vd == vs); callers are expected
 * to call this function only when (vd != vs).
 *
 * dst_lmul and src_lmul are log2 values; values <= 0 occupy one register.
 */
static bool require_noover(const int8_t dst, const int8_t dst_lmul,
                           const int8_t src, const int8_t src_lmul)
{
    int8_t dst_size = 1;
    int8_t src_size = 1;

    if (dst_lmul > 0) {
        dst_size = 1 << dst_lmul;
    }
    if (src_lmul > 0) {
        src_size = 1 << src_lmul;
    }

    /* Disjoint groups are always fine. */
    if (!is_overlapped(dst, dst_size, src, src_size)) {
        return true;
    }

    /*
     * The groups overlap, so only rule 3 can still allow it: a larger
     * destination group that starts below the source, a source EMUL of
     * at least 1, and no overlap with the range just past the source.
     */
    return dst_size > src_size &&
           dst < src &&
           src_lmul >= 0 &&
           !is_overlapped(dst, dst_size, src + src_size, src_size);
}
128
129static bool do_vsetvl(DisasContext *s, int rd, int rs1, TCGv s2)
130{
131    TCGv s1, dst;
132
133    if (!require_rvv(s) || !has_ext(s, RVV)) {
134        return false;
135    }
136
137    dst = dest_gpr(s, rd);
138
139    if (rd == 0 && rs1 == 0) {
140        s1 = tcg_temp_new();
141        tcg_gen_mov_tl(s1, cpu_vl);
142    } else if (rs1 == 0) {
143        /* As the mask is at least one bit, RV_VLEN_MAX is >= VLMAX */
144        s1 = tcg_constant_tl(RV_VLEN_MAX);
145    } else {
146        s1 = get_gpr(s, rs1, EXT_ZERO);
147    }
148
149    gen_helper_vsetvl(dst, cpu_env, s1, s2);
150    gen_set_gpr(s, rd, dst);
151    mark_vs_dirty(s);
152
153    tcg_gen_movi_tl(cpu_pc, s->pc_succ_insn);
154    tcg_gen_lookup_and_goto_ptr();
155    s->base.is_jmp = DISAS_NORETURN;
156
157    if (rd == 0 && rs1 == 0) {
158        tcg_temp_free(s1);
159    }
160
161    return true;
162}
163
164static bool trans_vsetvl(DisasContext *s, arg_vsetvl *a)
165{
166    TCGv s2 = get_gpr(s, a->rs2, EXT_ZERO);
167    return do_vsetvl(s, a->rd, a->rs1, s2);
168}
169
170static bool trans_vsetvli(DisasContext *s, arg_vsetvli *a)
171{
172    TCGv s2 = tcg_constant_tl(a->zimm);
173    return do_vsetvl(s, a->rd, a->rs1, s2);
174}
175
176/* vector register offset from env */
177static uint32_t vreg_ofs(DisasContext *s, int reg)
178{
179    return offsetof(CPURISCVState, vreg) + reg * s->vlen / 8;
180}
181
182/* check functions */
183
184/*
185 * Vector unit-stride, strided, unit-stride segment, strided segment
186 * store check function.
187 *
188 * Rules to be checked here:
189 *   1. EMUL must within the range: 1/8 <= EMUL <= 8. (Section 7.3)
190 *   2. Destination vector register number is multiples of EMUL.
191 *      (Section 3.4.2, 7.3)
192 *   3. The EMUL setting must be such that EMUL * NFIELDS ≤ 8. (Section 7.8)
193 *   4. Vector register numbers accessed by the segment load or store
194 *      cannot increment past 31. (Section 7.8)
195 */
196static bool vext_check_store(DisasContext *s, int vd, int nf, uint8_t eew)
197{
198    int8_t emul = eew - s->sew + s->lmul;
199    return (emul >= -3 && emul <= 3) &&
200            require_align(vd, emul) &&
201            require_nf(vd, nf, emul);
202}
203
204/*
205 * Vector unit-stride, strided, unit-stride segment, strided segment
206 * load check function.
207 *
208 * Rules to be checked here:
209 *   1. All rules applies to store instructions are applies
210 *      to load instructions.
211 *   2. Destination vector register group for a masked vector
212 *      instruction cannot overlap the source mask register (v0).
213 *      (Section 5.3)
214 */
215static bool vext_check_load(DisasContext *s, int vd, int nf, int vm,
216                            uint8_t eew)
217{
218    return vext_check_store(s, vd, nf, eew) && require_vm(vm, vd);
219}
220
221/*
222 * Vector indexed, indexed segment store check function.
223 *
224 * Rules to be checked here:
225 *   1. EMUL must within the range: 1/8 <= EMUL <= 8. (Section 7.3)
226 *   2. Index vector register number is multiples of EMUL.
227 *      (Section 3.4.2, 7.3)
228 *   3. Destination vector register number is multiples of LMUL.
229 *      (Section 3.4.2, 7.3)
230 *   4. The EMUL setting must be such that EMUL * NFIELDS ≤ 8. (Section 7.8)
231 *   5. Vector register numbers accessed by the segment load or store
232 *      cannot increment past 31. (Section 7.8)
233 */
234static bool vext_check_st_index(DisasContext *s, int vd, int vs2, int nf,
235                                uint8_t eew)
236{
237    int8_t emul = eew - s->sew + s->lmul;
238    return (emul >= -3 && emul <= 3) &&
239            require_align(vs2, emul) &&
240            require_align(vd, s->lmul) &&
241            require_nf(vd, nf, s->lmul);
242}
243
244/*
245 * Vector indexed, indexed segment load check function.
246 *
247 * Rules to be checked here:
248 *   1. All rules applies to store instructions are applies
249 *      to load instructions.
250 *   2. Destination vector register group for a masked vector
251 *      instruction cannot overlap the source mask register (v0).
252 *      (Section 5.3)
253 *   3. Destination vector register cannot overlap a source vector
254 *      register (vs2) group.
255 *      (Section 5.2)
256 *   4. Destination vector register groups cannot overlap
257 *      the source vector register (vs2) group for
258 *      indexed segment load instructions. (Section 7.8.3)
259 */
260static bool vext_check_ld_index(DisasContext *s, int vd, int vs2,
261                                int nf, int vm, uint8_t eew)
262{
263    int8_t seg_vd;
264    int8_t emul = eew - s->sew + s->lmul;
265    bool ret = vext_check_st_index(s, vd, vs2, nf, eew) &&
266        require_vm(vm, vd);
267
268    /* Each segment register group has to follow overlap rules. */
269    for (int i = 0; i < nf; ++i) {
270        seg_vd = vd + (1 << MAX(s->lmul, 0)) * i;
271
272        if (eew > s->sew) {
273            if (seg_vd != vs2) {
274                ret &= require_noover(seg_vd, s->lmul, vs2, emul);
275            }
276        } else if (eew < s->sew) {
277            ret &= require_noover(seg_vd, s->lmul, vs2, emul);
278        }
279
280        /*
281         * Destination vector register groups cannot overlap
282         * the source vector register (vs2) group for
283         * indexed segment load instructions.
284         */
285        if (nf > 1) {
286            ret &= !is_overlapped(seg_vd, 1 << MAX(s->lmul, 0),
287                                  vs2, 1 << MAX(emul, 0));
288        }
289    }
290    return ret;
291}
292
293static bool vext_check_ss(DisasContext *s, int vd, int vs, int vm)
294{
295    return require_vm(vm, vd) &&
296        require_align(vd, s->lmul) &&
297        require_align(vs, s->lmul);
298}
299
300/*
301 * Check function for vector instruction with format:
302 * single-width result and single-width sources (SEW = SEW op SEW)
303 *
304 * Rules to be checked here:
305 *   1. Destination vector register group for a masked vector
306 *      instruction cannot overlap the source mask register (v0).
307 *      (Section 5.3)
308 *   2. Destination vector register number is multiples of LMUL.
309 *      (Section 3.4.2)
310 *   3. Source (vs2, vs1) vector register number are multiples of LMUL.
311 *      (Section 3.4.2)
312 */
313static bool vext_check_sss(DisasContext *s, int vd, int vs1, int vs2, int vm)
314{
315    return vext_check_ss(s, vd, vs2, vm) &&
316        require_align(vs1, s->lmul);
317}
318
319static bool vext_check_ms(DisasContext *s, int vd, int vs)
320{
321    bool ret = require_align(vs, s->lmul);
322    if (vd != vs) {
323        ret &= require_noover(vd, 0, vs, s->lmul);
324    }
325    return ret;
326}
327
328/*
329 * Check function for maskable vector instruction with format:
330 * single-width result and single-width sources (SEW = SEW op SEW)
331 *
332 * Rules to be checked here:
333 *   1. Source (vs2, vs1) vector register number are multiples of LMUL.
334 *      (Section 3.4.2)
335 *   2. Destination vector register cannot overlap a source vector
336 *      register (vs2, vs1) group.
337 *      (Section 5.2)
338 *   3. The destination vector register group for a masked vector
339 *      instruction cannot overlap the source mask register (v0),
340 *      unless the destination vector register is being written
341 *      with a mask value (e.g., comparisons) or the scalar result
342 *      of a reduction. (Section 5.3)
343 */
344static bool vext_check_mss(DisasContext *s, int vd, int vs1, int vs2)
345{
346    bool ret = vext_check_ms(s, vd, vs2) &&
347        require_align(vs1, s->lmul);
348    if (vd != vs1) {
349        ret &= require_noover(vd, 0, vs1, s->lmul);
350    }
351    return ret;
352}
353
354/*
355 * Common check function for vector widening instructions
356 * of double-width result (2*SEW).
357 *
358 * Rules to be checked here:
359 *   1. The largest vector register group used by an instruction
360 *      can not be greater than 8 vector registers (Section 5.2):
361 *      => LMUL < 8.
362 *      => SEW < 64.
363 *   2. Destination vector register number is multiples of 2 * LMUL.
364 *      (Section 3.4.2)
365 *   3. Destination vector register group for a masked vector
366 *      instruction cannot overlap the source mask register (v0).
367 *      (Section 5.3)
368 */
369static bool vext_wide_check_common(DisasContext *s, int vd, int vm)
370{
371    return (s->lmul <= 2) &&
372           (s->sew < MO_64) &&
373           require_align(vd, s->lmul + 1) &&
374           require_vm(vm, vd);
375}
376
377/*
378 * Common check function for vector narrowing instructions
379 * of single-width result (SEW) and double-width source (2*SEW).
380 *
381 * Rules to be checked here:
382 *   1. The largest vector register group used by an instruction
383 *      can not be greater than 8 vector registers (Section 5.2):
384 *      => LMUL < 8.
385 *      => SEW < 64.
386 *   2. Source vector register number is multiples of 2 * LMUL.
387 *      (Section 3.4.2)
388 *   3. Destination vector register number is multiples of LMUL.
389 *      (Section 3.4.2)
390 *   4. Destination vector register group for a masked vector
391 *      instruction cannot overlap the source mask register (v0).
392 *      (Section 5.3)
393 */
394static bool vext_narrow_check_common(DisasContext *s, int vd, int vs2,
395                                     int vm)
396{
397    return (s->lmul <= 2) &&
398           (s->sew < MO_64) &&
399           require_align(vs2, s->lmul + 1) &&
400           require_align(vd, s->lmul) &&
401           require_vm(vm, vd);
402}
403
404static bool vext_check_ds(DisasContext *s, int vd, int vs, int vm)
405{
406    return vext_wide_check_common(s, vd, vm) &&
407        require_align(vs, s->lmul) &&
408        require_noover(vd, s->lmul + 1, vs, s->lmul);
409}
410
411static bool vext_check_dd(DisasContext *s, int vd, int vs, int vm)
412{
413    return vext_wide_check_common(s, vd, vm) &&
414        require_align(vs, s->lmul + 1);
415}
416
417/*
418 * Check function for vector instruction with format:
419 * double-width result and single-width sources (2*SEW = SEW op SEW)
420 *
421 * Rules to be checked here:
422 *   1. All rules in defined in widen common rules are applied.
423 *   2. Source (vs2, vs1) vector register number are multiples of LMUL.
424 *      (Section 3.4.2)
425 *   3. Destination vector register cannot overlap a source vector
426 *      register (vs2, vs1) group.
427 *      (Section 5.2)
428 */
429static bool vext_check_dss(DisasContext *s, int vd, int vs1, int vs2, int vm)
430{
431    return vext_check_ds(s, vd, vs2, vm) &&
432        require_align(vs1, s->lmul) &&
433        require_noover(vd, s->lmul + 1, vs1, s->lmul);
434}
435
436/*
437 * Check function for vector instruction with format:
438 * double-width result and double-width source1 and single-width
439 * source2 (2*SEW = 2*SEW op SEW)
440 *
441 * Rules to be checked here:
442 *   1. All rules in defined in widen common rules are applied.
443 *   2. Source 1 (vs2) vector register number is multiples of 2 * LMUL.
444 *      (Section 3.4.2)
445 *   3. Source 2 (vs1) vector register number is multiples of LMUL.
446 *      (Section 3.4.2)
447 *   4. Destination vector register cannot overlap a source vector
448 *      register (vs1) group.
449 *      (Section 5.2)
450 */
451static bool vext_check_dds(DisasContext *s, int vd, int vs1, int vs2, int vm)
452{
453    return vext_check_ds(s, vd, vs1, vm) &&
454        require_align(vs2, s->lmul + 1);
455}
456
457static bool vext_check_sd(DisasContext *s, int vd, int vs, int vm)
458{
459    bool ret = vext_narrow_check_common(s, vd, vs, vm);
460    if (vd != vs) {
461        ret &= require_noover(vd, s->lmul, vs, s->lmul + 1);
462    }
463    return ret;
464}
465
466/*
467 * Check function for vector instruction with format:
468 * single-width result and double-width source 1 and single-width
469 * source 2 (SEW = 2*SEW op SEW)
470 *
471 * Rules to be checked here:
472 *   1. All rules in defined in narrow common rules are applied.
473 *   2. Destination vector register cannot overlap a source vector
474 *      register (vs2) group.
475 *      (Section 5.2)
476 *   3. Source 2 (vs1) vector register number is multiples of LMUL.
477 *      (Section 3.4.2)
478 */
479static bool vext_check_sds(DisasContext *s, int vd, int vs1, int vs2, int vm)
480{
481    return vext_check_sd(s, vd, vs2, vm) &&
482        require_align(vs1, s->lmul);
483}
484
485/*
486 * Check function for vector reduction instructions.
487 *
488 * Rules to be checked here:
489 *   1. Source 1 (vs2) vector register number is multiples of LMUL.
490 *      (Section 3.4.2)
491 */
492static bool vext_check_reduction(DisasContext *s, int vs2)
493{
494    return require_align(vs2, s->lmul);
495}
496
497/*
498 * Check function for vector slide instructions.
499 *
500 * Rules to be checked here:
501 *   1. Source 1 (vs2) vector register number is multiples of LMUL.
502 *      (Section 3.4.2)
503 *   2. Destination vector register number is multiples of LMUL.
504 *      (Section 3.4.2)
505 *   3. Destination vector register group for a masked vector
506 *      instruction cannot overlap the source mask register (v0).
507 *      (Section 5.3)
508 *   4. The destination vector register group for vslideup, vslide1up,
509 *      vfslide1up, cannot overlap the source vector register (vs2) group.
510 *      (Section 5.2, 16.3.1, 16.3.3)
511 */
512static bool vext_check_slide(DisasContext *s, int vd, int vs2,
513                             int vm, bool is_over)
514{
515    bool ret = require_align(vs2, s->lmul) &&
516               require_align(vd, s->lmul) &&
517               require_vm(vm, vd);
518    if (is_over) {
519        ret &= (vd != vs2);
520    }
521    return ret;
522}
523
524/*
525 * In cpu_get_tb_cpu_state(), set VILL if RVV was not present.
526 * So RVV is also be checked in this function.
527 */
528static bool vext_check_isa_ill(DisasContext *s)
529{
530    return !s->vill;
531}
532
/*
 * Common translation macro: defines trans_<NAME>(), which runs
 * CHECK(s, a, EEW) and, only when it passes, returns OP(s, a, EEW).
 * A failed check makes the generated function return false.
 */
#define GEN_VEXT_TRANS(NAME, EEW, ARGTYPE, OP, CHECK)        \
static bool trans_##NAME(DisasContext *s, arg_##ARGTYPE * a) \
{                                                            \
    if (!CHECK(s, a, EEW)) {                                 \
        return false;                                        \
    }                                                        \
    return OP(s, a, EEW);                                    \
}
542
543static uint8_t vext_get_emul(DisasContext *s, uint8_t eew)
544{
545    int8_t emul = eew - s->sew + s->lmul;
546    return emul < 0 ? 0 : emul;
547}
548
549/*
550 *** unit stride load and store
551 */
552typedef void gen_helper_ldst_us(TCGv_ptr, TCGv_ptr, TCGv,
553                                TCGv_env, TCGv_i32);
554
555static bool ldst_us_trans(uint32_t vd, uint32_t rs1, uint32_t data,
556                          gen_helper_ldst_us *fn, DisasContext *s,
557                          bool is_store)
558{
559    TCGv_ptr dest, mask;
560    TCGv base;
561    TCGv_i32 desc;
562
563    TCGLabel *over = gen_new_label();
564    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
565
566    dest = tcg_temp_new_ptr();
567    mask = tcg_temp_new_ptr();
568    base = get_gpr(s, rs1, EXT_NONE);
569
570    /*
571     * As simd_desc supports at most 256 bytes, and in this implementation,
572     * the max vector group length is 2048 bytes. So split it into two parts.
573     *
574     * The first part is vlen in bytes, encoded in maxsz of simd_desc.
575     * The second part is lmul, encoded in data of simd_desc.
576     */
577    desc = tcg_constant_i32(simd_desc(s->vlen / 8, s->vlen / 8, data));
578
579    tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd));
580    tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));
581
582    fn(dest, mask, base, cpu_env, desc);
583
584    tcg_temp_free_ptr(dest);
585    tcg_temp_free_ptr(mask);
586
587    if (!is_store) {
588        mark_vs_dirty(s);
589    }
590
591    gen_set_label(over);
592    return true;
593}
594
595static bool ld_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew)
596{
597    uint32_t data = 0;
598    gen_helper_ldst_us *fn;
599    static gen_helper_ldst_us * const fns[2][4] = {
600        /* masked unit stride load */
601        { gen_helper_vle8_v_mask, gen_helper_vle16_v_mask,
602          gen_helper_vle32_v_mask, gen_helper_vle64_v_mask },
603        /* unmasked unit stride load */
604        { gen_helper_vle8_v, gen_helper_vle16_v,
605          gen_helper_vle32_v, gen_helper_vle64_v }
606    };
607
608    fn =  fns[a->vm][eew];
609    if (fn == NULL) {
610        return false;
611    }
612
613    /*
614     * Vector load/store instructions have the EEW encoded
615     * directly in the instructions. The maximum vector size is
616     * calculated with EMUL rather than LMUL.
617     */
618    uint8_t emul = vext_get_emul(s, eew);
619    data = FIELD_DP32(data, VDATA, VM, a->vm);
620    data = FIELD_DP32(data, VDATA, LMUL, emul);
621    data = FIELD_DP32(data, VDATA, NF, a->nf);
622    return ldst_us_trans(a->rd, a->rs1, data, fn, s, false);
623}
624
625static bool ld_us_check(DisasContext *s, arg_r2nfvm* a, uint8_t eew)
626{
627    return require_rvv(s) &&
628           vext_check_isa_ill(s) &&
629           vext_check_load(s, a->rd, a->nf, a->vm, eew);
630}
631
632GEN_VEXT_TRANS(vle8_v,  MO_8,  r2nfvm, ld_us_op, ld_us_check)
633GEN_VEXT_TRANS(vle16_v, MO_16, r2nfvm, ld_us_op, ld_us_check)
634GEN_VEXT_TRANS(vle32_v, MO_32, r2nfvm, ld_us_op, ld_us_check)
635GEN_VEXT_TRANS(vle64_v, MO_64, r2nfvm, ld_us_op, ld_us_check)
636
637static bool st_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew)
638{
639    uint32_t data = 0;
640    gen_helper_ldst_us *fn;
641    static gen_helper_ldst_us * const fns[2][4] = {
642        /* masked unit stride store */
643        { gen_helper_vse8_v_mask, gen_helper_vse16_v_mask,
644          gen_helper_vse32_v_mask, gen_helper_vse64_v_mask },
645        /* unmasked unit stride store */
646        { gen_helper_vse8_v, gen_helper_vse16_v,
647          gen_helper_vse32_v, gen_helper_vse64_v }
648    };
649
650    fn =  fns[a->vm][eew];
651    if (fn == NULL) {
652        return false;
653    }
654
655    uint8_t emul = vext_get_emul(s, eew);
656    data = FIELD_DP32(data, VDATA, VM, a->vm);
657    data = FIELD_DP32(data, VDATA, LMUL, emul);
658    data = FIELD_DP32(data, VDATA, NF, a->nf);
659    return ldst_us_trans(a->rd, a->rs1, data, fn, s, true);
660}
661
662static bool st_us_check(DisasContext *s, arg_r2nfvm* a, uint8_t eew)
663{
664    return require_rvv(s) &&
665           vext_check_isa_ill(s) &&
666           vext_check_store(s, a->rd, a->nf, eew);
667}
668
669GEN_VEXT_TRANS(vse8_v,  MO_8,  r2nfvm, st_us_op, st_us_check)
670GEN_VEXT_TRANS(vse16_v, MO_16, r2nfvm, st_us_op, st_us_check)
671GEN_VEXT_TRANS(vse32_v, MO_32, r2nfvm, st_us_op, st_us_check)
672GEN_VEXT_TRANS(vse64_v, MO_64, r2nfvm, st_us_op, st_us_check)
673
674/*
675 *** stride load and store
676 */
677typedef void gen_helper_ldst_stride(TCGv_ptr, TCGv_ptr, TCGv,
678                                    TCGv, TCGv_env, TCGv_i32);
679
680static bool ldst_stride_trans(uint32_t vd, uint32_t rs1, uint32_t rs2,
681                              uint32_t data, gen_helper_ldst_stride *fn,
682                              DisasContext *s, bool is_store)
683{
684    TCGv_ptr dest, mask;
685    TCGv base, stride;
686    TCGv_i32 desc;
687
688    TCGLabel *over = gen_new_label();
689    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
690
691    dest = tcg_temp_new_ptr();
692    mask = tcg_temp_new_ptr();
693    base = get_gpr(s, rs1, EXT_NONE);
694    stride = get_gpr(s, rs2, EXT_NONE);
695    desc = tcg_constant_i32(simd_desc(s->vlen / 8, s->vlen / 8, data));
696
697    tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd));
698    tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));
699
700    fn(dest, mask, base, stride, cpu_env, desc);
701
702    tcg_temp_free_ptr(dest);
703    tcg_temp_free_ptr(mask);
704
705    if (!is_store) {
706        mark_vs_dirty(s);
707    }
708
709    gen_set_label(over);
710    return true;
711}
712
713static bool ld_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
714{
715    uint32_t data = 0;
716    gen_helper_ldst_stride *fn;
717    static gen_helper_ldst_stride * const fns[4] = {
718        gen_helper_vlse8_v, gen_helper_vlse16_v,
719        gen_helper_vlse32_v, gen_helper_vlse64_v
720    };
721
722    fn = fns[eew];
723    if (fn == NULL) {
724        return false;
725    }
726
727    uint8_t emul = vext_get_emul(s, eew);
728    data = FIELD_DP32(data, VDATA, VM, a->vm);
729    data = FIELD_DP32(data, VDATA, LMUL, emul);
730    data = FIELD_DP32(data, VDATA, NF, a->nf);
731    return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s, false);
732}
733
734static bool ld_stride_check(DisasContext *s, arg_rnfvm* a, uint8_t eew)
735{
736    return require_rvv(s) &&
737           vext_check_isa_ill(s) &&
738           vext_check_load(s, a->rd, a->nf, a->vm, eew);
739}
740
741GEN_VEXT_TRANS(vlse8_v,  MO_8,  rnfvm, ld_stride_op, ld_stride_check)
742GEN_VEXT_TRANS(vlse16_v, MO_16, rnfvm, ld_stride_op, ld_stride_check)
743GEN_VEXT_TRANS(vlse32_v, MO_32, rnfvm, ld_stride_op, ld_stride_check)
744GEN_VEXT_TRANS(vlse64_v, MO_64, rnfvm, ld_stride_op, ld_stride_check)
745
746static bool st_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
747{
748    uint32_t data = 0;
749    gen_helper_ldst_stride *fn;
750    static gen_helper_ldst_stride * const fns[4] = {
751        /* masked stride store */
752        gen_helper_vsse8_v,  gen_helper_vsse16_v,
753        gen_helper_vsse32_v,  gen_helper_vsse64_v
754    };
755
756    uint8_t emul = vext_get_emul(s, eew);
757    data = FIELD_DP32(data, VDATA, VM, a->vm);
758    data = FIELD_DP32(data, VDATA, LMUL, emul);
759    data = FIELD_DP32(data, VDATA, NF, a->nf);
760    fn = fns[eew];
761    if (fn == NULL) {
762        return false;
763    }
764
765    return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s, true);
766}
767
768static bool st_stride_check(DisasContext *s, arg_rnfvm* a, uint8_t eew)
769{
770    return require_rvv(s) &&
771           vext_check_isa_ill(s) &&
772           vext_check_store(s, a->rd, a->nf, eew);
773}
774
775GEN_VEXT_TRANS(vsse8_v,  MO_8,  rnfvm, st_stride_op, st_stride_check)
776GEN_VEXT_TRANS(vsse16_v, MO_16, rnfvm, st_stride_op, st_stride_check)
777GEN_VEXT_TRANS(vsse32_v, MO_32, rnfvm, st_stride_op, st_stride_check)
778GEN_VEXT_TRANS(vsse64_v, MO_64, rnfvm, st_stride_op, st_stride_check)
779
780/*
781 *** index load and store
782 */
783typedef void gen_helper_ldst_index(TCGv_ptr, TCGv_ptr, TCGv,
784                                   TCGv_ptr, TCGv_env, TCGv_i32);
785
786static bool ldst_index_trans(uint32_t vd, uint32_t rs1, uint32_t vs2,
787                             uint32_t data, gen_helper_ldst_index *fn,
788                             DisasContext *s, bool is_store)
789{
790    TCGv_ptr dest, mask, index;
791    TCGv base;
792    TCGv_i32 desc;
793
794    TCGLabel *over = gen_new_label();
795    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
796
797    dest = tcg_temp_new_ptr();
798    mask = tcg_temp_new_ptr();
799    index = tcg_temp_new_ptr();
800    base = get_gpr(s, rs1, EXT_NONE);
801    desc = tcg_constant_i32(simd_desc(s->vlen / 8, s->vlen / 8, data));
802
803    tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd));
804    tcg_gen_addi_ptr(index, cpu_env, vreg_ofs(s, vs2));
805    tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));
806
807    fn(dest, mask, base, index, cpu_env, desc);
808
809    tcg_temp_free_ptr(dest);
810    tcg_temp_free_ptr(mask);
811    tcg_temp_free_ptr(index);
812
813    if (!is_store) {
814        mark_vs_dirty(s);
815    }
816
817    gen_set_label(over);
818    return true;
819}
820
821static bool ld_index_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
822{
823    uint32_t data = 0;
824    gen_helper_ldst_index *fn;
825    static gen_helper_ldst_index * const fns[4][4] = {
826        /*
827         * offset vector register group EEW = 8,
828         * data vector register group EEW = SEW
829         */
830        { gen_helper_vlxei8_8_v,  gen_helper_vlxei8_16_v,
831          gen_helper_vlxei8_32_v, gen_helper_vlxei8_64_v },
832        /*
833         * offset vector register group EEW = 16,
834         * data vector register group EEW = SEW
835         */
836        { gen_helper_vlxei16_8_v, gen_helper_vlxei16_16_v,
837          gen_helper_vlxei16_32_v, gen_helper_vlxei16_64_v },
838        /*
839         * offset vector register group EEW = 32,
840         * data vector register group EEW = SEW
841         */
842        { gen_helper_vlxei32_8_v, gen_helper_vlxei32_16_v,
843          gen_helper_vlxei32_32_v, gen_helper_vlxei32_64_v },
844        /*
845         * offset vector register group EEW = 64,
846         * data vector register group EEW = SEW
847         */
848        { gen_helper_vlxei64_8_v, gen_helper_vlxei64_16_v,
849          gen_helper_vlxei64_32_v, gen_helper_vlxei64_64_v }
850    };
851
852    fn = fns[eew][s->sew];
853
854    uint8_t emul = vext_get_emul(s, s->sew);
855    data = FIELD_DP32(data, VDATA, VM, a->vm);
856    data = FIELD_DP32(data, VDATA, LMUL, emul);
857    data = FIELD_DP32(data, VDATA, NF, a->nf);
858    return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s, false);
859}
860
861static bool ld_index_check(DisasContext *s, arg_rnfvm* a, uint8_t eew)
862{
863    return require_rvv(s) &&
864           vext_check_isa_ill(s) &&
865           vext_check_ld_index(s, a->rd, a->rs2, a->nf, a->vm, eew);
866}
867
868GEN_VEXT_TRANS(vlxei8_v,  MO_8,  rnfvm, ld_index_op, ld_index_check)
869GEN_VEXT_TRANS(vlxei16_v, MO_16, rnfvm, ld_index_op, ld_index_check)
870GEN_VEXT_TRANS(vlxei32_v, MO_32, rnfvm, ld_index_op, ld_index_check)
871GEN_VEXT_TRANS(vlxei64_v, MO_64, rnfvm, ld_index_op, ld_index_check)
872
873static bool st_index_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
874{
875    uint32_t data = 0;
876    gen_helper_ldst_index *fn;
877    static gen_helper_ldst_index * const fns[4][4] = {
878        /*
879         * offset vector register group EEW = 8,
880         * data vector register group EEW = SEW
881         */
882        { gen_helper_vsxei8_8_v,  gen_helper_vsxei8_16_v,
883          gen_helper_vsxei8_32_v, gen_helper_vsxei8_64_v },
884        /*
885         * offset vector register group EEW = 16,
886         * data vector register group EEW = SEW
887         */
888        { gen_helper_vsxei16_8_v, gen_helper_vsxei16_16_v,
889          gen_helper_vsxei16_32_v, gen_helper_vsxei16_64_v },
890        /*
891         * offset vector register group EEW = 32,
892         * data vector register group EEW = SEW
893         */
894        { gen_helper_vsxei32_8_v, gen_helper_vsxei32_16_v,
895          gen_helper_vsxei32_32_v, gen_helper_vsxei32_64_v },
896        /*
897         * offset vector register group EEW = 64,
898         * data vector register group EEW = SEW
899         */
900        { gen_helper_vsxei64_8_v, gen_helper_vsxei64_16_v,
901          gen_helper_vsxei64_32_v, gen_helper_vsxei64_64_v }
902    };
903
904    fn = fns[eew][s->sew];
905
906    uint8_t emul = vext_get_emul(s, s->sew);
907    data = FIELD_DP32(data, VDATA, VM, a->vm);
908    data = FIELD_DP32(data, VDATA, LMUL, emul);
909    data = FIELD_DP32(data, VDATA, NF, a->nf);
910    return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s, true);
911}
912
913static bool st_index_check(DisasContext *s, arg_rnfvm* a, uint8_t eew)
914{
915    return require_rvv(s) &&
916           vext_check_isa_ill(s) &&
917           vext_check_st_index(s, a->rd, a->rs2, a->nf, eew);
918}
919
/*
 * Instantiate the indexed-store translators, one per MO_8..MO_64 value,
 * each pairing st_index_op with st_index_check.
 * NOTE(review): GEN_VEXT_TRANS is defined outside this view; presumably its
 * second argument is what reaches the op/check functions as eew - confirm.
 */
GEN_VEXT_TRANS(vsxei8_v,  MO_8,  rnfvm, st_index_op, st_index_check)
GEN_VEXT_TRANS(vsxei16_v, MO_16, rnfvm, st_index_op, st_index_check)
GEN_VEXT_TRANS(vsxei32_v, MO_32, rnfvm, st_index_op, st_index_check)
GEN_VEXT_TRANS(vsxei64_v, MO_64, rnfvm, st_index_op, st_index_check)
924
925/*
926 *** unit stride fault-only-first load
927 */
928static bool ldff_trans(uint32_t vd, uint32_t rs1, uint32_t data,
929                       gen_helper_ldst_us *fn, DisasContext *s)
930{
931    TCGv_ptr dest, mask;
932    TCGv base;
933    TCGv_i32 desc;
934
935    TCGLabel *over = gen_new_label();
936    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
937
938    dest = tcg_temp_new_ptr();
939    mask = tcg_temp_new_ptr();
940    base = get_gpr(s, rs1, EXT_NONE);
941    desc = tcg_constant_i32(simd_desc(s->vlen / 8, s->vlen / 8, data));
942
943    tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd));
944    tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));
945
946    fn(dest, mask, base, cpu_env, desc);
947
948    tcg_temp_free_ptr(dest);
949    tcg_temp_free_ptr(mask);
950    mark_vs_dirty(s);
951    gen_set_label(over);
952    return true;
953}
954
955static bool ldff_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew)
956{
957    uint32_t data = 0;
958    gen_helper_ldst_us *fn;
959    static gen_helper_ldst_us * const fns[4] = {
960        gen_helper_vle8ff_v, gen_helper_vle16ff_v,
961        gen_helper_vle32ff_v, gen_helper_vle64ff_v
962    };
963
964    fn = fns[eew];
965    if (fn == NULL) {
966        return false;
967    }
968
969    uint8_t emul = vext_get_emul(s, eew);
970    data = FIELD_DP32(data, VDATA, VM, a->vm);
971    data = FIELD_DP32(data, VDATA, LMUL, emul);
972    data = FIELD_DP32(data, VDATA, NF, a->nf);
973    return ldff_trans(a->rd, a->rs1, data, fn, s);
974}
975
/*
 * Instantiate the fault-only-first load translators, one per MO_8..MO_64
 * value, each pairing ldff_op with ld_us_check.
 * NOTE(review): GEN_VEXT_TRANS is defined outside this view; presumably its
 * second argument is what reaches the op/check functions as eew - confirm.
 */
GEN_VEXT_TRANS(vle8ff_v,  MO_8,  r2nfvm, ldff_op, ld_us_check)
GEN_VEXT_TRANS(vle16ff_v, MO_16, r2nfvm, ldff_op, ld_us_check)
GEN_VEXT_TRANS(vle32ff_v, MO_32, r2nfvm, ldff_op, ld_us_check)
GEN_VEXT_TRANS(vle64ff_v, MO_64, r2nfvm, ldff_op, ld_us_check)
980
981/*
982 * load and store whole register instructions
983 */
984typedef void gen_helper_ldst_whole(TCGv_ptr, TCGv, TCGv_env, TCGv_i32);
985
986static bool ldst_whole_trans(uint32_t vd, uint32_t rs1, uint32_t nf,
987                             gen_helper_ldst_whole *fn, DisasContext *s,
988                             bool is_store)
989{
990    TCGv_ptr dest;
991    TCGv base;
992    TCGv_i32 desc;
993
994    uint32_t data = FIELD_DP32(0, VDATA, NF, nf);
995    dest = tcg_temp_new_ptr();
996    desc = tcg_constant_i32(simd_desc(s->vlen / 8, s->vlen / 8, data));
997
998    base = get_gpr(s, rs1, EXT_NONE);
999    tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd));
1000
1001    fn(dest, base, cpu_env, desc);
1002
1003    tcg_temp_free_ptr(dest);
1004
1005    if (!is_store) {
1006        mark_vs_dirty(s);
1007    }
1008
1009    return true;
1010}
1011
1012/*
1013 * load and store whole register instructions ignore vtype and vl setting.
1014 * Thus, we don't need to check vill bit. (Section 7.9)
1015 */
1016#define GEN_LDST_WHOLE_TRANS(NAME, ARG_NF, IS_STORE)                      \
1017static bool trans_##NAME(DisasContext *s, arg_##NAME * a)                 \
1018{                                                                         \
1019    if (require_rvv(s) &&                                                 \
1020        QEMU_IS_ALIGNED(a->rd, ARG_NF)) {                                 \
1021        return ldst_whole_trans(a->rd, a->rs1, ARG_NF, gen_helper_##NAME, \
1022                                s, IS_STORE);                             \
1023    }                                                                     \
1024    return false;                                                         \
1025}
1026
1027GEN_LDST_WHOLE_TRANS(vl1re8_v,  1, false)
1028GEN_LDST_WHOLE_TRANS(vl1re16_v, 1, false)
1029GEN_LDST_WHOLE_TRANS(vl1re32_v, 1, false)
1030GEN_LDST_WHOLE_TRANS(vl1re64_v, 1, false)
1031GEN_LDST_WHOLE_TRANS(vl2re8_v,  2, false)
1032GEN_LDST_WHOLE_TRANS(vl2re16_v, 2, false)
1033GEN_LDST_WHOLE_TRANS(vl2re32_v, 2, false)
1034GEN_LDST_WHOLE_TRANS(vl2re64_v, 2, false)
1035GEN_LDST_WHOLE_TRANS(vl4re8_v,  4, false)
1036GEN_LDST_WHOLE_TRANS(vl4re16_v, 4, false)
1037GEN_LDST_WHOLE_TRANS(vl4re32_v, 4, false)
1038GEN_LDST_WHOLE_TRANS(vl4re64_v, 4, false)
1039GEN_LDST_WHOLE_TRANS(vl8re8_v,  8, false)
1040GEN_LDST_WHOLE_TRANS(vl8re16_v, 8, false)
1041GEN_LDST_WHOLE_TRANS(vl8re32_v, 8, false)
1042GEN_LDST_WHOLE_TRANS(vl8re64_v, 8, false)
1043
1044GEN_LDST_WHOLE_TRANS(vs1r_v, 1, true)
1045GEN_LDST_WHOLE_TRANS(vs2r_v, 2, true)
1046GEN_LDST_WHOLE_TRANS(vs4r_v, 4, true)
1047GEN_LDST_WHOLE_TRANS(vs8r_v, 8, true)
1048
1049/*
1050 *** Vector Integer Arithmetic Instructions
1051 */
1052
/*
 * MAXSZ returns the maximum vector size, in bytes, that can be operated on.
 * It is used in GVEC IR when the vl_eq_vlmax flag is set to true,
 * to accelerate vector operations.
 */
1058static inline uint32_t MAXSZ(DisasContext *s)
1059{
1060    int scale = s->lmul - 3;
1061    return scale < 0 ? s->vlen >> -scale : s->vlen << scale;
1062}
1063
1064static bool opivv_check(DisasContext *s, arg_rmrr *a)
1065{
1066    return require_rvv(s) &&
1067           vext_check_isa_ill(s) &&
1068           vext_check_sss(s, a->rd, a->rs1, a->rs2, a->vm);
1069}
1070
1071typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t,
1072                        uint32_t, uint32_t, uint32_t);
1073
1074static inline bool
1075do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn,
1076              gen_helper_gvec_4_ptr *fn)
1077{
1078    TCGLabel *over = gen_new_label();
1079    if (!opivv_check(s, a)) {
1080        return false;
1081    }
1082
1083    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
1084
1085    if (a->vm && s->vl_eq_vlmax) {
1086        gvec_fn(s->sew, vreg_ofs(s, a->rd),
1087                vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1),
1088                MAXSZ(s), MAXSZ(s));
1089    } else {
1090        uint32_t data = 0;
1091
1092        data = FIELD_DP32(data, VDATA, VM, a->vm);
1093        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
1094        tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
1095                           vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2),
1096                           cpu_env, s->vlen / 8, s->vlen / 8, data, fn);
1097    }
1098    mark_vs_dirty(s);
1099    gen_set_label(over);
1100    return true;
1101}
1102
1103/* OPIVV with GVEC IR */
1104#define GEN_OPIVV_GVEC_TRANS(NAME, SUF) \
1105static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
1106{                                                                  \
1107    static gen_helper_gvec_4_ptr * const fns[4] = {                \
1108        gen_helper_##NAME##_b, gen_helper_##NAME##_h,              \
1109        gen_helper_##NAME##_w, gen_helper_##NAME##_d,              \
1110    };                                                             \
1111    return do_opivv_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]);   \
1112}
1113
1114GEN_OPIVV_GVEC_TRANS(vadd_vv, add)
1115GEN_OPIVV_GVEC_TRANS(vsub_vv, sub)
1116
1117typedef void gen_helper_opivx(TCGv_ptr, TCGv_ptr, TCGv, TCGv_ptr,
1118                              TCGv_env, TCGv_i32);
1119
1120static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm,
1121                        gen_helper_opivx *fn, DisasContext *s)
1122{
1123    TCGv_ptr dest, src2, mask;
1124    TCGv src1;
1125    TCGv_i32 desc;
1126    uint32_t data = 0;
1127
1128    TCGLabel *over = gen_new_label();
1129    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
1130
1131    dest = tcg_temp_new_ptr();
1132    mask = tcg_temp_new_ptr();
1133    src2 = tcg_temp_new_ptr();
1134    src1 = get_gpr(s, rs1, EXT_NONE);
1135
1136    data = FIELD_DP32(data, VDATA, VM, vm);
1137    data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
1138    desc = tcg_constant_i32(simd_desc(s->vlen / 8, s->vlen / 8, data));
1139
1140    tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd));
1141    tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, vs2));
1142    tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));
1143
1144    fn(dest, mask, src1, src2, cpu_env, desc);
1145
1146    tcg_temp_free_ptr(dest);
1147    tcg_temp_free_ptr(mask);
1148    tcg_temp_free_ptr(src2);
1149    mark_vs_dirty(s);
1150    gen_set_label(over);
1151    return true;
1152}
1153
1154static bool opivx_check(DisasContext *s, arg_rmrr *a)
1155{
1156    return require_rvv(s) &&
1157           vext_check_isa_ill(s) &&
1158           vext_check_ss(s, a->rd, a->rs2, a->vm);
1159}
1160
1161typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t, TCGv_i64,
1162                         uint32_t, uint32_t);
1163
1164static inline bool
1165do_opivx_gvec(DisasContext *s, arg_rmrr *a, GVecGen2sFn *gvec_fn,
1166              gen_helper_opivx *fn)
1167{
1168    if (!opivx_check(s, a)) {
1169        return false;
1170    }
1171
1172    if (a->vm && s->vl_eq_vlmax) {
1173        TCGv_i64 src1 = tcg_temp_new_i64();
1174
1175        tcg_gen_ext_tl_i64(src1, get_gpr(s, a->rs1, EXT_SIGN));
1176        gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2),
1177                src1, MAXSZ(s), MAXSZ(s));
1178
1179        tcg_temp_free_i64(src1);
1180        mark_vs_dirty(s);
1181        return true;
1182    }
1183    return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s);
1184}
1185
1186/* OPIVX with GVEC IR */
1187#define GEN_OPIVX_GVEC_TRANS(NAME, SUF) \
1188static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
1189{                                                                  \
1190    static gen_helper_opivx * const fns[4] = {                     \
1191        gen_helper_##NAME##_b, gen_helper_##NAME##_h,              \
1192        gen_helper_##NAME##_w, gen_helper_##NAME##_d,              \
1193    };                                                             \
1194    return do_opivx_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]);   \
1195}
1196
1197GEN_OPIVX_GVEC_TRANS(vadd_vx, adds)
1198GEN_OPIVX_GVEC_TRANS(vsub_vx, subs)
1199
1200static void gen_vec_rsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
1201{
1202    tcg_gen_vec_sub8_i64(d, b, a);
1203}
1204
1205static void gen_vec_rsub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
1206{
1207    tcg_gen_vec_sub16_i64(d, b, a);
1208}
1209
1210static void gen_rsub_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
1211{
1212    tcg_gen_sub_i32(ret, arg2, arg1);
1213}
1214
1215static void gen_rsub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1216{
1217    tcg_gen_sub_i64(ret, arg2, arg1);
1218}
1219
1220static void gen_rsub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
1221{
1222    tcg_gen_sub_vec(vece, r, b, a);
1223}
1224
1225static void tcg_gen_gvec_rsubs(unsigned vece, uint32_t dofs, uint32_t aofs,
1226                               TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
1227{
1228    static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
1229    static const GVecGen2s rsub_op[4] = {
1230        { .fni8 = gen_vec_rsub8_i64,
1231          .fniv = gen_rsub_vec,
1232          .fno = gen_helper_vec_rsubs8,
1233          .opt_opc = vecop_list,
1234          .vece = MO_8 },
1235        { .fni8 = gen_vec_rsub16_i64,
1236          .fniv = gen_rsub_vec,
1237          .fno = gen_helper_vec_rsubs16,
1238          .opt_opc = vecop_list,
1239          .vece = MO_16 },
1240        { .fni4 = gen_rsub_i32,
1241          .fniv = gen_rsub_vec,
1242          .fno = gen_helper_vec_rsubs32,
1243          .opt_opc = vecop_list,
1244          .vece = MO_32 },
1245        { .fni8 = gen_rsub_i64,
1246          .fniv = gen_rsub_vec,
1247          .fno = gen_helper_vec_rsubs64,
1248          .opt_opc = vecop_list,
1249          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1250          .vece = MO_64 },
1251    };
1252
1253    tcg_debug_assert(vece <= MO_64);
1254    tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, c, &rsub_op[vece]);
1255}
1256
1257GEN_OPIVX_GVEC_TRANS(vrsub_vx, rsubs)
1258
1259typedef enum {
1260    IMM_ZX,         /* Zero-extended */
1261    IMM_SX,         /* Sign-extended */
1262    IMM_TRUNC_SEW,  /* Truncate to log(SEW) bits */
1263    IMM_TRUNC_2SEW, /* Truncate to log(2*SEW) bits */
1264} imm_mode_t;
1265
1266static int64_t extract_imm(DisasContext *s, uint32_t imm, imm_mode_t imm_mode)
1267{
1268    switch (imm_mode) {
1269    case IMM_ZX:
1270        return extract64(imm, 0, 5);
1271    case IMM_SX:
1272        return sextract64(imm, 0, 5);
1273    case IMM_TRUNC_SEW:
1274        return extract64(imm, 0, s->sew + 3);
1275    case IMM_TRUNC_2SEW:
1276        return extract64(imm, 0, s->sew + 4);
1277    default:
1278        g_assert_not_reached();
1279    }
1280}
1281
1282static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm,
1283                        gen_helper_opivx *fn, DisasContext *s,
1284                        imm_mode_t imm_mode)
1285{
1286    TCGv_ptr dest, src2, mask;
1287    TCGv src1;
1288    TCGv_i32 desc;
1289    uint32_t data = 0;
1290
1291    TCGLabel *over = gen_new_label();
1292    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
1293
1294    dest = tcg_temp_new_ptr();
1295    mask = tcg_temp_new_ptr();
1296    src2 = tcg_temp_new_ptr();
1297    src1 = tcg_constant_tl(extract_imm(s, imm, imm_mode));
1298
1299    data = FIELD_DP32(data, VDATA, VM, vm);
1300    data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
1301    desc = tcg_constant_i32(simd_desc(s->vlen / 8, s->vlen / 8, data));
1302
1303    tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd));
1304    tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, vs2));
1305    tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));
1306
1307    fn(dest, mask, src1, src2, cpu_env, desc);
1308
1309    tcg_temp_free_ptr(dest);
1310    tcg_temp_free_ptr(mask);
1311    tcg_temp_free_ptr(src2);
1312    mark_vs_dirty(s);
1313    gen_set_label(over);
1314    return true;
1315}
1316
1317typedef void GVecGen2iFn(unsigned, uint32_t, uint32_t, int64_t,
1318                         uint32_t, uint32_t);
1319
1320static inline bool
1321do_opivi_gvec(DisasContext *s, arg_rmrr *a, GVecGen2iFn *gvec_fn,
1322              gen_helper_opivx *fn, imm_mode_t imm_mode)
1323{
1324    if (!opivx_check(s, a)) {
1325        return false;
1326    }
1327
1328    if (a->vm && s->vl_eq_vlmax) {
1329        gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2),
1330                extract_imm(s, a->rs1, imm_mode), MAXSZ(s), MAXSZ(s));
1331        mark_vs_dirty(s);
1332        return true;
1333    }
1334    return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s, imm_mode);
1335}
1336
1337/* OPIVI with GVEC IR */
1338#define GEN_OPIVI_GVEC_TRANS(NAME, IMM_MODE, OPIVX, SUF) \
1339static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
1340{                                                                  \
1341    static gen_helper_opivx * const fns[4] = {                     \
1342        gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h,            \
1343        gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d,            \
1344    };                                                             \
1345    return do_opivi_gvec(s, a, tcg_gen_gvec_##SUF,                 \
1346                         fns[s->sew], IMM_MODE);                   \
1347}
1348
1349GEN_OPIVI_GVEC_TRANS(vadd_vi, IMM_SX, vadd_vx, addi)
1350
1351static void tcg_gen_gvec_rsubi(unsigned vece, uint32_t dofs, uint32_t aofs,
1352                               int64_t c, uint32_t oprsz, uint32_t maxsz)
1353{
1354    TCGv_i64 tmp = tcg_constant_i64(c);
1355    tcg_gen_gvec_rsubs(vece, dofs, aofs, tmp, oprsz, maxsz);
1356}
1357
1358GEN_OPIVI_GVEC_TRANS(vrsub_vi, IMM_SX, vrsub_vx, rsubi)
1359
1360/* Vector Widening Integer Add/Subtract */
1361
1362/* OPIVV with WIDEN */
1363static bool opivv_widen_check(DisasContext *s, arg_rmrr *a)
1364{
1365    return require_rvv(s) &&
1366           vext_check_isa_ill(s) &&
1367           vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm);
1368}
1369
1370static bool do_opivv_widen(DisasContext *s, arg_rmrr *a,
1371                           gen_helper_gvec_4_ptr *fn,
1372                           bool (*checkfn)(DisasContext *, arg_rmrr *))
1373{
1374    if (checkfn(s, a)) {
1375        uint32_t data = 0;
1376        TCGLabel *over = gen_new_label();
1377        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
1378
1379        data = FIELD_DP32(data, VDATA, VM, a->vm);
1380        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
1381        tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
1382                           vreg_ofs(s, a->rs1),
1383                           vreg_ofs(s, a->rs2),
1384                           cpu_env, s->vlen / 8, s->vlen / 8,
1385                           data, fn);
1386        mark_vs_dirty(s);
1387        gen_set_label(over);
1388        return true;
1389    }
1390    return false;
1391}
1392
1393#define GEN_OPIVV_WIDEN_TRANS(NAME, CHECK) \
1394static bool trans_##NAME(DisasContext *s, arg_rmrr *a)       \
1395{                                                            \
1396    static gen_helper_gvec_4_ptr * const fns[3] = {          \
1397        gen_helper_##NAME##_b,                               \
1398        gen_helper_##NAME##_h,                               \
1399        gen_helper_##NAME##_w                                \
1400    };                                                       \
1401    return do_opivv_widen(s, a, fns[s->sew], CHECK);         \
1402}
1403
1404GEN_OPIVV_WIDEN_TRANS(vwaddu_vv, opivv_widen_check)
1405GEN_OPIVV_WIDEN_TRANS(vwadd_vv, opivv_widen_check)
1406GEN_OPIVV_WIDEN_TRANS(vwsubu_vv, opivv_widen_check)
1407GEN_OPIVV_WIDEN_TRANS(vwsub_vv, opivv_widen_check)
1408
1409/* OPIVX with WIDEN */
1410static bool opivx_widen_check(DisasContext *s, arg_rmrr *a)
1411{
1412    return require_rvv(s) &&
1413           vext_check_isa_ill(s) &&
1414           vext_check_ds(s, a->rd, a->rs2, a->vm);
1415}
1416
1417static bool do_opivx_widen(DisasContext *s, arg_rmrr *a,
1418                           gen_helper_opivx *fn)
1419{
1420    if (opivx_widen_check(s, a)) {
1421        return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s);
1422    }
1423    return false;
1424}
1425
1426#define GEN_OPIVX_WIDEN_TRANS(NAME) \
1427static bool trans_##NAME(DisasContext *s, arg_rmrr *a)       \
1428{                                                            \
1429    static gen_helper_opivx * const fns[3] = {               \
1430        gen_helper_##NAME##_b,                               \
1431        gen_helper_##NAME##_h,                               \
1432        gen_helper_##NAME##_w                                \
1433    };                                                       \
1434    return do_opivx_widen(s, a, fns[s->sew]);                \
1435}
1436
1437GEN_OPIVX_WIDEN_TRANS(vwaddu_vx)
1438GEN_OPIVX_WIDEN_TRANS(vwadd_vx)
1439GEN_OPIVX_WIDEN_TRANS(vwsubu_vx)
1440GEN_OPIVX_WIDEN_TRANS(vwsub_vx)
1441
1442/* WIDEN OPIVV with WIDEN */
1443static bool opiwv_widen_check(DisasContext *s, arg_rmrr *a)
1444{
1445    return require_rvv(s) &&
1446           vext_check_isa_ill(s) &&
1447           vext_check_dds(s, a->rd, a->rs1, a->rs2, a->vm);
1448}
1449
1450static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a,
1451                           gen_helper_gvec_4_ptr *fn)
1452{
1453    if (opiwv_widen_check(s, a)) {
1454        uint32_t data = 0;
1455        TCGLabel *over = gen_new_label();
1456        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
1457
1458        data = FIELD_DP32(data, VDATA, VM, a->vm);
1459        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
1460        tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
1461                           vreg_ofs(s, a->rs1),
1462                           vreg_ofs(s, a->rs2),
1463                           cpu_env, s->vlen / 8, s->vlen / 8, data, fn);
1464        mark_vs_dirty(s);
1465        gen_set_label(over);
1466        return true;
1467    }
1468    return false;
1469}
1470
1471#define GEN_OPIWV_WIDEN_TRANS(NAME) \
1472static bool trans_##NAME(DisasContext *s, arg_rmrr *a)       \
1473{                                                            \
1474    static gen_helper_gvec_4_ptr * const fns[3] = {          \
1475        gen_helper_##NAME##_b,                               \
1476        gen_helper_##NAME##_h,                               \
1477        gen_helper_##NAME##_w                                \
1478    };                                                       \
1479    return do_opiwv_widen(s, a, fns[s->sew]);                \
1480}
1481
1482GEN_OPIWV_WIDEN_TRANS(vwaddu_wv)
1483GEN_OPIWV_WIDEN_TRANS(vwadd_wv)
1484GEN_OPIWV_WIDEN_TRANS(vwsubu_wv)
1485GEN_OPIWV_WIDEN_TRANS(vwsub_wv)
1486
1487/* WIDEN OPIVX with WIDEN */
1488static bool opiwx_widen_check(DisasContext *s, arg_rmrr *a)
1489{
1490    return require_rvv(s) &&
1491           vext_check_isa_ill(s) &&
1492           vext_check_dd(s, a->rd, a->rs2, a->vm);
1493}
1494
1495static bool do_opiwx_widen(DisasContext *s, arg_rmrr *a,
1496                           gen_helper_opivx *fn)
1497{
1498    if (opiwx_widen_check(s, a)) {
1499        return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s);
1500    }
1501    return false;
1502}
1503
1504#define GEN_OPIWX_WIDEN_TRANS(NAME) \
1505static bool trans_##NAME(DisasContext *s, arg_rmrr *a)       \
1506{                                                            \
1507    static gen_helper_opivx * const fns[3] = {               \
1508        gen_helper_##NAME##_b,                               \
1509        gen_helper_##NAME##_h,                               \
1510        gen_helper_##NAME##_w                                \
1511    };                                                       \
1512    return do_opiwx_widen(s, a, fns[s->sew]);                \
1513}
1514
1515GEN_OPIWX_WIDEN_TRANS(vwaddu_wx)
1516GEN_OPIWX_WIDEN_TRANS(vwadd_wx)
1517GEN_OPIWX_WIDEN_TRANS(vwsubu_wx)
1518GEN_OPIWX_WIDEN_TRANS(vwsub_wx)
1519
1520/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
/*
 * OPIVV without GVEC IR: generates trans_NAME(), which always goes through
 * the out-of-line NAME helper for the current s->sew once CHECK has
 * accepted the instruction.  The helper call is skipped at run time when
 * vl == 0.
 */
#define GEN_OPIVV_TRANS(NAME, CHECK)                               \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
{                                                                  \
    static gen_helper_gvec_4_ptr * const fns[4] = {                \
        gen_helper_##NAME##_b, gen_helper_##NAME##_h,              \
        gen_helper_##NAME##_w, gen_helper_##NAME##_d,              \
    };                                                             \
    uint32_t desc_data = 0;                                        \
    TCGLabel *skip;                                                \
                                                                   \
    if (!CHECK(s, a)) {                                            \
        return false;                                              \
    }                                                              \
                                                                   \
    skip = gen_new_label();                                        \
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, skip);              \
                                                                   \
    desc_data = FIELD_DP32(desc_data, VDATA, VM, a->vm);           \
    desc_data = FIELD_DP32(desc_data, VDATA, LMUL, s->lmul);       \
    tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),         \
                       vreg_ofs(s, a->rs1),                        \
                       vreg_ofs(s, a->rs2), cpu_env,               \
                       s->vlen / 8, s->vlen / 8, desc_data,        \
                       fns[s->sew]);                               \
    mark_vs_dirty(s);                                              \
    gen_set_label(skip);                                           \
    return true;                                                   \
}
1547
/*
 * For vadc and vsbc, the destination vector register must not be v0:
 * the check below rejects vd == v0 for every LMUL setting, not only
 * when LMUL > 1. (Section 12.4)
 */
1552static bool opivv_vadc_check(DisasContext *s, arg_rmrr *a)
1553{
1554    return require_rvv(s) &&
1555           vext_check_isa_ill(s) &&
1556           (a->rd != 0) &&
1557           vext_check_sss(s, a->rd, a->rs1, a->rs2, a->vm);
1558}
1559
1560GEN_OPIVV_TRANS(vadc_vvm, opivv_vadc_check)
1561GEN_OPIVV_TRANS(vsbc_vvm, opivv_vadc_check)
1562
1563/*
1564 * For vmadc and vmsbc, an illegal instruction exception is raised if the
1565 * destination vector register overlaps a source vector register group.
1566 */
1567static bool opivv_vmadc_check(DisasContext *s, arg_rmrr *a)
1568{
1569    return require_rvv(s) &&
1570           vext_check_isa_ill(s) &&
1571           vext_check_mss(s, a->rd, a->rs1, a->rs2);
1572}
1573
1574GEN_OPIVV_TRANS(vmadc_vvm, opivv_vmadc_check)
1575GEN_OPIVV_TRANS(vmsbc_vvm, opivv_vmadc_check)
1576
1577static bool opivx_vadc_check(DisasContext *s, arg_rmrr *a)
1578{
1579    return require_rvv(s) &&
1580           vext_check_isa_ill(s) &&
1581           (a->rd != 0) &&
1582           vext_check_ss(s, a->rd, a->rs2, a->vm);
1583}
1584
1585/* OPIVX without GVEC IR */
1586#define GEN_OPIVX_TRANS(NAME, CHECK)                                     \
1587static bool trans_##NAME(DisasContext *s, arg_rmrr *a)                   \
1588{                                                                        \
1589    if (CHECK(s, a)) {                                                   \
1590        static gen_helper_opivx * const fns[4] = {                       \
1591            gen_helper_##NAME##_b, gen_helper_##NAME##_h,                \
1592            gen_helper_##NAME##_w, gen_helper_##NAME##_d,                \
1593        };                                                               \
1594                                                                         \
1595        return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);\
1596    }                                                                    \
1597    return false;                                                        \
1598}
1599
1600GEN_OPIVX_TRANS(vadc_vxm, opivx_vadc_check)
1601GEN_OPIVX_TRANS(vsbc_vxm, opivx_vadc_check)
1602
1603static bool opivx_vmadc_check(DisasContext *s, arg_rmrr *a)
1604{
1605    return require_rvv(s) &&
1606           vext_check_isa_ill(s) &&
1607           vext_check_ms(s, a->rd, a->rs2);
1608}
1609
1610GEN_OPIVX_TRANS(vmadc_vxm, opivx_vmadc_check)
1611GEN_OPIVX_TRANS(vmsbc_vxm, opivx_vmadc_check)
1612
1613/* OPIVI without GVEC IR */
1614#define GEN_OPIVI_TRANS(NAME, IMM_MODE, OPIVX, CHECK)                    \
1615static bool trans_##NAME(DisasContext *s, arg_rmrr *a)                   \
1616{                                                                        \
1617    if (CHECK(s, a)) {                                                   \
1618        static gen_helper_opivx * const fns[4] = {                       \
1619            gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h,              \
1620            gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d,              \
1621        };                                                               \
1622        return opivi_trans(a->rd, a->rs1, a->rs2, a->vm,                 \
1623                           fns[s->sew], s, IMM_MODE);                    \
1624    }                                                                    \
1625    return false;                                                        \
1626}
1627
1628GEN_OPIVI_TRANS(vadc_vim, IMM_SX, vadc_vxm, opivx_vadc_check)
1629GEN_OPIVI_TRANS(vmadc_vim, IMM_SX, vmadc_vxm, opivx_vmadc_check)
1630
/* Vector Bitwise Logical Instructions */
/*
 * and/or/xor in vector-vector, vector-scalar and vector-immediate forms.
 * The immediate forms decode their operand with IMM_SX and name the
 * matching _vx instruction as the source of their out-of-line helpers.
 */
GEN_OPIVV_GVEC_TRANS(vand_vv, and)
GEN_OPIVV_GVEC_TRANS(vor_vv,  or)
GEN_OPIVV_GVEC_TRANS(vxor_vv, xor)
GEN_OPIVX_GVEC_TRANS(vand_vx, ands)
GEN_OPIVX_GVEC_TRANS(vor_vx,  ors)
GEN_OPIVX_GVEC_TRANS(vxor_vx, xors)
GEN_OPIVI_GVEC_TRANS(vand_vi, IMM_SX, vand_vx, andi)
GEN_OPIVI_GVEC_TRANS(vor_vi, IMM_SX, vor_vx,  ori)
GEN_OPIVI_GVEC_TRANS(vxor_vi, IMM_SX, vxor_vx, xori)
1641
/* Vector Single-Width Bit Shift Instructions */
/* Vector-vector shifts, paired with the gvec shlv/shrv/sarv expanders. */
GEN_OPIVV_GVEC_TRANS(vsll_vv,  shlv)
GEN_OPIVV_GVEC_TRANS(vsrl_vv,  shrv)
GEN_OPIVV_GVEC_TRANS(vsra_vv,  sarv)
1646
1647typedef void GVecGen2sFn32(unsigned, uint32_t, uint32_t, TCGv_i32,
1648                           uint32_t, uint32_t);
1649
1650static inline bool
1651do_opivx_gvec_shift(DisasContext *s, arg_rmrr *a, GVecGen2sFn32 *gvec_fn,
1652                    gen_helper_opivx *fn)
1653{
1654    if (!opivx_check(s, a)) {
1655        return false;
1656    }
1657
1658    if (a->vm && s->vl_eq_vlmax) {
1659        TCGv_i32 src1 = tcg_temp_new_i32();
1660
1661        tcg_gen_trunc_tl_i32(src1, get_gpr(s, a->rs1, EXT_NONE));
1662        tcg_gen_extract_i32(src1, src1, 0, s->sew + 3);
1663        gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2),
1664                src1, MAXSZ(s), MAXSZ(s));
1665
1666        tcg_temp_free_i32(src1);
1667        mark_vs_dirty(s);
1668        return true;
1669    }
1670    return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s);
1671}
1672
1673#define GEN_OPIVX_GVEC_SHIFT_TRANS(NAME, SUF) \
1674static bool trans_##NAME(DisasContext *s, arg_rmrr *a)                    \
1675{                                                                         \
1676    static gen_helper_opivx * const fns[4] = {                            \
1677        gen_helper_##NAME##_b, gen_helper_##NAME##_h,                     \
1678        gen_helper_##NAME##_w, gen_helper_##NAME##_d,                     \
1679    };                                                                    \
1680                                                                          \
1681    return do_opivx_gvec_shift(s, a, tcg_gen_gvec_##SUF, fns[s->sew]);    \
1682}
1683
1684GEN_OPIVX_GVEC_SHIFT_TRANS(vsll_vx,  shls)
1685GEN_OPIVX_GVEC_SHIFT_TRANS(vsrl_vx,  shrs)
1686GEN_OPIVX_GVEC_SHIFT_TRANS(vsra_vx,  sars)
1687
1688GEN_OPIVI_GVEC_TRANS(vsll_vi, IMM_TRUNC_SEW, vsll_vx, shli)
1689GEN_OPIVI_GVEC_TRANS(vsrl_vi, IMM_TRUNC_SEW, vsrl_vx, shri)
1690GEN_OPIVI_GVEC_TRANS(vsra_vi, IMM_TRUNC_SEW, vsra_vx, sari)
1691
1692/* Vector Narrowing Integer Right Shift Instructions */
1693static bool opiwv_narrow_check(DisasContext *s, arg_rmrr *a)
1694{
1695    return require_rvv(s) &&
1696           vext_check_isa_ill(s) &&
1697           vext_check_sds(s, a->rd, a->rs1, a->rs2, a->vm);
1698}
1699
1700/* OPIVV with NARROW */
1701#define GEN_OPIWV_NARROW_TRANS(NAME)                               \
1702static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
1703{                                                                  \
1704    if (opiwv_narrow_check(s, a)) {                                \
1705        uint32_t data = 0;                                         \
1706        static gen_helper_gvec_4_ptr * const fns[3] = {            \
1707            gen_helper_##NAME##_b,                                 \
1708            gen_helper_##NAME##_h,                                 \
1709            gen_helper_##NAME##_w,                                 \
1710        };                                                         \
1711        TCGLabel *over = gen_new_label();                          \
1712        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);          \
1713                                                                   \
1714        data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
1715        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
1716        tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
1717                           vreg_ofs(s, a->rs1),                    \
1718                           vreg_ofs(s, a->rs2), cpu_env,           \
1719                           s->vlen / 8, s->vlen / 8, data,         \
1720                           fns[s->sew]);                           \
1721        mark_vs_dirty(s);                                          \
1722        gen_set_label(over);                                       \
1723        return true;                                               \
1724    }                                                              \
1725    return false;                                                  \
1726}
1727GEN_OPIWV_NARROW_TRANS(vnsra_wv)
1728GEN_OPIWV_NARROW_TRANS(vnsrl_wv)
1729
1730static bool opiwx_narrow_check(DisasContext *s, arg_rmrr *a)
1731{
1732    return require_rvv(s) &&
1733           vext_check_isa_ill(s) &&
1734           vext_check_sd(s, a->rd, a->rs2, a->vm);
1735}
1736
/* OPIWX with NARROW */
1738#define GEN_OPIWX_NARROW_TRANS(NAME)                                     \
1739static bool trans_##NAME(DisasContext *s, arg_rmrr *a)                   \
1740{                                                                        \
1741    if (opiwx_narrow_check(s, a)) {                                      \
1742        static gen_helper_opivx * const fns[3] = {                       \
1743            gen_helper_##NAME##_b,                                       \
1744            gen_helper_##NAME##_h,                                       \
1745            gen_helper_##NAME##_w,                                       \
1746        };                                                               \
1747        return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);\
1748    }                                                                    \
1749    return false;                                                        \
1750}
1751
1752GEN_OPIWX_NARROW_TRANS(vnsra_wx)
1753GEN_OPIWX_NARROW_TRANS(vnsrl_wx)
1754
1755/* OPIWI with NARROW */
1756#define GEN_OPIWI_NARROW_TRANS(NAME, IMM_MODE, OPIVX)                    \
1757static bool trans_##NAME(DisasContext *s, arg_rmrr *a)                   \
1758{                                                                        \
1759    if (opiwx_narrow_check(s, a)) {                                      \
1760        static gen_helper_opivx * const fns[3] = {                       \
1761            gen_helper_##OPIVX##_b,                                      \
1762            gen_helper_##OPIVX##_h,                                      \
1763            gen_helper_##OPIVX##_w,                                      \
1764        };                                                               \
1765        return opivi_trans(a->rd, a->rs1, a->rs2, a->vm,                 \
1766                           fns[s->sew], s, IMM_MODE);                    \
1767    }                                                                    \
1768    return false;                                                        \
1769}
1770
1771GEN_OPIWI_NARROW_TRANS(vnsra_wi, IMM_ZX, vnsra_wx)
1772GEN_OPIWI_NARROW_TRANS(vnsrl_wi, IMM_ZX, vnsrl_wx)
1773
1774/* Vector Integer Comparison Instructions */
1775/*
1776 * For all comparison instructions, an illegal instruction exception is raised
1777 * if the destination vector register overlaps a source vector register group
1778 * and LMUL > 1.
1779 */
1780static bool opivv_cmp_check(DisasContext *s, arg_rmrr *a)
1781{
1782    return require_rvv(s) &&
1783           vext_check_isa_ill(s) &&
1784           vext_check_mss(s, a->rd, a->rs1, a->rs2);
1785}
1786
1787GEN_OPIVV_TRANS(vmseq_vv, opivv_cmp_check)
1788GEN_OPIVV_TRANS(vmsne_vv, opivv_cmp_check)
1789GEN_OPIVV_TRANS(vmsltu_vv, opivv_cmp_check)
1790GEN_OPIVV_TRANS(vmslt_vv, opivv_cmp_check)
1791GEN_OPIVV_TRANS(vmsleu_vv, opivv_cmp_check)
1792GEN_OPIVV_TRANS(vmsle_vv, opivv_cmp_check)
1793
1794static bool opivx_cmp_check(DisasContext *s, arg_rmrr *a)
1795{
1796    return require_rvv(s) &&
1797           vext_check_isa_ill(s) &&
1798           vext_check_ms(s, a->rd, a->rs2);
1799}
1800
1801GEN_OPIVX_TRANS(vmseq_vx, opivx_cmp_check)
1802GEN_OPIVX_TRANS(vmsne_vx, opivx_cmp_check)
1803GEN_OPIVX_TRANS(vmsltu_vx, opivx_cmp_check)
1804GEN_OPIVX_TRANS(vmslt_vx, opivx_cmp_check)
1805GEN_OPIVX_TRANS(vmsleu_vx, opivx_cmp_check)
1806GEN_OPIVX_TRANS(vmsle_vx, opivx_cmp_check)
1807GEN_OPIVX_TRANS(vmsgtu_vx, opivx_cmp_check)
1808GEN_OPIVX_TRANS(vmsgt_vx, opivx_cmp_check)
1809
1810GEN_OPIVI_TRANS(vmseq_vi, IMM_SX, vmseq_vx, opivx_cmp_check)
1811GEN_OPIVI_TRANS(vmsne_vi, IMM_SX, vmsne_vx, opivx_cmp_check)
1812GEN_OPIVI_TRANS(vmsleu_vi, IMM_SX, vmsleu_vx, opivx_cmp_check)
1813GEN_OPIVI_TRANS(vmsle_vi, IMM_SX, vmsle_vx, opivx_cmp_check)
1814GEN_OPIVI_TRANS(vmsgtu_vi, IMM_SX, vmsgtu_vx, opivx_cmp_check)
1815GEN_OPIVI_TRANS(vmsgt_vi, IMM_SX, vmsgt_vx, opivx_cmp_check)
1816
1817/* Vector Integer Min/Max Instructions */
1818GEN_OPIVV_GVEC_TRANS(vminu_vv, umin)
1819GEN_OPIVV_GVEC_TRANS(vmin_vv,  smin)
1820GEN_OPIVV_GVEC_TRANS(vmaxu_vv, umax)
1821GEN_OPIVV_GVEC_TRANS(vmax_vv,  smax)
1822GEN_OPIVX_TRANS(vminu_vx, opivx_check)
1823GEN_OPIVX_TRANS(vmin_vx,  opivx_check)
1824GEN_OPIVX_TRANS(vmaxu_vx, opivx_check)
1825GEN_OPIVX_TRANS(vmax_vx,  opivx_check)
1826
1827/* Vector Single-Width Integer Multiply Instructions */
1828GEN_OPIVV_GVEC_TRANS(vmul_vv,  mul)
1829GEN_OPIVV_TRANS(vmulh_vv, opivv_check)
1830GEN_OPIVV_TRANS(vmulhu_vv, opivv_check)
1831GEN_OPIVV_TRANS(vmulhsu_vv, opivv_check)
1832GEN_OPIVX_GVEC_TRANS(vmul_vx,  muls)
1833GEN_OPIVX_TRANS(vmulh_vx, opivx_check)
1834GEN_OPIVX_TRANS(vmulhu_vx, opivx_check)
1835GEN_OPIVX_TRANS(vmulhsu_vx, opivx_check)
1836
1837/* Vector Integer Divide Instructions */
1838GEN_OPIVV_TRANS(vdivu_vv, opivv_check)
1839GEN_OPIVV_TRANS(vdiv_vv, opivv_check)
1840GEN_OPIVV_TRANS(vremu_vv, opivv_check)
1841GEN_OPIVV_TRANS(vrem_vv, opivv_check)
1842GEN_OPIVX_TRANS(vdivu_vx, opivx_check)
1843GEN_OPIVX_TRANS(vdiv_vx, opivx_check)
1844GEN_OPIVX_TRANS(vremu_vx, opivx_check)
1845GEN_OPIVX_TRANS(vrem_vx, opivx_check)
1846
1847/* Vector Widening Integer Multiply Instructions */
1848GEN_OPIVV_WIDEN_TRANS(vwmul_vv, opivv_widen_check)
1849GEN_OPIVV_WIDEN_TRANS(vwmulu_vv, opivv_widen_check)
1850GEN_OPIVV_WIDEN_TRANS(vwmulsu_vv, opivv_widen_check)
1851GEN_OPIVX_WIDEN_TRANS(vwmul_vx)
1852GEN_OPIVX_WIDEN_TRANS(vwmulu_vx)
1853GEN_OPIVX_WIDEN_TRANS(vwmulsu_vx)
1854
1855/* Vector Single-Width Integer Multiply-Add Instructions */
1856GEN_OPIVV_TRANS(vmacc_vv, opivv_check)
1857GEN_OPIVV_TRANS(vnmsac_vv, opivv_check)
1858GEN_OPIVV_TRANS(vmadd_vv, opivv_check)
1859GEN_OPIVV_TRANS(vnmsub_vv, opivv_check)
1860GEN_OPIVX_TRANS(vmacc_vx, opivx_check)
1861GEN_OPIVX_TRANS(vnmsac_vx, opivx_check)
1862GEN_OPIVX_TRANS(vmadd_vx, opivx_check)
1863GEN_OPIVX_TRANS(vnmsub_vx, opivx_check)
1864
1865/* Vector Widening Integer Multiply-Add Instructions */
1866GEN_OPIVV_WIDEN_TRANS(vwmaccu_vv, opivv_widen_check)
1867GEN_OPIVV_WIDEN_TRANS(vwmacc_vv, opivv_widen_check)
1868GEN_OPIVV_WIDEN_TRANS(vwmaccsu_vv, opivv_widen_check)
1869GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx)
1870GEN_OPIVX_WIDEN_TRANS(vwmacc_vx)
1871GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx)
1872GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx)
1873
1874/* Vector Integer Merge and Move Instructions */
1875static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a)
1876{
1877    if (require_rvv(s) &&
1878        vext_check_isa_ill(s) &&
1879        /* vmv.v.v has rs2 = 0 and vm = 1 */
1880        vext_check_sss(s, a->rd, a->rs1, 0, 1)) {
1881        if (s->vl_eq_vlmax) {
1882            tcg_gen_gvec_mov(s->sew, vreg_ofs(s, a->rd),
1883                             vreg_ofs(s, a->rs1),
1884                             MAXSZ(s), MAXSZ(s));
1885        } else {
1886            uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
1887            static gen_helper_gvec_2_ptr * const fns[4] = {
1888                gen_helper_vmv_v_v_b, gen_helper_vmv_v_v_h,
1889                gen_helper_vmv_v_v_w, gen_helper_vmv_v_v_d,
1890            };
1891            TCGLabel *over = gen_new_label();
1892            tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
1893
1894            tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1),
1895                               cpu_env, s->vlen / 8, s->vlen / 8, data,
1896                               fns[s->sew]);
1897            gen_set_label(over);
1898        }
1899        mark_vs_dirty(s);
1900        return true;
1901    }
1902    return false;
1903}
1904
1905typedef void gen_helper_vmv_vx(TCGv_ptr, TCGv_i64, TCGv_env, TCGv_i32);
1906static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a)
1907{
1908    if (require_rvv(s) &&
1909        vext_check_isa_ill(s) &&
1910        /* vmv.v.x has rs2 = 0 and vm = 1 */
1911        vext_check_ss(s, a->rd, 0, 1)) {
1912        TCGv s1;
1913        TCGLabel *over = gen_new_label();
1914        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
1915
1916        s1 = get_gpr(s, a->rs1, EXT_SIGN);
1917
1918        if (s->vl_eq_vlmax) {
1919            tcg_gen_gvec_dup_tl(s->sew, vreg_ofs(s, a->rd),
1920                                MAXSZ(s), MAXSZ(s), s1);
1921        } else {
1922            TCGv_i32 desc;
1923            TCGv_i64 s1_i64 = tcg_temp_new_i64();
1924            TCGv_ptr dest = tcg_temp_new_ptr();
1925            uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
1926            static gen_helper_vmv_vx * const fns[4] = {
1927                gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h,
1928                gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d,
1929            };
1930
1931            tcg_gen_ext_tl_i64(s1_i64, s1);
1932            desc = tcg_constant_i32(simd_desc(s->vlen / 8, s->vlen / 8, data));
1933            tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, a->rd));
1934            fns[s->sew](dest, s1_i64, cpu_env, desc);
1935
1936            tcg_temp_free_ptr(dest);
1937            tcg_temp_free_i64(s1_i64);
1938        }
1939
1940        mark_vs_dirty(s);
1941        gen_set_label(over);
1942        return true;
1943    }
1944    return false;
1945}
1946
1947static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a)
1948{
1949    if (require_rvv(s) &&
1950        vext_check_isa_ill(s) &&
1951        /* vmv.v.i has rs2 = 0 and vm = 1 */
1952        vext_check_ss(s, a->rd, 0, 1)) {
1953        int64_t simm = sextract64(a->rs1, 0, 5);
1954        if (s->vl_eq_vlmax) {
1955            tcg_gen_gvec_dup_imm(s->sew, vreg_ofs(s, a->rd),
1956                                 MAXSZ(s), MAXSZ(s), simm);
1957            mark_vs_dirty(s);
1958        } else {
1959            TCGv_i32 desc;
1960            TCGv_i64 s1;
1961            TCGv_ptr dest;
1962            uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
1963            static gen_helper_vmv_vx * const fns[4] = {
1964                gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h,
1965                gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d,
1966            };
1967            TCGLabel *over = gen_new_label();
1968            tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
1969
1970            s1 = tcg_constant_i64(simm);
1971            dest = tcg_temp_new_ptr();
1972            desc = tcg_constant_i32(simd_desc(s->vlen / 8, s->vlen / 8, data));
1973            tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, a->rd));
1974            fns[s->sew](dest, s1, cpu_env, desc);
1975
1976            tcg_temp_free_ptr(dest);
1977            mark_vs_dirty(s);
1978            gen_set_label(over);
1979        }
1980        return true;
1981    }
1982    return false;
1983}
1984
1985GEN_OPIVV_TRANS(vmerge_vvm, opivv_vadc_check)
1986GEN_OPIVX_TRANS(vmerge_vxm, opivx_vadc_check)
1987GEN_OPIVI_TRANS(vmerge_vim, IMM_SX, vmerge_vxm, opivx_vadc_check)
1988
1989/*
1990 *** Vector Fixed-Point Arithmetic Instructions
1991 */
1992
1993/* Vector Single-Width Saturating Add and Subtract */
1994GEN_OPIVV_TRANS(vsaddu_vv, opivv_check)
1995GEN_OPIVV_TRANS(vsadd_vv,  opivv_check)
1996GEN_OPIVV_TRANS(vssubu_vv, opivv_check)
1997GEN_OPIVV_TRANS(vssub_vv,  opivv_check)
1998GEN_OPIVX_TRANS(vsaddu_vx,  opivx_check)
1999GEN_OPIVX_TRANS(vsadd_vx,  opivx_check)
2000GEN_OPIVX_TRANS(vssubu_vx,  opivx_check)
2001GEN_OPIVX_TRANS(vssub_vx,  opivx_check)
2002GEN_OPIVI_TRANS(vsaddu_vi, IMM_SX, vsaddu_vx, opivx_check)
2003GEN_OPIVI_TRANS(vsadd_vi, IMM_SX, vsadd_vx, opivx_check)
2004
2005/* Vector Single-Width Averaging Add and Subtract */
2006GEN_OPIVV_TRANS(vaadd_vv, opivv_check)
2007GEN_OPIVV_TRANS(vaaddu_vv, opivv_check)
2008GEN_OPIVV_TRANS(vasub_vv, opivv_check)
2009GEN_OPIVV_TRANS(vasubu_vv, opivv_check)
2010GEN_OPIVX_TRANS(vaadd_vx,  opivx_check)
2011GEN_OPIVX_TRANS(vaaddu_vx,  opivx_check)
2012GEN_OPIVX_TRANS(vasub_vx,  opivx_check)
2013GEN_OPIVX_TRANS(vasubu_vx,  opivx_check)
2014
2015/* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2016GEN_OPIVV_TRANS(vsmul_vv, opivv_check)
2017GEN_OPIVX_TRANS(vsmul_vx,  opivx_check)
2018
2019/* Vector Widening Saturating Scaled Multiply-Add */
2020GEN_OPIVV_WIDEN_TRANS(vwsmaccu_vv, opivv_widen_check)
2021GEN_OPIVV_WIDEN_TRANS(vwsmacc_vv, opivv_widen_check)
2022GEN_OPIVV_WIDEN_TRANS(vwsmaccsu_vv, opivv_widen_check)
2023GEN_OPIVX_WIDEN_TRANS(vwsmaccu_vx)
2024GEN_OPIVX_WIDEN_TRANS(vwsmacc_vx)
2025GEN_OPIVX_WIDEN_TRANS(vwsmaccsu_vx)
2026GEN_OPIVX_WIDEN_TRANS(vwsmaccus_vx)
2027
2028/* Vector Single-Width Scaling Shift Instructions */
2029GEN_OPIVV_TRANS(vssrl_vv, opivv_check)
2030GEN_OPIVV_TRANS(vssra_vv, opivv_check)
2031GEN_OPIVX_TRANS(vssrl_vx,  opivx_check)
2032GEN_OPIVX_TRANS(vssra_vx,  opivx_check)
2033GEN_OPIVI_TRANS(vssrl_vi, IMM_ZX, vssrl_vx, opivx_check)
2034GEN_OPIVI_TRANS(vssra_vi, IMM_SX, vssra_vx, opivx_check)
2035
2036/* Vector Narrowing Fixed-Point Clip Instructions */
2037GEN_OPIWV_NARROW_TRANS(vnclipu_wv)
2038GEN_OPIWV_NARROW_TRANS(vnclip_wv)
2039GEN_OPIWX_NARROW_TRANS(vnclipu_wx)
2040GEN_OPIWX_NARROW_TRANS(vnclip_wx)
2041GEN_OPIWI_NARROW_TRANS(vnclipu_wi, IMM_ZX, vnclipu_wx)
2042GEN_OPIWI_NARROW_TRANS(vnclip_wi, IMM_ZX, vnclip_wx)
2043
2044/*
 *** Vector Floating-Point Arithmetic Instructions
2046 */
2047
2048/*
2049 * As RVF-only cpus always have values NaN-boxed to 64-bits,
2050 * RVF and RVD can be treated equally.
2051 * We don't have to deal with the cases of: SEW > FLEN.
2052 *
2053 * If SEW < FLEN, check whether input fp register is a valid
2054 * NaN-boxed value, in which case the least-significant SEW bits
 * of the f register are used, else the canonical NaN value is used.
2056 */
2057static void do_nanbox(DisasContext *s, TCGv_i64 out, TCGv_i64 in)
2058{
2059    switch (s->sew) {
2060    case 1:
2061        gen_check_nanbox_h(out, in);
2062        break;
2063    case 2:
2064        gen_check_nanbox_s(out, in);
2065        break;
2066    case 3:
2067        tcg_gen_mov_i64(out, in);
2068        break;
2069    default:
2070        g_assert_not_reached();
2071    }
2072}
2073
2074/* Vector Single-Width Floating-Point Add/Subtract Instructions */
2075
2076/*
2077 * If the current SEW does not correspond to a supported IEEE floating-point
2078 * type, an illegal instruction exception is raised.
2079 */
2080static bool opfvv_check(DisasContext *s, arg_rmrr *a)
2081{
2082    return require_rvv(s) &&
2083           require_rvf(s) &&
2084           vext_check_isa_ill(s) &&
2085           vext_check_sss(s, a->rd, a->rs1, a->rs2, a->vm);
2086}
2087
2088/* OPFVV without GVEC IR */
2089#define GEN_OPFVV_TRANS(NAME, CHECK)                               \
2090static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
2091{                                                                  \
2092    if (CHECK(s, a)) {                                             \
2093        uint32_t data = 0;                                         \
2094        static gen_helper_gvec_4_ptr * const fns[3] = {            \
2095            gen_helper_##NAME##_h,                                 \
2096            gen_helper_##NAME##_w,                                 \
2097            gen_helper_##NAME##_d,                                 \
2098        };                                                         \
2099        TCGLabel *over = gen_new_label();                          \
2100        gen_set_rm(s, 7);                                          \
2101        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);          \
2102                                                                   \
2103        data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
2104        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
2105        tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
2106                           vreg_ofs(s, a->rs1),                    \
2107                           vreg_ofs(s, a->rs2), cpu_env,           \
2108                           s->vlen / 8, s->vlen / 8, data,         \
2109                           fns[s->sew - 1]);                       \
2110        mark_vs_dirty(s);                                          \
2111        gen_set_label(over);                                       \
2112        return true;                                               \
2113    }                                                              \
2114    return false;                                                  \
2115}
2116GEN_OPFVV_TRANS(vfadd_vv, opfvv_check)
2117GEN_OPFVV_TRANS(vfsub_vv, opfvv_check)
2118
2119typedef void gen_helper_opfvf(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_ptr,
2120                              TCGv_env, TCGv_i32);
2121
2122static bool opfvf_trans(uint32_t vd, uint32_t rs1, uint32_t vs2,
2123                        uint32_t data, gen_helper_opfvf *fn, DisasContext *s)
2124{
2125    TCGv_ptr dest, src2, mask;
2126    TCGv_i32 desc;
2127    TCGv_i64 t1;
2128
2129    TCGLabel *over = gen_new_label();
2130    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
2131
2132    dest = tcg_temp_new_ptr();
2133    mask = tcg_temp_new_ptr();
2134    src2 = tcg_temp_new_ptr();
2135    desc = tcg_constant_i32(simd_desc(s->vlen / 8, s->vlen / 8, data));
2136
2137    tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd));
2138    tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, vs2));
2139    tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));
2140
2141    /* NaN-box f[rs1] */
2142    t1 = tcg_temp_new_i64();
2143    do_nanbox(s, t1, cpu_fpr[rs1]);
2144
2145    fn(dest, mask, t1, src2, cpu_env, desc);
2146
2147    tcg_temp_free_ptr(dest);
2148    tcg_temp_free_ptr(mask);
2149    tcg_temp_free_ptr(src2);
2150    tcg_temp_free_i64(t1);
2151    mark_vs_dirty(s);
2152    gen_set_label(over);
2153    return true;
2154}
2155
2156/*
2157 * If the current SEW does not correspond to a supported IEEE floating-point
2158 * type, an illegal instruction exception is raised
2159 */
2160static bool opfvf_check(DisasContext *s, arg_rmrr *a)
2161{
2162    return require_rvv(s) &&
2163           require_rvf(s) &&
2164           vext_check_isa_ill(s) &&
2165           vext_check_ss(s, a->rd, a->rs2, a->vm);
2166}
2167
2168/* OPFVF without GVEC IR */
2169#define GEN_OPFVF_TRANS(NAME, CHECK)                              \
2170static bool trans_##NAME(DisasContext *s, arg_rmrr *a)            \
2171{                                                                 \
2172    if (CHECK(s, a)) {                                            \
2173        uint32_t data = 0;                                        \
2174        static gen_helper_opfvf *const fns[3] = {                 \
2175            gen_helper_##NAME##_h,                                \
2176            gen_helper_##NAME##_w,                                \
2177            gen_helper_##NAME##_d,                                \
2178        };                                                        \
2179        gen_set_rm(s, 7);                                         \
2180        data = FIELD_DP32(data, VDATA, VM, a->vm);                \
2181        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);            \
2182        return opfvf_trans(a->rd, a->rs1, a->rs2, data,           \
2183                           fns[s->sew - 1], s);                   \
2184    }                                                             \
2185    return false;                                                 \
2186}
2187
2188GEN_OPFVF_TRANS(vfadd_vf,  opfvf_check)
2189GEN_OPFVF_TRANS(vfsub_vf,  opfvf_check)
2190GEN_OPFVF_TRANS(vfrsub_vf,  opfvf_check)
2191
2192/* Vector Widening Floating-Point Add/Subtract Instructions */
2193static bool opfvv_widen_check(DisasContext *s, arg_rmrr *a)
2194{
2195    return require_rvv(s) &&
2196           require_rvf(s) &&
2197           vext_check_isa_ill(s) &&
2198           vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm);
2199}
2200
2201/* OPFVV with WIDEN */
2202#define GEN_OPFVV_WIDEN_TRANS(NAME, CHECK)                       \
2203static bool trans_##NAME(DisasContext *s, arg_rmrr *a)           \
2204{                                                                \
2205    if (CHECK(s, a)) {                                           \
2206        uint32_t data = 0;                                       \
2207        static gen_helper_gvec_4_ptr * const fns[2] = {          \
2208            gen_helper_##NAME##_h, gen_helper_##NAME##_w,        \
2209        };                                                       \
2210        TCGLabel *over = gen_new_label();                        \
2211        gen_set_rm(s, 7);                                        \
2212        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);        \
2213                                                                 \
2214        data = FIELD_DP32(data, VDATA, VM, a->vm);               \
2215        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);           \
2216        tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),   \
2217                           vreg_ofs(s, a->rs1),                  \
2218                           vreg_ofs(s, a->rs2), cpu_env,         \
2219                           s->vlen / 8, s->vlen / 8, data,       \
2220                           fns[s->sew - 1]);                     \
2221        mark_vs_dirty(s);                                        \
2222        gen_set_label(over);                                     \
2223        return true;                                             \
2224    }                                                            \
2225    return false;                                                \
2226}
2227
2228GEN_OPFVV_WIDEN_TRANS(vfwadd_vv, opfvv_widen_check)
2229GEN_OPFVV_WIDEN_TRANS(vfwsub_vv, opfvv_widen_check)
2230
2231static bool opfvf_widen_check(DisasContext *s, arg_rmrr *a)
2232{
2233    return require_rvv(s) &&
2234           require_rvf(s) &&
2235           vext_check_isa_ill(s) &&
2236           vext_check_ds(s, a->rd, a->rs2, a->vm);
2237}
2238
2239/* OPFVF with WIDEN */
2240#define GEN_OPFVF_WIDEN_TRANS(NAME)                              \
2241static bool trans_##NAME(DisasContext *s, arg_rmrr *a)           \
2242{                                                                \
2243    if (opfvf_widen_check(s, a)) {                               \
2244        uint32_t data = 0;                                       \
2245        static gen_helper_opfvf *const fns[2] = {                \
2246            gen_helper_##NAME##_h, gen_helper_##NAME##_w,        \
2247        };                                                       \
2248        gen_set_rm(s, 7);                                        \
2249        data = FIELD_DP32(data, VDATA, VM, a->vm);               \
2250        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);           \
2251        return opfvf_trans(a->rd, a->rs1, a->rs2, data,          \
2252                           fns[s->sew - 1], s);                  \
2253    }                                                            \
2254    return false;                                                \
2255}
2256
2257GEN_OPFVF_WIDEN_TRANS(vfwadd_vf)
2258GEN_OPFVF_WIDEN_TRANS(vfwsub_vf)
2259
2260static bool opfwv_widen_check(DisasContext *s, arg_rmrr *a)
2261{
2262    return require_rvv(s) &&
2263           require_rvf(s) &&
2264           vext_check_isa_ill(s) &&
2265           vext_check_dds(s, a->rd, a->rs1, a->rs2, a->vm);
2266}
2267
2268/* WIDEN OPFVV with WIDEN */
2269#define GEN_OPFWV_WIDEN_TRANS(NAME)                                \
2270static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
2271{                                                                  \
2272    if (opfwv_widen_check(s, a)) {                                 \
2273        uint32_t data = 0;                                         \
2274        static gen_helper_gvec_4_ptr * const fns[2] = {            \
2275            gen_helper_##NAME##_h, gen_helper_##NAME##_w,          \
2276        };                                                         \
2277        TCGLabel *over = gen_new_label();                          \
2278        gen_set_rm(s, 7);                                          \
2279        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);          \
2280                                                                   \
2281        data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
2282        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
2283        tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
2284                           vreg_ofs(s, a->rs1),                    \
2285                           vreg_ofs(s, a->rs2), cpu_env,           \
2286                           s->vlen / 8, s->vlen / 8, data,         \
2287                           fns[s->sew - 1]);                       \
2288        mark_vs_dirty(s);                                          \
2289        gen_set_label(over);                                       \
2290        return true;                                               \
2291    }                                                              \
2292    return false;                                                  \
2293}
2294
2295GEN_OPFWV_WIDEN_TRANS(vfwadd_wv)
2296GEN_OPFWV_WIDEN_TRANS(vfwsub_wv)
2297
2298static bool opfwf_widen_check(DisasContext *s, arg_rmrr *a)
2299{
2300    return require_rvv(s) &&
2301           require_rvf(s) &&
2302           vext_check_isa_ill(s) &&
2303           vext_check_dd(s, a->rd, a->rs2, a->vm);
2304}
2305
2306/* WIDEN OPFVF with WIDEN */
2307#define GEN_OPFWF_WIDEN_TRANS(NAME)                              \
2308static bool trans_##NAME(DisasContext *s, arg_rmrr *a)           \
2309{                                                                \
2310    if (opfwf_widen_check(s, a)) {                               \
2311        uint32_t data = 0;                                       \
2312        static gen_helper_opfvf *const fns[2] = {                \
2313            gen_helper_##NAME##_h, gen_helper_##NAME##_w,        \
2314        };                                                       \
2315        gen_set_rm(s, 7);                                        \
2316        data = FIELD_DP32(data, VDATA, VM, a->vm);               \
2317        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);           \
2318        return opfvf_trans(a->rd, a->rs1, a->rs2, data,          \
2319                           fns[s->sew - 1], s);                  \
2320    }                                                            \
2321    return false;                                                \
2322}
2323
2324GEN_OPFWF_WIDEN_TRANS(vfwadd_wf)
2325GEN_OPFWF_WIDEN_TRANS(vfwsub_wf)
2326
2327/* Vector Single-Width Floating-Point Multiply/Divide Instructions */
2328GEN_OPFVV_TRANS(vfmul_vv, opfvv_check)
2329GEN_OPFVV_TRANS(vfdiv_vv, opfvv_check)
2330GEN_OPFVF_TRANS(vfmul_vf,  opfvf_check)
2331GEN_OPFVF_TRANS(vfdiv_vf,  opfvf_check)
2332GEN_OPFVF_TRANS(vfrdiv_vf,  opfvf_check)
2333
2334/* Vector Widening Floating-Point Multiply */
2335GEN_OPFVV_WIDEN_TRANS(vfwmul_vv, opfvv_widen_check)
2336GEN_OPFVF_WIDEN_TRANS(vfwmul_vf)
2337
2338/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
2339GEN_OPFVV_TRANS(vfmacc_vv, opfvv_check)
2340GEN_OPFVV_TRANS(vfnmacc_vv, opfvv_check)
2341GEN_OPFVV_TRANS(vfmsac_vv, opfvv_check)
2342GEN_OPFVV_TRANS(vfnmsac_vv, opfvv_check)
2343GEN_OPFVV_TRANS(vfmadd_vv, opfvv_check)
2344GEN_OPFVV_TRANS(vfnmadd_vv, opfvv_check)
2345GEN_OPFVV_TRANS(vfmsub_vv, opfvv_check)
2346GEN_OPFVV_TRANS(vfnmsub_vv, opfvv_check)
2347GEN_OPFVF_TRANS(vfmacc_vf, opfvf_check)
2348GEN_OPFVF_TRANS(vfnmacc_vf, opfvf_check)
2349GEN_OPFVF_TRANS(vfmsac_vf, opfvf_check)
2350GEN_OPFVF_TRANS(vfnmsac_vf, opfvf_check)
2351GEN_OPFVF_TRANS(vfmadd_vf, opfvf_check)
2352GEN_OPFVF_TRANS(vfnmadd_vf, opfvf_check)
2353GEN_OPFVF_TRANS(vfmsub_vf, opfvf_check)
2354GEN_OPFVF_TRANS(vfnmsub_vf, opfvf_check)
2355
2356/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
2357GEN_OPFVV_WIDEN_TRANS(vfwmacc_vv, opfvv_widen_check)
2358GEN_OPFVV_WIDEN_TRANS(vfwnmacc_vv, opfvv_widen_check)
2359GEN_OPFVV_WIDEN_TRANS(vfwmsac_vv, opfvv_widen_check)
2360GEN_OPFVV_WIDEN_TRANS(vfwnmsac_vv, opfvv_widen_check)
2361GEN_OPFVF_WIDEN_TRANS(vfwmacc_vf)
2362GEN_OPFVF_WIDEN_TRANS(vfwnmacc_vf)
2363GEN_OPFVF_WIDEN_TRANS(vfwmsac_vf)
2364GEN_OPFVF_WIDEN_TRANS(vfwnmsac_vf)
2365
2366/* Vector Floating-Point Square-Root Instruction */
2367
2368/*
2369 * If the current SEW does not correspond to a supported IEEE floating-point
2370 * type, an illegal instruction exception is raised
2371 */
2372static bool opfv_check(DisasContext *s, arg_rmr *a)
2373{
2374    return require_rvv(s) &&
2375           require_rvf(s) &&
2376           vext_check_isa_ill(s) &&
2377           /* OPFV instructions ignore vs1 check */
2378           vext_check_ss(s, a->rd, a->rs2, a->vm);
2379}
2380
2381#define GEN_OPFV_TRANS(NAME, CHECK)                                \
2382static bool trans_##NAME(DisasContext *s, arg_rmr *a)              \
2383{                                                                  \
2384    if (CHECK(s, a)) {                                             \
2385        uint32_t data = 0;                                         \
2386        static gen_helper_gvec_3_ptr * const fns[3] = {            \
2387            gen_helper_##NAME##_h,                                 \
2388            gen_helper_##NAME##_w,                                 \
2389            gen_helper_##NAME##_d,                                 \
2390        };                                                         \
2391        TCGLabel *over = gen_new_label();                          \
2392        gen_set_rm(s, 7);                                          \
2393        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);          \
2394                                                                   \
2395        data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
2396        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
2397        tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
2398                           vreg_ofs(s, a->rs2), cpu_env,           \
2399                           s->vlen / 8, s->vlen / 8, data,         \
2400                           fns[s->sew - 1]);                       \
2401        mark_vs_dirty(s);                                          \
2402        gen_set_label(over);                                       \
2403        return true;                                               \
2404    }                                                              \
2405    return false;                                                  \
2406}
2407
2408GEN_OPFV_TRANS(vfsqrt_v, opfv_check)
2409
2410/* Vector Floating-Point MIN/MAX Instructions */
2411GEN_OPFVV_TRANS(vfmin_vv, opfvv_check)
2412GEN_OPFVV_TRANS(vfmax_vv, opfvv_check)
2413GEN_OPFVF_TRANS(vfmin_vf, opfvf_check)
2414GEN_OPFVF_TRANS(vfmax_vf, opfvf_check)
2415
2416/* Vector Floating-Point Sign-Injection Instructions */
2417GEN_OPFVV_TRANS(vfsgnj_vv, opfvv_check)
2418GEN_OPFVV_TRANS(vfsgnjn_vv, opfvv_check)
2419GEN_OPFVV_TRANS(vfsgnjx_vv, opfvv_check)
2420GEN_OPFVF_TRANS(vfsgnj_vf, opfvf_check)
2421GEN_OPFVF_TRANS(vfsgnjn_vf, opfvf_check)
2422GEN_OPFVF_TRANS(vfsgnjx_vf, opfvf_check)
2423
2424/* Vector Floating-Point Compare Instructions */
2425static bool opfvv_cmp_check(DisasContext *s, arg_rmrr *a)
2426{
2427    return require_rvv(s) &&
2428           require_rvf(s) &&
2429           vext_check_isa_ill(s) &&
2430           vext_check_mss(s, a->rd, a->rs1, a->rs2);
2431}
2432
/* Vector-vector FP compares; every one is gated by opfvv_cmp_check(). */
GEN_OPFVV_TRANS(vmfeq_vv, opfvv_cmp_check)
GEN_OPFVV_TRANS(vmfne_vv, opfvv_cmp_check)
GEN_OPFVV_TRANS(vmflt_vv, opfvv_cmp_check)
GEN_OPFVV_TRANS(vmfle_vv, opfvv_cmp_check)
GEN_OPFVV_TRANS(vmford_vv, opfvv_cmp_check)
2438
2439static bool opfvf_cmp_check(DisasContext *s, arg_rmrr *a)
2440{
2441    return require_rvv(s) &&
2442           require_rvf(s) &&
2443           vext_check_isa_ill(s) &&
2444           vext_check_ms(s, a->rd, a->rs2);
2445}
2446
/*
 * Vector-scalar FP compares, all gated by opfvf_cmp_check().
 * Note that vmfgt/vmfge are instantiated only in this .vf form.
 */
GEN_OPFVF_TRANS(vmfeq_vf, opfvf_cmp_check)
GEN_OPFVF_TRANS(vmfne_vf, opfvf_cmp_check)
GEN_OPFVF_TRANS(vmflt_vf, opfvf_cmp_check)
GEN_OPFVF_TRANS(vmfle_vf, opfvf_cmp_check)
GEN_OPFVF_TRANS(vmfgt_vf, opfvf_cmp_check)
GEN_OPFVF_TRANS(vmfge_vf, opfvf_cmp_check)
GEN_OPFVF_TRANS(vmford_vf, opfvf_cmp_check)

/* Vector Floating-Point Classify Instruction (gated by opfv_check) */
GEN_OPFV_TRANS(vfclass_v, opfv_check)

/* Vector Floating-Point Merge Instruction (gated by opfvf_check) */
GEN_OPFVF_TRANS(vfmerge_vfm,  opfvf_check)
2460
2461static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a)
2462{
2463    if (require_rvv(s) &&
2464        require_rvf(s) &&
2465        vext_check_isa_ill(s) &&
2466        require_align(a->rd, s->lmul)) {
2467        TCGv_i64 t1;
2468
2469        if (s->vl_eq_vlmax) {
2470            t1 = tcg_temp_new_i64();
2471            /* NaN-box f[rs1] */
2472            do_nanbox(s, t1, cpu_fpr[a->rs1]);
2473
2474            tcg_gen_gvec_dup_i64(s->sew, vreg_ofs(s, a->rd),
2475                                 MAXSZ(s), MAXSZ(s), t1);
2476            mark_vs_dirty(s);
2477        } else {
2478            TCGv_ptr dest;
2479            TCGv_i32 desc;
2480            uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
2481            static gen_helper_vmv_vx * const fns[3] = {
2482                gen_helper_vmv_v_x_h,
2483                gen_helper_vmv_v_x_w,
2484                gen_helper_vmv_v_x_d,
2485            };
2486            TCGLabel *over = gen_new_label();
2487            tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
2488
2489            t1 = tcg_temp_new_i64();
2490            /* NaN-box f[rs1] */
2491            do_nanbox(s, t1, cpu_fpr[a->rs1]);
2492
2493            dest = tcg_temp_new_ptr();
2494            desc = tcg_constant_i32(simd_desc(s->vlen / 8, s->vlen / 8, data));
2495            tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, a->rd));
2496
2497            fns[s->sew - 1](dest, t1, cpu_env, desc);
2498
2499            tcg_temp_free_ptr(dest);
2500            mark_vs_dirty(s);
2501            gen_set_label(over);
2502        }
2503        tcg_temp_free_i64(t1);
2504        return true;
2505    }
2506    return false;
2507}
2508
/*
 * Single-Width Floating-Point/Integer Type-Convert Instructions
 * (all four conversions are gated by opfv_check)
 */
GEN_OPFV_TRANS(vfcvt_xu_f_v, opfv_check)
GEN_OPFV_TRANS(vfcvt_x_f_v, opfv_check)
GEN_OPFV_TRANS(vfcvt_f_xu_v, opfv_check)
GEN_OPFV_TRANS(vfcvt_f_x_v, opfv_check)
2514
2515/* Widening Floating-Point/Integer Type-Convert Instructions */
2516
2517/*
2518 * If the current SEW does not correspond to a supported IEEE floating-point
2519 * type, an illegal instruction exception is raised
2520 */
2521static bool opfv_widen_check(DisasContext *s, arg_rmr *a)
2522{
2523    return require_rvv(s) &&
2524           require_scale_rvf(s) &&
2525           (s->sew != MO_8) &&
2526           vext_check_isa_ill(s) &&
2527           vext_check_ds(s, a->rd, a->rs2, a->vm);
2528}
2529
/*
 * Generate trans_<NAME>() for a widening convert with one vector source.
 *
 * The generated function returns false when opfv_widen_check() rejects the
 * instruction.  Otherwise it:
 *  - calls gen_set_rm(s, 7) (NOTE(review): 7 is presumably the "dynamic"
 *    rounding-mode encoding - confirm against gen_set_rm);
 *  - emits a branch that skips the operation when vl == 0;
 *  - packs VM and LMUL into the descriptor data;
 *  - calls the out-of-line helper picked by fns[s->sew - 1], i.e.
 *    gen_helper_<NAME>_h or gen_helper_<NAME>_w.  The two-entry table
 *    relies on opfv_widen_check() having restricted SEW beforehand;
 *  - marks the vector state dirty.
 */
#define GEN_OPFV_WIDEN_TRANS(NAME)                                 \
static bool trans_##NAME(DisasContext *s, arg_rmr *a)              \
{                                                                  \
    if (opfv_widen_check(s, a)) {                                  \
        uint32_t data = 0;                                         \
        static gen_helper_gvec_3_ptr * const fns[2] = {            \
            gen_helper_##NAME##_h,                                 \
            gen_helper_##NAME##_w,                                 \
        };                                                         \
        TCGLabel *over = gen_new_label();                          \
        gen_set_rm(s, 7);                                          \
        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);          \
                                                                   \
        data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
        tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
                           vreg_ofs(s, a->rs2), cpu_env,           \
                           s->vlen / 8, s->vlen / 8, data,         \
                           fns[s->sew - 1]);                       \
        mark_vs_dirty(s);                                          \
        gen_set_label(over);                                       \
        return true;                                               \
    }                                                              \
    return false;                                                  \
}
2555
/*
 * Instantiate the widening converts.  Each line defines trans_<name>() and
 * references the helpers gen_helper_<name>_h and gen_helper_<name>_w.
 */
GEN_OPFV_WIDEN_TRANS(vfwcvt_xu_f_v)
GEN_OPFV_WIDEN_TRANS(vfwcvt_x_f_v)
GEN_OPFV_WIDEN_TRANS(vfwcvt_f_xu_v)
GEN_OPFV_WIDEN_TRANS(vfwcvt_f_x_v)
GEN_OPFV_WIDEN_TRANS(vfwcvt_f_f_v)
2561
2562/* Narrowing Floating-Point/Integer Type-Convert Instructions */
2563
2564/*
2565 * If the current SEW does not correspond to a supported IEEE floating-point
2566 * type, an illegal instruction exception is raised
2567 */
2568static bool opfv_narrow_check(DisasContext *s, arg_rmr *a)
2569{
2570    return require_rvv(s) &&
2571           require_rvf(s) &&
2572           (s->sew != MO_64) &&
2573           vext_check_isa_ill(s) &&
2574           /* OPFV narrowing instructions ignore vs1 check */
2575           vext_check_sd(s, a->rd, a->rs2, a->vm);
2576}
2577
/*
 * Generate trans_<NAME>() for a narrowing convert with one vector source.
 *
 * Structure mirrors the widening generator: the instruction is rejected
 * (return false) unless opfv_narrow_check() passes; otherwise the rounding
 * mode is set via gen_set_rm(s, 7), the operation is skipped at run time
 * when vl == 0, VM and LMUL are packed into the descriptor data, and the
 * helper fns[s->sew - 1] (gen_helper_<NAME>_h or gen_helper_<NAME>_w) is
 * invoked.  The two-entry table relies on opfv_narrow_check() having
 * excluded MO_8 (through require_rvf) and MO_64.
 */
#define GEN_OPFV_NARROW_TRANS(NAME)                                \
static bool trans_##NAME(DisasContext *s, arg_rmr *a)              \
{                                                                  \
    if (opfv_narrow_check(s, a)) {                                 \
        uint32_t data = 0;                                         \
        static gen_helper_gvec_3_ptr * const fns[2] = {            \
            gen_helper_##NAME##_h,                                 \
            gen_helper_##NAME##_w,                                 \
        };                                                         \
        TCGLabel *over = gen_new_label();                          \
        gen_set_rm(s, 7);                                          \
        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);          \
                                                                   \
        data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
        tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
                           vreg_ofs(s, a->rs2), cpu_env,           \
                           s->vlen / 8, s->vlen / 8, data,         \
                           fns[s->sew - 1]);                       \
        mark_vs_dirty(s);                                          \
        gen_set_label(over);                                       \
        return true;                                               \
    }                                                              \
    return false;                                                  \
}
2603
/*
 * Instantiate the narrowing converts.  Each line defines trans_<name>() and
 * references the helpers gen_helper_<name>_h and gen_helper_<name>_w.
 */
GEN_OPFV_NARROW_TRANS(vfncvt_xu_f_v)
GEN_OPFV_NARROW_TRANS(vfncvt_x_f_v)
GEN_OPFV_NARROW_TRANS(vfncvt_f_xu_v)
GEN_OPFV_NARROW_TRANS(vfncvt_f_x_v)
GEN_OPFV_NARROW_TRANS(vfncvt_f_f_v)
2609
2610/*
2611 *** Vector Reduction Operations
2612 */
2613/* Vector Single-Width Integer Reduction Instructions */
2614static bool reduction_check(DisasContext *s, arg_rmrr *a)
2615{
2616    return require_rvv(s) &&
2617           vext_check_isa_ill(s) &&
2618           vext_check_reduction(s, a->rs2);
2619}
2620
/* Single-width integer reductions, all gated by reduction_check(). */
GEN_OPIVV_TRANS(vredsum_vs, reduction_check)
GEN_OPIVV_TRANS(vredmaxu_vs, reduction_check)
GEN_OPIVV_TRANS(vredmax_vs, reduction_check)
GEN_OPIVV_TRANS(vredminu_vs, reduction_check)
GEN_OPIVV_TRANS(vredmin_vs, reduction_check)
GEN_OPIVV_TRANS(vredand_vs, reduction_check)
GEN_OPIVV_TRANS(vredor_vs, reduction_check)
GEN_OPIVV_TRANS(vredxor_vs, reduction_check)
2629
2630/* Vector Widening Integer Reduction Instructions */
2631static bool reduction_widen_check(DisasContext *s, arg_rmrr *a)
2632{
2633    return reduction_check(s, a) && (s->sew < MO_64);
2634}
2635
/* Signed and unsigned widening sums, gated by reduction_widen_check(). */
GEN_OPIVV_WIDEN_TRANS(vwredsum_vs, reduction_widen_check)
GEN_OPIVV_WIDEN_TRANS(vwredsumu_vs, reduction_widen_check)
2638
2639/* Vector Single-Width Floating-Point Reduction Instructions */
2640GEN_OPFVV_TRANS(vfredsum_vs, reduction_check)
2641GEN_OPFVV_TRANS(vfredmax_vs, reduction_check)
2642GEN_OPFVV_TRANS(vfredmin_vs, reduction_check)
2643
2644/* Vector Widening Floating-Point Reduction Instructions */
2645GEN_OPFVV_WIDEN_TRANS(vfwredsum_vs, reduction_check)
2646
2647/*
2648 *** Vector Mask Operations
2649 */
2650
/*
 * Vector Mask-Register Logical Instructions
 *
 * Generate trans_<NAME>() for a mask-register logical operation on rs1 and
 * rs2.  The generated function requires the vector unit to be enabled and
 * vtype to be legal; no register alignment or overlap checks are made.
 * The operation is skipped at run time when vl == 0.  Only LMUL is packed
 * into the descriptor data (no VM field), and gen_helper_<NAME> is called
 * with the offsets of rd, v0, rs1 and rs2.
 */
#define GEN_MM_TRANS(NAME)                                         \
static bool trans_##NAME(DisasContext *s, arg_r *a)                \
{                                                                  \
    if (require_rvv(s) &&                                          \
        vext_check_isa_ill(s)) {                                   \
        uint32_t data = 0;                                         \
        gen_helper_gvec_4_ptr *fn = gen_helper_##NAME;             \
        TCGLabel *over = gen_new_label();                          \
        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);          \
                                                                   \
        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
        tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
                           vreg_ofs(s, a->rs1),                    \
                           vreg_ofs(s, a->rs2), cpu_env,           \
                           s->vlen / 8, s->vlen / 8, data, fn);    \
        mark_vs_dirty(s);                                          \
        gen_set_label(over);                                       \
        return true;                                               \
    }                                                              \
    return false;                                                  \
}
2673
/* One trans_<name>() per mask logical op; each calls gen_helper_<name>. */
GEN_MM_TRANS(vmand_mm)
GEN_MM_TRANS(vmnand_mm)
GEN_MM_TRANS(vmandnot_mm)
GEN_MM_TRANS(vmxor_mm)
GEN_MM_TRANS(vmor_mm)
GEN_MM_TRANS(vmnor_mm)
GEN_MM_TRANS(vmornot_mm)
GEN_MM_TRANS(vmxnor_mm)
2682
2683/* Vector count population in mask vcpop */
2684static bool trans_vcpop_m(DisasContext *s, arg_rmr *a)
2685{
2686    if (require_rvv(s) &&
2687        vext_check_isa_ill(s)) {
2688        TCGv_ptr src2, mask;
2689        TCGv dst;
2690        TCGv_i32 desc;
2691        uint32_t data = 0;
2692        data = FIELD_DP32(data, VDATA, VM, a->vm);
2693        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
2694
2695        mask = tcg_temp_new_ptr();
2696        src2 = tcg_temp_new_ptr();
2697        dst = dest_gpr(s, a->rd);
2698        desc = tcg_constant_i32(simd_desc(s->vlen / 8, s->vlen / 8, data));
2699
2700        tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, a->rs2));
2701        tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));
2702
2703        gen_helper_vcpop_m(dst, mask, src2, cpu_env, desc);
2704        gen_set_gpr(s, a->rd, dst);
2705
2706        tcg_temp_free_ptr(mask);
2707        tcg_temp_free_ptr(src2);
2708
2709        return true;
2710    }
2711    return false;
2712}
2713
2714/* vmfirst find-first-set mask bit */
2715static bool trans_vfirst_m(DisasContext *s, arg_rmr *a)
2716{
2717    if (require_rvv(s) &&
2718        vext_check_isa_ill(s)) {
2719        TCGv_ptr src2, mask;
2720        TCGv dst;
2721        TCGv_i32 desc;
2722        uint32_t data = 0;
2723        data = FIELD_DP32(data, VDATA, VM, a->vm);
2724        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
2725
2726        mask = tcg_temp_new_ptr();
2727        src2 = tcg_temp_new_ptr();
2728        dst = dest_gpr(s, a->rd);
2729        desc = tcg_constant_i32(simd_desc(s->vlen / 8, s->vlen / 8, data));
2730
2731        tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, a->rs2));
2732        tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));
2733
2734        gen_helper_vfirst_m(dst, mask, src2, cpu_env, desc);
2735        gen_set_gpr(s, a->rd, dst);
2736
2737        tcg_temp_free_ptr(mask);
2738        tcg_temp_free_ptr(src2);
2739        return true;
2740    }
2741    return false;
2742}
2743
/*
 * vmsbf.m set-before-first mask bit
 * vmsif.m set-including-first mask bit
 * vmsof.m set-only-first mask bit
 *
 * Generate trans_<NAME>() for these instructions.  Besides the usual
 * vector-enabled / legal-vtype checks, the destination must pass
 * require_vm() (mask overlap rule) and must differ from the source
 * register rs2.  The operation is skipped at run time when vl == 0;
 * otherwise gen_helper_<NAME> is called with the offsets of rd, v0 and
 * rs2, with VM and LMUL packed into the descriptor data.
 */
#define GEN_M_TRANS(NAME)                                          \
static bool trans_##NAME(DisasContext *s, arg_rmr *a)              \
{                                                                  \
    if (require_rvv(s) &&                                          \
        vext_check_isa_ill(s) &&                                   \
        require_vm(a->vm, a->rd) &&                                \
        (a->rd != a->rs2)) {                                       \
        uint32_t data = 0;                                         \
        gen_helper_gvec_3_ptr *fn = gen_helper_##NAME;             \
        TCGLabel *over = gen_new_label();                          \
        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);          \
                                                                   \
        data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
        tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd),                     \
                           vreg_ofs(s, 0), vreg_ofs(s, a->rs2),    \
                           cpu_env, s->vlen / 8, s->vlen / 8,      \
                           data, fn);                              \
        mark_vs_dirty(s);                                          \
        gen_set_label(over);                                       \
        return true;                                               \
    }                                                              \
    return false;                                                  \
}
2771
/* Each line defines trans_<name>() backed by gen_helper_<name>. */
GEN_M_TRANS(vmsbf_m)
GEN_M_TRANS(vmsif_m)
GEN_M_TRANS(vmsof_m)
2775
2776/*
2777 * Vector Iota Instruction
2778 *
2779 * 1. The destination register cannot overlap the source register.
2780 * 2. If masked, cannot overlap the mask register ('v0').
2781 * 3. An illegal instruction exception is raised if vstart is non-zero.
2782 */
2783static bool trans_viota_m(DisasContext *s, arg_viota_m *a)
2784{
2785    if (require_rvv(s) &&
2786        vext_check_isa_ill(s) &&
2787        !is_overlapped(a->rd, 1 << MAX(s->lmul, 0), a->rs2, 1) &&
2788        require_vm(a->vm, a->rd) &&
2789        require_align(a->rd, s->lmul)) {
2790        uint32_t data = 0;
2791        TCGLabel *over = gen_new_label();
2792        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
2793
2794        data = FIELD_DP32(data, VDATA, VM, a->vm);
2795        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
2796        static gen_helper_gvec_3_ptr * const fns[4] = {
2797            gen_helper_viota_m_b, gen_helper_viota_m_h,
2798            gen_helper_viota_m_w, gen_helper_viota_m_d,
2799        };
2800        tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
2801                           vreg_ofs(s, a->rs2), cpu_env,
2802                           s->vlen / 8, s->vlen / 8, data, fns[s->sew]);
2803        mark_vs_dirty(s);
2804        gen_set_label(over);
2805        return true;
2806    }
2807    return false;
2808}
2809
2810/* Vector Element Index Instruction */
2811static bool trans_vid_v(DisasContext *s, arg_vid_v *a)
2812{
2813    if (require_rvv(s) &&
2814        vext_check_isa_ill(s) &&
2815        require_align(a->rd, s->lmul) &&
2816        require_vm(a->vm, a->rd)) {
2817        uint32_t data = 0;
2818        TCGLabel *over = gen_new_label();
2819        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
2820
2821        data = FIELD_DP32(data, VDATA, VM, a->vm);
2822        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
2823        static gen_helper_gvec_2_ptr * const fns[4] = {
2824            gen_helper_vid_v_b, gen_helper_vid_v_h,
2825            gen_helper_vid_v_w, gen_helper_vid_v_d,
2826        };
2827        tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
2828                           cpu_env, s->vlen / 8, s->vlen / 8,
2829                           data, fns[s->sew]);
2830        mark_vs_dirty(s);
2831        gen_set_label(over);
2832        return true;
2833    }
2834    return false;
2835}
2836
2837/*
2838 *** Vector Permutation Instructions
2839 */
2840
2841/* Integer Extract Instruction */
2842
2843static void load_element(TCGv_i64 dest, TCGv_ptr base,
2844                         int ofs, int sew, bool sign)
2845{
2846    switch (sew) {
2847    case MO_8:
2848        if (!sign) {
2849            tcg_gen_ld8u_i64(dest, base, ofs);
2850        } else {
2851            tcg_gen_ld8s_i64(dest, base, ofs);
2852        }
2853        break;
2854    case MO_16:
2855        if (!sign) {
2856            tcg_gen_ld16u_i64(dest, base, ofs);
2857        } else {
2858            tcg_gen_ld16s_i64(dest, base, ofs);
2859        }
2860        break;
2861    case MO_32:
2862        if (!sign) {
2863            tcg_gen_ld32u_i64(dest, base, ofs);
2864        } else {
2865            tcg_gen_ld32s_i64(dest, base, ofs);
2866        }
2867        break;
2868    case MO_64:
2869        tcg_gen_ld_i64(dest, base, ofs);
2870        break;
2871    default:
2872        g_assert_not_reached();
2873        break;
2874    }
2875}
2876
2877/* offset of the idx element with base regsiter r */
2878static uint32_t endian_ofs(DisasContext *s, int r, int idx)
2879{
2880#ifdef HOST_WORDS_BIGENDIAN
2881    return vreg_ofs(s, r) + ((idx ^ (7 >> s->sew)) << s->sew);
2882#else
2883    return vreg_ofs(s, r) + (idx << s->sew);
2884#endif
2885}
2886
2887/* adjust the index according to the endian */
2888static void endian_adjust(TCGv_i32 ofs, int sew)
2889{
2890#ifdef HOST_WORDS_BIGENDIAN
2891    tcg_gen_xori_i32(ofs, ofs, 7 >> sew);
2892#endif
2893}
2894
2895/* Load idx >= VLMAX ? 0 : vreg[idx] */
2896static void vec_element_loadx(DisasContext *s, TCGv_i64 dest,
2897                              int vreg, TCGv idx, int vlmax)
2898{
2899    TCGv_i32 ofs = tcg_temp_new_i32();
2900    TCGv_ptr base = tcg_temp_new_ptr();
2901    TCGv_i64 t_idx = tcg_temp_new_i64();
2902    TCGv_i64 t_vlmax, t_zero;
2903
2904    /*
2905     * Mask the index to the length so that we do
2906     * not produce an out-of-range load.
2907     */
2908    tcg_gen_trunc_tl_i32(ofs, idx);
2909    tcg_gen_andi_i32(ofs, ofs, vlmax - 1);
2910
2911    /* Convert the index to an offset. */
2912    endian_adjust(ofs, s->sew);
2913    tcg_gen_shli_i32(ofs, ofs, s->sew);
2914
2915    /* Convert the index to a pointer. */
2916    tcg_gen_ext_i32_ptr(base, ofs);
2917    tcg_gen_add_ptr(base, base, cpu_env);
2918
2919    /* Perform the load. */
2920    load_element(dest, base,
2921                 vreg_ofs(s, vreg), s->sew, false);
2922    tcg_temp_free_ptr(base);
2923    tcg_temp_free_i32(ofs);
2924
2925    /* Flush out-of-range indexing to zero.  */
2926    t_vlmax = tcg_constant_i64(vlmax);
2927    t_zero = tcg_constant_i64(0);
2928    tcg_gen_extu_tl_i64(t_idx, idx);
2929
2930    tcg_gen_movcond_i64(TCG_COND_LTU, dest, t_idx,
2931                        t_vlmax, dest, t_zero);
2932
2933    tcg_temp_free_i64(t_idx);
2934}
2935
2936static void vec_element_loadi(DisasContext *s, TCGv_i64 dest,
2937                              int vreg, int idx, bool sign)
2938{
2939    load_element(dest, cpu_env, endian_ofs(s, vreg, idx), s->sew, sign);
2940}
2941
2942static bool trans_vext_x_v(DisasContext *s, arg_r *a)
2943{
2944    TCGv_i64 tmp = tcg_temp_new_i64();
2945    TCGv dest = dest_gpr(s, a->rd);
2946
2947    if (a->rs1 == 0) {
2948        /* Special case vmv.x.s rd, vs2. */
2949        vec_element_loadi(s, tmp, a->rs2, 0, false);
2950    } else {
2951        /* This instruction ignores LMUL and vector register groups */
2952        int vlmax = s->vlen >> (3 + s->sew);
2953        vec_element_loadx(s, tmp, a->rs2, cpu_gpr[a->rs1], vlmax);
2954    }
2955
2956    tcg_gen_trunc_i64_tl(dest, tmp);
2957    gen_set_gpr(s, a->rd, dest);
2958
2959    tcg_temp_free_i64(tmp);
2960    return true;
2961}
2962
2963/* Integer Scalar Move Instruction */
2964
2965static void store_element(TCGv_i64 val, TCGv_ptr base,
2966                          int ofs, int sew)
2967{
2968    switch (sew) {
2969    case MO_8:
2970        tcg_gen_st8_i64(val, base, ofs);
2971        break;
2972    case MO_16:
2973        tcg_gen_st16_i64(val, base, ofs);
2974        break;
2975    case MO_32:
2976        tcg_gen_st32_i64(val, base, ofs);
2977        break;
2978    case MO_64:
2979        tcg_gen_st_i64(val, base, ofs);
2980        break;
2981    default:
2982        g_assert_not_reached();
2983        break;
2984    }
2985}
2986
2987/*
2988 * Store vreg[idx] = val.
2989 * The index must be in range of VLMAX.
2990 */
2991static void vec_element_storei(DisasContext *s, int vreg,
2992                               int idx, TCGv_i64 val)
2993{
2994    store_element(val, cpu_env, endian_ofs(s, vreg, idx), s->sew);
2995}
2996
2997/* vmv.x.s rd, vs2 # x[rd] = vs2[0] */
2998static bool trans_vmv_x_s(DisasContext *s, arg_vmv_x_s *a)
2999{
3000    if (require_rvv(s) &&
3001        vext_check_isa_ill(s)) {
3002        TCGv_i64 t1;
3003        TCGv dest;
3004
3005        t1 = tcg_temp_new_i64();
3006        dest = tcg_temp_new();
3007        /*
3008         * load vreg and sign-extend to 64 bits,
3009         * then truncate to XLEN bits before storing to gpr.
3010         */
3011        vec_element_loadi(s, t1, a->rs2, 0, true);
3012        tcg_gen_trunc_i64_tl(dest, t1);
3013        gen_set_gpr(s, a->rd, dest);
3014        tcg_temp_free_i64(t1);
3015        tcg_temp_free(dest);
3016
3017        return true;
3018    }
3019    return false;
3020}
3021
3022/* vmv.s.x vd, rs1 # vd[0] = rs1 */
3023static bool trans_vmv_s_x(DisasContext *s, arg_vmv_s_x *a)
3024{
3025    if (require_rvv(s) &&
3026        vext_check_isa_ill(s)) {
3027        /* This instruction ignores LMUL and vector register groups */
3028        TCGv_i64 t1;
3029        TCGv s1;
3030        TCGLabel *over = gen_new_label();
3031
3032        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
3033
3034        t1 = tcg_temp_new_i64();
3035
3036        /*
3037         * load gpr and sign-extend to 64 bits,
3038         * then truncate to SEW bits when storing to vreg.
3039         */
3040        s1 = get_gpr(s, a->rs1, EXT_NONE);
3041        tcg_gen_ext_tl_i64(t1, s1);
3042        vec_element_storei(s, a->rd, 0, t1);
3043        tcg_temp_free_i64(t1);
3044        mark_vs_dirty(s);
3045        gen_set_label(over);
3046        return true;
3047    }
3048    return false;
3049}
3050
3051/* Floating-Point Scalar Move Instructions */
3052static bool trans_vfmv_f_s(DisasContext *s, arg_vfmv_f_s *a)
3053{
3054    if (require_rvv(s) &&
3055        require_rvf(s) &&
3056        vext_check_isa_ill(s)) {
3057        unsigned int ofs = (8 << s->sew);
3058        unsigned int len = 64 - ofs;
3059        TCGv_i64 t_nan;
3060
3061        vec_element_loadi(s, cpu_fpr[a->rd], a->rs2, 0, false);
3062        /* NaN-box f[rd] as necessary for SEW */
3063        if (len) {
3064            t_nan = tcg_constant_i64(UINT64_MAX);
3065            tcg_gen_deposit_i64(cpu_fpr[a->rd], cpu_fpr[a->rd],
3066                                t_nan, ofs, len);
3067        }
3068
3069        mark_fs_dirty(s);
3070        return true;
3071    }
3072    return false;
3073}
3074
3075/* vfmv.s.f vd, rs1 # vd[0] = rs1 (vs2=0) */
3076static bool trans_vfmv_s_f(DisasContext *s, arg_vfmv_s_f *a)
3077{
3078    if (require_rvv(s) &&
3079        require_rvf(s) &&
3080        vext_check_isa_ill(s)) {
3081        /* The instructions ignore LMUL and vector register group. */
3082        TCGv_i64 t1;
3083        TCGLabel *over = gen_new_label();
3084
3085        /* if vl == 0, skip vector register write back */
3086        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
3087
3088        /* NaN-box f[rs1] */
3089        t1 = tcg_temp_new_i64();
3090        do_nanbox(s, t1, cpu_fpr[a->rs1]);
3091
3092        vec_element_storei(s, a->rd, 0, t1);
3093        tcg_temp_free_i64(t1);
3094        mark_vs_dirty(s);
3095        gen_set_label(over);
3096        return true;
3097    }
3098    return false;
3099}
3100
3101/* Vector Slide Instructions */
3102static bool slideup_check(DisasContext *s, arg_rmrr *a)
3103{
3104    return require_rvv(s) &&
3105           vext_check_isa_ill(s) &&
3106           vext_check_slide(s, a->rd, a->rs2, a->vm, true);
3107}
3108
/*
 * Slide-up translators, all gated by slideup_check().
 * NOTE(review): the .vi form names vslideup_vx as its third argument,
 * presumably to reuse the .vx helpers with a zero-extended immediate
 * (IMM_ZX) - confirm against the GEN_OPIVI_TRANS definition.
 */
GEN_OPIVX_TRANS(vslideup_vx, slideup_check)
GEN_OPIVX_TRANS(vslide1up_vx, slideup_check)
GEN_OPIVI_TRANS(vslideup_vi, IMM_ZX, vslideup_vx, slideup_check)
3112
3113static bool slidedown_check(DisasContext *s, arg_rmrr *a)
3114{
3115    return require_rvv(s) &&
3116           vext_check_isa_ill(s) &&
3117           vext_check_slide(s, a->rd, a->rs2, a->vm, false);
3118}
3119
/*
 * Slide-down translators, all gated by slidedown_check().
 * NOTE(review): as with slide-up, the .vi form names vslidedown_vx as its
 * third argument, presumably to reuse the .vx helpers - confirm against
 * the GEN_OPIVI_TRANS definition.
 */
GEN_OPIVX_TRANS(vslidedown_vx, slidedown_check)
GEN_OPIVX_TRANS(vslide1down_vx, slidedown_check)
GEN_OPIVI_TRANS(vslidedown_vi, IMM_ZX, vslidedown_vx, slidedown_check)
3123
3124/* Vector Floating-Point Slide Instructions */
3125static bool fslideup_check(DisasContext *s, arg_rmrr *a)
3126{
3127    return slideup_check(s, a) &&
3128           require_rvf(s);
3129}
3130
3131static bool fslidedown_check(DisasContext *s, arg_rmrr *a)
3132{
3133    return slidedown_check(s, a) &&
3134           require_rvf(s);
3135}
3136
/* FP slide1up/slide1down, gated by the FP-aware slide checks above. */
GEN_OPFVF_TRANS(vfslide1up_vf, fslideup_check)
GEN_OPFVF_TRANS(vfslide1down_vf, fslidedown_check)
3139
3140/* Vector Register Gather Instruction */
3141static bool vrgather_vv_check(DisasContext *s, arg_rmrr *a)
3142{
3143    return require_rvv(s) &&
3144           vext_check_isa_ill(s) &&
3145           require_align(a->rd, s->lmul) &&
3146           require_align(a->rs1, s->lmul) &&
3147           require_align(a->rs2, s->lmul) &&
3148           (a->rd != a->rs2 && a->rd != a->rs1) &&
3149           require_vm(a->vm, a->rd);
3150}
3151
3152static bool vrgatherei16_vv_check(DisasContext *s, arg_rmrr *a)
3153{
3154    int8_t emul = MO_16 - s->sew + s->lmul;
3155    return require_rvv(s) &&
3156           vext_check_isa_ill(s) &&
3157           (emul >= -3 && emul <= 3) &&
3158           require_align(a->rd, s->lmul) &&
3159           require_align(a->rs1, emul) &&
3160           require_align(a->rs2, s->lmul) &&
3161           (a->rd != a->rs2 && a->rd != a->rs1) &&
3162           !is_overlapped(a->rd, 1 << MAX(s->lmul, 0),
3163                          a->rs1, 1 << MAX(emul, 0)) &&
3164           !is_overlapped(a->rd, 1 << MAX(s->lmul, 0),
3165                          a->rs2, 1 << MAX(s->lmul, 0)) &&
3166           require_vm(a->vm, a->rd);
3167}
3168
/* Vector-vector gathers, each gated by its own check function above. */
GEN_OPIVV_TRANS(vrgather_vv, vrgather_vv_check)
GEN_OPIVV_TRANS(vrgatherei16_vv, vrgatherei16_vv_check)
3171
3172static bool vrgather_vx_check(DisasContext *s, arg_rmrr *a)
3173{
3174    return require_rvv(s) &&
3175           vext_check_isa_ill(s) &&
3176           require_align(a->rd, s->lmul) &&
3177           require_align(a->rs2, s->lmul) &&
3178           (a->rd != a->rs2) &&
3179           require_vm(a->vm, a->rd);
3180}
3181
3182/* vrgather.vx vd, vs2, rs1, vm # vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */
3183static bool trans_vrgather_vx(DisasContext *s, arg_rmrr *a)
3184{
3185    if (!vrgather_vx_check(s, a)) {
3186        return false;
3187    }
3188
3189    if (a->vm && s->vl_eq_vlmax) {
3190        int scale = s->lmul - (s->sew + 3);
3191        int vlmax = scale < 0 ? s->vlen >> -scale : s->vlen << scale;
3192        TCGv_i64 dest = tcg_temp_new_i64();
3193
3194        if (a->rs1 == 0) {
3195            vec_element_loadi(s, dest, a->rs2, 0, false);
3196        } else {
3197            vec_element_loadx(s, dest, a->rs2, cpu_gpr[a->rs1], vlmax);
3198        }
3199
3200        tcg_gen_gvec_dup_i64(s->sew, vreg_ofs(s, a->rd),
3201                             MAXSZ(s), MAXSZ(s), dest);
3202        tcg_temp_free_i64(dest);
3203        mark_vs_dirty(s);
3204    } else {
3205        static gen_helper_opivx * const fns[4] = {
3206            gen_helper_vrgather_vx_b, gen_helper_vrgather_vx_h,
3207            gen_helper_vrgather_vx_w, gen_helper_vrgather_vx_d
3208        };
3209        return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);
3210    }
3211    return true;
3212}
3213
3214/* vrgather.vi vd, vs2, imm, vm # vd[i] = (imm >= VLMAX) ? 0 : vs2[imm] */
3215static bool trans_vrgather_vi(DisasContext *s, arg_rmrr *a)
3216{
3217    if (!vrgather_vx_check(s, a)) {
3218        return false;
3219    }
3220
3221    if (a->vm && s->vl_eq_vlmax) {
3222        int scale = s->lmul - (s->sew + 3);
3223        int vlmax = scale < 0 ? s->vlen >> -scale : s->vlen << scale;
3224        if (a->rs1 >= vlmax) {
3225            tcg_gen_gvec_dup_imm(MO_64, vreg_ofs(s, a->rd),
3226                                 MAXSZ(s), MAXSZ(s), 0);
3227        } else {
3228            tcg_gen_gvec_dup_mem(s->sew, vreg_ofs(s, a->rd),
3229                                 endian_ofs(s, a->rs2, a->rs1),
3230                                 MAXSZ(s), MAXSZ(s));
3231        }
3232        mark_vs_dirty(s);
3233    } else {
3234        static gen_helper_opivx * const fns[4] = {
3235            gen_helper_vrgather_vx_b, gen_helper_vrgather_vx_h,
3236            gen_helper_vrgather_vx_w, gen_helper_vrgather_vx_d
3237        };
3238        return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew],
3239                           s, IMM_ZX);
3240    }
3241    return true;
3242}
3243
3244/*
3245 * Vector Compress Instruction
3246 *
3247 * The destination vector register group cannot overlap the
3248 * source vector register group or the source mask register.
3249 */
3250static bool vcompress_vm_check(DisasContext *s, arg_r *a)
3251{
3252    return require_rvv(s) &&
3253           vext_check_isa_ill(s) &&
3254           require_align(a->rd, s->lmul) &&
3255           require_align(a->rs2, s->lmul) &&
3256           (a->rd != a->rs2) &&
3257           !is_overlapped(a->rd, 1 << MAX(s->lmul, 0), a->rs1, 1);
3258}
3259
3260static bool trans_vcompress_vm(DisasContext *s, arg_r *a)
3261{
3262    if (vcompress_vm_check(s, a)) {
3263        uint32_t data = 0;
3264        static gen_helper_gvec_4_ptr * const fns[4] = {
3265            gen_helper_vcompress_vm_b, gen_helper_vcompress_vm_h,
3266            gen_helper_vcompress_vm_w, gen_helper_vcompress_vm_d,
3267        };
3268        TCGLabel *over = gen_new_label();
3269        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
3270
3271        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
3272        tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
3273                           vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2),
3274                           cpu_env, s->vlen / 8, s->vlen / 8, data,
3275                           fns[s->sew]);
3276        mark_vs_dirty(s);
3277        gen_set_label(over);
3278        return true;
3279    }
3280    return false;
3281}
3282
3283/*
3284 * Whole Vector Register Move Instructions ignore vtype and vl setting.
3285 * Thus, we don't need to check vill bit. (Section 16.6)
3286 */
3287#define GEN_VMV_WHOLE_TRANS(NAME, LEN)                          \
3288static bool trans_##NAME(DisasContext *s, arg_##NAME * a)       \
3289{                                                               \
3290    if (require_rvv(s) &&                                       \
3291        QEMU_IS_ALIGNED(a->rd, LEN) &&                          \
3292        QEMU_IS_ALIGNED(a->rs2, LEN)) {                         \
3293        /* EEW = 8 */                                           \
3294        tcg_gen_gvec_mov(MO_8, vreg_ofs(s, a->rd),              \
3295                         vreg_ofs(s, a->rs2),                   \
3296                         s->vlen / 8 * LEN, s->vlen / 8 * LEN); \
3297        mark_vs_dirty(s);                                       \
3298        return true;                                            \
3299    }                                                           \
3300    return false;                                               \
3301}
3302
3303GEN_VMV_WHOLE_TRANS(vmv1r_v, 1)
3304GEN_VMV_WHOLE_TRANS(vmv2r_v, 2)
3305GEN_VMV_WHOLE_TRANS(vmv4r_v, 4)
3306GEN_VMV_WHOLE_TRANS(vmv8r_v, 8)
3307
3308static bool int_ext_check(DisasContext *s, arg_rmr *a, uint8_t div)
3309{
3310    uint8_t from = (s->sew + 3) - div;
3311    bool ret = require_rvv(s) &&
3312        (from >= 3 && from <= 8) &&
3313        (a->rd != a->rs2) &&
3314        require_align(a->rd, s->lmul) &&
3315        require_align(a->rs2, s->lmul - div) &&
3316        require_vm(a->vm, a->rd) &&
3317        require_noover(a->rd, s->lmul, a->rs2, s->lmul - div);
3318    return ret;
3319}
3320
3321static bool int_ext_op(DisasContext *s, arg_rmr *a, uint8_t seq)
3322{
3323    uint32_t data = 0;
3324    gen_helper_gvec_3_ptr *fn;
3325    TCGLabel *over = gen_new_label();
3326    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
3327
3328    static gen_helper_gvec_3_ptr * const fns[6][4] = {
3329        {
3330            NULL, gen_helper_vzext_vf2_h,
3331            gen_helper_vzext_vf2_w, gen_helper_vzext_vf2_d
3332        },
3333        {
3334            NULL, NULL,
3335            gen_helper_vzext_vf4_w, gen_helper_vzext_vf4_d,
3336        },
3337        {
3338            NULL, NULL,
3339            NULL, gen_helper_vzext_vf8_d
3340        },
3341        {
3342            NULL, gen_helper_vsext_vf2_h,
3343            gen_helper_vsext_vf2_w, gen_helper_vsext_vf2_d
3344        },
3345        {
3346            NULL, NULL,
3347            gen_helper_vsext_vf4_w, gen_helper_vsext_vf4_d,
3348        },
3349        {
3350            NULL, NULL,
3351            NULL, gen_helper_vsext_vf8_d
3352        }
3353    };
3354
3355    fn = fns[seq][s->sew];
3356    if (fn == NULL) {
3357        return false;
3358    }
3359
3360    data = FIELD_DP32(data, VDATA, VM, a->vm);
3361
3362    tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
3363                       vreg_ofs(s, a->rs2), cpu_env,
3364                       s->vlen / 8, s->vlen / 8, data, fn);
3365
3366    mark_vs_dirty(s);
3367    gen_set_label(over);
3368    return true;
3369}
3370
3371/* Vector Integer Extension */
3372#define GEN_INT_EXT_TRANS(NAME, DIV, SEQ)             \
3373static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
3374{                                                     \
3375    if (int_ext_check(s, a, DIV)) {                   \
3376        return int_ext_op(s, a, SEQ);                 \
3377    }                                                 \
3378    return false;                                     \
3379}
3380
3381GEN_INT_EXT_TRANS(vzext_vf2, 1, 0)
3382GEN_INT_EXT_TRANS(vzext_vf4, 2, 1)
3383GEN_INT_EXT_TRANS(vzext_vf8, 3, 2)
3384GEN_INT_EXT_TRANS(vsext_vf2, 1, 3)
3385GEN_INT_EXT_TRANS(vsext_vf4, 2, 4)
3386GEN_INT_EXT_TRANS(vsext_vf8, 3, 5)
3387