xref: /openbmc/qemu/target/loongarch/tcg/insn_trans/trans_vec.c.inc (revision 8188c3cda586dc445cac875b7d21c0c960dbe97e)
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * LoongArch vector translate functions
 * Copyright (c) 2022-2023 Loongson Technology Corporation Limited
 */
6*5c23704eSSong Gao
7*5c23704eSSong Gaostatic bool check_vec(DisasContext *ctx, uint32_t oprsz)
8*5c23704eSSong Gao{
9*5c23704eSSong Gao    if ((oprsz == 16) && ((ctx->base.tb->flags & HW_FLAGS_EUEN_SXE) == 0)) {
10*5c23704eSSong Gao        generate_exception(ctx, EXCCODE_SXD);
11*5c23704eSSong Gao        return false;
12*5c23704eSSong Gao    }
13*5c23704eSSong Gao
14*5c23704eSSong Gao    if ((oprsz == 32) && ((ctx->base.tb->flags & HW_FLAGS_EUEN_ASXE) == 0)) {
15*5c23704eSSong Gao        generate_exception(ctx, EXCCODE_ASXD);
16*5c23704eSSong Gao        return false;
17*5c23704eSSong Gao    }
18*5c23704eSSong Gao
19*5c23704eSSong Gao    return true;
20*5c23704eSSong Gao}
21*5c23704eSSong Gao
22*5c23704eSSong Gaostatic bool gen_vvvv_ptr_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
23*5c23704eSSong Gao                            gen_helper_gvec_4_ptr *fn)
24*5c23704eSSong Gao{
25*5c23704eSSong Gao    if (!check_vec(ctx, oprsz)) {
26*5c23704eSSong Gao        return true;
27*5c23704eSSong Gao    }
28*5c23704eSSong Gao
29*5c23704eSSong Gao    tcg_gen_gvec_4_ptr(vec_full_offset(a->vd),
30*5c23704eSSong Gao                       vec_full_offset(a->vj),
31*5c23704eSSong Gao                       vec_full_offset(a->vk),
32*5c23704eSSong Gao                       vec_full_offset(a->va),
33*5c23704eSSong Gao                       tcg_env,
34*5c23704eSSong Gao                       oprsz, ctx->vl / 8, 0, fn);
35*5c23704eSSong Gao    return true;
36*5c23704eSSong Gao}
37*5c23704eSSong Gao
38*5c23704eSSong Gaostatic bool gen_vvvv_ptr(DisasContext *ctx, arg_vvvv *a,
39*5c23704eSSong Gao                         gen_helper_gvec_4_ptr *fn)
40*5c23704eSSong Gao{
41*5c23704eSSong Gao    return gen_vvvv_ptr_vl(ctx, a, 16, fn);
42*5c23704eSSong Gao}
43*5c23704eSSong Gao
44*5c23704eSSong Gaostatic bool gen_xxxx_ptr(DisasContext *ctx, arg_vvvv *a,
45*5c23704eSSong Gao                         gen_helper_gvec_4_ptr *fn)
46*5c23704eSSong Gao{
47*5c23704eSSong Gao    return gen_vvvv_ptr_vl(ctx, a, 32, fn);
48*5c23704eSSong Gao}
49*5c23704eSSong Gao
50*5c23704eSSong Gaostatic bool gen_vvvv_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
51*5c23704eSSong Gao                        gen_helper_gvec_4 *fn)
52*5c23704eSSong Gao{
53*5c23704eSSong Gao    if (!check_vec(ctx, oprsz)) {
54*5c23704eSSong Gao        return true;
55*5c23704eSSong Gao    }
56*5c23704eSSong Gao
57*5c23704eSSong Gao    tcg_gen_gvec_4_ool(vec_full_offset(a->vd),
58*5c23704eSSong Gao                       vec_full_offset(a->vj),
59*5c23704eSSong Gao                       vec_full_offset(a->vk),
60*5c23704eSSong Gao                       vec_full_offset(a->va),
61*5c23704eSSong Gao                       oprsz, ctx->vl / 8, 0, fn);
62*5c23704eSSong Gao    return true;
63*5c23704eSSong Gao}
64*5c23704eSSong Gao
65*5c23704eSSong Gaostatic bool gen_vvvv(DisasContext *ctx, arg_vvvv *a,
66*5c23704eSSong Gao                     gen_helper_gvec_4 *fn)
67*5c23704eSSong Gao{
68*5c23704eSSong Gao    return gen_vvvv_vl(ctx, a, 16, fn);
69*5c23704eSSong Gao}
70*5c23704eSSong Gao
71*5c23704eSSong Gaostatic bool gen_xxxx(DisasContext *ctx, arg_vvvv *a,
72*5c23704eSSong Gao                     gen_helper_gvec_4 *fn)
73*5c23704eSSong Gao{
74*5c23704eSSong Gao    return gen_vvvv_vl(ctx, a, 32, fn);
75*5c23704eSSong Gao}
76*5c23704eSSong Gao
77*5c23704eSSong Gaostatic bool gen_vvv_ptr_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
78*5c23704eSSong Gao                           gen_helper_gvec_3_ptr *fn)
79*5c23704eSSong Gao{
80*5c23704eSSong Gao    if (!check_vec(ctx, oprsz)) {
81*5c23704eSSong Gao        return true;
82*5c23704eSSong Gao    }
83*5c23704eSSong Gao    tcg_gen_gvec_3_ptr(vec_full_offset(a->vd),
84*5c23704eSSong Gao                       vec_full_offset(a->vj),
85*5c23704eSSong Gao                       vec_full_offset(a->vk),
86*5c23704eSSong Gao                       tcg_env,
87*5c23704eSSong Gao                       oprsz, ctx->vl / 8, 0, fn);
88*5c23704eSSong Gao    return true;
89*5c23704eSSong Gao}
90*5c23704eSSong Gao
91*5c23704eSSong Gaostatic bool gen_vvv_ptr(DisasContext *ctx, arg_vvv *a,
92*5c23704eSSong Gao                        gen_helper_gvec_3_ptr *fn)
93*5c23704eSSong Gao{
94*5c23704eSSong Gao    return gen_vvv_ptr_vl(ctx, a, 16, fn);
95*5c23704eSSong Gao}
96*5c23704eSSong Gao
97*5c23704eSSong Gaostatic bool gen_xxx_ptr(DisasContext *ctx, arg_vvv *a,
98*5c23704eSSong Gao                        gen_helper_gvec_3_ptr *fn)
99*5c23704eSSong Gao{
100*5c23704eSSong Gao    return gen_vvv_ptr_vl(ctx, a, 32, fn);
101*5c23704eSSong Gao}
102*5c23704eSSong Gao
103*5c23704eSSong Gaostatic bool gen_vvv_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
104*5c23704eSSong Gao                       gen_helper_gvec_3 *fn)
105*5c23704eSSong Gao{
106*5c23704eSSong Gao    if (!check_vec(ctx, oprsz)) {
107*5c23704eSSong Gao        return true;
108*5c23704eSSong Gao    }
109*5c23704eSSong Gao
110*5c23704eSSong Gao    tcg_gen_gvec_3_ool(vec_full_offset(a->vd),
111*5c23704eSSong Gao                       vec_full_offset(a->vj),
112*5c23704eSSong Gao                       vec_full_offset(a->vk),
113*5c23704eSSong Gao                       oprsz, ctx->vl / 8, 0, fn);
114*5c23704eSSong Gao    return true;
115*5c23704eSSong Gao}
116*5c23704eSSong Gao
117*5c23704eSSong Gaostatic bool gen_vvv(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn)
118*5c23704eSSong Gao{
119*5c23704eSSong Gao    return gen_vvv_vl(ctx, a, 16, fn);
120*5c23704eSSong Gao}
121*5c23704eSSong Gao
122*5c23704eSSong Gaostatic bool gen_xxx(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn)
123*5c23704eSSong Gao{
124*5c23704eSSong Gao    return gen_vvv_vl(ctx, a, 32, fn);
125*5c23704eSSong Gao}
126*5c23704eSSong Gao
127*5c23704eSSong Gaostatic bool gen_vv_ptr_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz,
128*5c23704eSSong Gao                          gen_helper_gvec_2_ptr *fn)
129*5c23704eSSong Gao{
130*5c23704eSSong Gao    if (!check_vec(ctx, oprsz)) {
131*5c23704eSSong Gao        return true;
132*5c23704eSSong Gao    }
133*5c23704eSSong Gao
134*5c23704eSSong Gao    tcg_gen_gvec_2_ptr(vec_full_offset(a->vd),
135*5c23704eSSong Gao                       vec_full_offset(a->vj),
136*5c23704eSSong Gao                       tcg_env,
137*5c23704eSSong Gao                       oprsz, ctx->vl / 8, 0, fn);
138*5c23704eSSong Gao    return true;
139*5c23704eSSong Gao}
140*5c23704eSSong Gao
141*5c23704eSSong Gaostatic bool gen_vv_ptr(DisasContext *ctx, arg_vv *a,
142*5c23704eSSong Gao                       gen_helper_gvec_2_ptr *fn)
143*5c23704eSSong Gao{
144*5c23704eSSong Gao    return gen_vv_ptr_vl(ctx, a, 16, fn);
145*5c23704eSSong Gao}
146*5c23704eSSong Gao
147*5c23704eSSong Gaostatic bool gen_xx_ptr(DisasContext *ctx, arg_vv *a,
148*5c23704eSSong Gao                       gen_helper_gvec_2_ptr *fn)
149*5c23704eSSong Gao{
150*5c23704eSSong Gao    return gen_vv_ptr_vl(ctx, a, 32, fn);
151*5c23704eSSong Gao}
152*5c23704eSSong Gao
153*5c23704eSSong Gaostatic bool gen_vv_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz,
154*5c23704eSSong Gao                      gen_helper_gvec_2 *fn)
155*5c23704eSSong Gao{
156*5c23704eSSong Gao    if (!check_vec(ctx, oprsz)) {
157*5c23704eSSong Gao        return true;
158*5c23704eSSong Gao    }
159*5c23704eSSong Gao
160*5c23704eSSong Gao    tcg_gen_gvec_2_ool(vec_full_offset(a->vd),
161*5c23704eSSong Gao                       vec_full_offset(a->vj),
162*5c23704eSSong Gao                       oprsz, ctx->vl / 8, 0, fn);
163*5c23704eSSong Gao    return true;
164*5c23704eSSong Gao}
165*5c23704eSSong Gao
166*5c23704eSSong Gaostatic bool gen_vv(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn)
167*5c23704eSSong Gao{
168*5c23704eSSong Gao    return gen_vv_vl(ctx, a, 16, fn);
169*5c23704eSSong Gao}
170*5c23704eSSong Gao
171*5c23704eSSong Gaostatic bool gen_xx(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn)
172*5c23704eSSong Gao{
173*5c23704eSSong Gao    return gen_vv_vl(ctx, a, 32, fn);
174*5c23704eSSong Gao}
175*5c23704eSSong Gao
176*5c23704eSSong Gaostatic bool gen_vv_i_vl(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz,
177*5c23704eSSong Gao                        gen_helper_gvec_2i *fn)
178*5c23704eSSong Gao{
179*5c23704eSSong Gao    if (!check_vec(ctx, oprsz)) {
180*5c23704eSSong Gao        return true;
181*5c23704eSSong Gao    }
182*5c23704eSSong Gao
183*5c23704eSSong Gao    tcg_gen_gvec_2i_ool(vec_full_offset(a->vd),
184*5c23704eSSong Gao                        vec_full_offset(a->vj),
185*5c23704eSSong Gao                        tcg_constant_i64(a->imm),
186*5c23704eSSong Gao                        oprsz, ctx->vl / 8, 0, fn);
187*5c23704eSSong Gao    return true;
188*5c23704eSSong Gao}
189*5c23704eSSong Gao
190*5c23704eSSong Gaostatic bool gen_vv_i(DisasContext *ctx, arg_vv_i *a, gen_helper_gvec_2i *fn)
191*5c23704eSSong Gao{
192*5c23704eSSong Gao    return gen_vv_i_vl(ctx, a, 16, fn);
193*5c23704eSSong Gao}
194*5c23704eSSong Gao
195*5c23704eSSong Gaostatic bool gen_xx_i(DisasContext *ctx, arg_vv_i *a, gen_helper_gvec_2i *fn)
196*5c23704eSSong Gao{
197*5c23704eSSong Gao    return gen_vv_i_vl(ctx, a, 32, fn);
198*5c23704eSSong Gao}
199*5c23704eSSong Gao
200*5c23704eSSong Gaostatic bool gen_cv_vl(DisasContext *ctx, arg_cv *a, uint32_t sz,
201*5c23704eSSong Gao                      void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
202*5c23704eSSong Gao{
203*5c23704eSSong Gao    if (!check_vec(ctx, sz)) {
204*5c23704eSSong Gao        return true;
205*5c23704eSSong Gao    }
206*5c23704eSSong Gao
207*5c23704eSSong Gao    TCGv_i32 vj = tcg_constant_i32(a->vj);
208*5c23704eSSong Gao    TCGv_i32 cd = tcg_constant_i32(a->cd);
209*5c23704eSSong Gao    TCGv_i32 oprsz = tcg_constant_i32(sz);
210*5c23704eSSong Gao
211*5c23704eSSong Gao    func(tcg_env, oprsz, cd, vj);
212*5c23704eSSong Gao    return true;
213*5c23704eSSong Gao}
214*5c23704eSSong Gao
215*5c23704eSSong Gaostatic bool gen_cv(DisasContext *ctx, arg_cv *a,
216*5c23704eSSong Gao                   void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
217*5c23704eSSong Gao{
218*5c23704eSSong Gao    return gen_cv_vl(ctx, a, 16, func);
219*5c23704eSSong Gao}
220*5c23704eSSong Gao
221*5c23704eSSong Gaostatic bool gen_cx(DisasContext *ctx, arg_cv *a,
222*5c23704eSSong Gao                   void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
223*5c23704eSSong Gao{
224*5c23704eSSong Gao    return gen_cv_vl(ctx, a, 32, func);
225*5c23704eSSong Gao}
226*5c23704eSSong Gao
227*5c23704eSSong Gaostatic bool gvec_vvv_vl(DisasContext *ctx, arg_vvv *a,
228*5c23704eSSong Gao                        uint32_t oprsz, MemOp mop,
229*5c23704eSSong Gao                        void (*func)(unsigned, uint32_t, uint32_t,
230*5c23704eSSong Gao                                     uint32_t, uint32_t, uint32_t))
231*5c23704eSSong Gao{
232*5c23704eSSong Gao    uint32_t vd_ofs = vec_full_offset(a->vd);
233*5c23704eSSong Gao    uint32_t vj_ofs = vec_full_offset(a->vj);
234*5c23704eSSong Gao    uint32_t vk_ofs = vec_full_offset(a->vk);
235*5c23704eSSong Gao
236*5c23704eSSong Gao    if (!check_vec(ctx, oprsz)) {
237*5c23704eSSong Gao        return true;
238*5c23704eSSong Gao    }
239*5c23704eSSong Gao
240*5c23704eSSong Gao    func(mop, vd_ofs, vj_ofs, vk_ofs, oprsz, ctx->vl / 8);
241*5c23704eSSong Gao    return true;
242*5c23704eSSong Gao}
243*5c23704eSSong Gao
244*5c23704eSSong Gaostatic bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop,
245*5c23704eSSong Gao                     void (*func)(unsigned, uint32_t, uint32_t,
246*5c23704eSSong Gao                                  uint32_t, uint32_t, uint32_t))
247*5c23704eSSong Gao{
248*5c23704eSSong Gao    return gvec_vvv_vl(ctx, a, 16, mop, func);
249*5c23704eSSong Gao}
250*5c23704eSSong Gao
251*5c23704eSSong Gaostatic bool gvec_xxx(DisasContext *ctx, arg_vvv *a, MemOp mop,
252*5c23704eSSong Gao                     void (*func)(unsigned, uint32_t, uint32_t,
253*5c23704eSSong Gao                                  uint32_t, uint32_t, uint32_t))
254*5c23704eSSong Gao{
255*5c23704eSSong Gao    return gvec_vvv_vl(ctx, a, 32, mop, func);
256*5c23704eSSong Gao}
257*5c23704eSSong Gao
258*5c23704eSSong Gaostatic bool gvec_vv_vl(DisasContext *ctx, arg_vv *a,
259*5c23704eSSong Gao                       uint32_t oprsz, MemOp mop,
260*5c23704eSSong Gao                       void (*func)(unsigned, uint32_t, uint32_t,
261*5c23704eSSong Gao                                    uint32_t, uint32_t))
262*5c23704eSSong Gao{
263*5c23704eSSong Gao    uint32_t vd_ofs = vec_full_offset(a->vd);
264*5c23704eSSong Gao    uint32_t vj_ofs = vec_full_offset(a->vj);
265*5c23704eSSong Gao
266*5c23704eSSong Gao    if (!check_vec(ctx, oprsz)) {
267*5c23704eSSong Gao        return true;
268*5c23704eSSong Gao    }
269*5c23704eSSong Gao
270*5c23704eSSong Gao    func(mop, vd_ofs, vj_ofs, oprsz, ctx->vl / 8);
271*5c23704eSSong Gao    return true;
272*5c23704eSSong Gao}
273*5c23704eSSong Gao
274*5c23704eSSong Gao
275*5c23704eSSong Gaostatic bool gvec_vv(DisasContext *ctx, arg_vv *a, MemOp mop,
276*5c23704eSSong Gao                    void (*func)(unsigned, uint32_t, uint32_t,
277*5c23704eSSong Gao                                 uint32_t, uint32_t))
278*5c23704eSSong Gao{
279*5c23704eSSong Gao    return gvec_vv_vl(ctx, a, 16, mop, func);
280*5c23704eSSong Gao}
281*5c23704eSSong Gao
282*5c23704eSSong Gaostatic bool gvec_xx(DisasContext *ctx, arg_vv *a, MemOp mop,
283*5c23704eSSong Gao                    void (*func)(unsigned, uint32_t, uint32_t,
284*5c23704eSSong Gao                                 uint32_t, uint32_t))
285*5c23704eSSong Gao{
286*5c23704eSSong Gao    return gvec_vv_vl(ctx, a, 32, mop, func);
287*5c23704eSSong Gao}
288*5c23704eSSong Gao
289*5c23704eSSong Gaostatic bool gvec_vv_i_vl(DisasContext *ctx, arg_vv_i *a,
290*5c23704eSSong Gao                         uint32_t oprsz, MemOp mop,
291*5c23704eSSong Gao                         void (*func)(unsigned, uint32_t, uint32_t,
292*5c23704eSSong Gao                                      int64_t, uint32_t, uint32_t))
293*5c23704eSSong Gao{
294*5c23704eSSong Gao    uint32_t vd_ofs = vec_full_offset(a->vd);
295*5c23704eSSong Gao    uint32_t vj_ofs = vec_full_offset(a->vj);
296*5c23704eSSong Gao
297*5c23704eSSong Gao    if (!check_vec(ctx, oprsz)) {
298*5c23704eSSong Gao        return true;
299*5c23704eSSong Gao    }
300*5c23704eSSong Gao
301*5c23704eSSong Gao    func(mop, vd_ofs, vj_ofs, a->imm, oprsz, ctx->vl / 8);
302*5c23704eSSong Gao    return true;
303*5c23704eSSong Gao}
304*5c23704eSSong Gao
305*5c23704eSSong Gaostatic bool gvec_vv_i(DisasContext *ctx, arg_vv_i *a, MemOp mop,
306*5c23704eSSong Gao                      void (*func)(unsigned, uint32_t, uint32_t,
307*5c23704eSSong Gao                                   int64_t, uint32_t, uint32_t))
308*5c23704eSSong Gao{
309*5c23704eSSong Gao    return gvec_vv_i_vl(ctx, a, 16, mop, func);
310*5c23704eSSong Gao}
311*5c23704eSSong Gao
312*5c23704eSSong Gaostatic bool gvec_xx_i(DisasContext *ctx, arg_vv_i *a, MemOp mop,
313*5c23704eSSong Gao                      void (*func)(unsigned, uint32_t, uint32_t,
314*5c23704eSSong Gao                                   int64_t, uint32_t, uint32_t))
315*5c23704eSSong Gao{
316*5c23704eSSong Gao    return gvec_vv_i_vl(ctx,a, 32, mop, func);
317*5c23704eSSong Gao}
318*5c23704eSSong Gao
319*5c23704eSSong Gaostatic bool gvec_subi_vl(DisasContext *ctx, arg_vv_i *a,
320*5c23704eSSong Gao                         uint32_t oprsz, MemOp mop)
321*5c23704eSSong Gao{
322*5c23704eSSong Gao    uint32_t vd_ofs = vec_full_offset(a->vd);
323*5c23704eSSong Gao    uint32_t vj_ofs = vec_full_offset(a->vj);
324*5c23704eSSong Gao
325*5c23704eSSong Gao    if (!check_vec(ctx, oprsz)) {
326*5c23704eSSong Gao        return true;
327*5c23704eSSong Gao    }
328*5c23704eSSong Gao
329*5c23704eSSong Gao    tcg_gen_gvec_addi(mop, vd_ofs, vj_ofs, -a->imm, oprsz, ctx->vl / 8);
330*5c23704eSSong Gao    return true;
331*5c23704eSSong Gao}
332*5c23704eSSong Gao
333*5c23704eSSong Gaostatic bool gvec_subi(DisasContext *ctx, arg_vv_i *a, MemOp mop)
334*5c23704eSSong Gao{
335*5c23704eSSong Gao    return gvec_subi_vl(ctx, a, 16, mop);
336*5c23704eSSong Gao}
337*5c23704eSSong Gao
338*5c23704eSSong Gaostatic bool gvec_xsubi(DisasContext *ctx, arg_vv_i *a, MemOp mop)
339*5c23704eSSong Gao{
340*5c23704eSSong Gao    return gvec_subi_vl(ctx, a, 32, mop);
341*5c23704eSSong Gao}
342*5c23704eSSong Gao
/*
 * vadd_{b,h,w,d} and xvadd_{b,h,w,d}: expanded with tcg_gen_gvec_add at
 * the listed element size.  The LSX entries go through gvec_vvv (size 16),
 * the LASX entries through gvec_xxx (size 32).
 */
TRANS(vadd_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_add)
TRANS(vadd_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_add)
TRANS(vadd_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_add)
TRANS(vadd_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_add)
TRANS(xvadd_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_add)
TRANS(xvadd_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_add)
TRANS(xvadd_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_add)
TRANS(xvadd_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_add)
351*5c23704eSSong Gao
352*5c23704eSSong Gaostatic bool gen_vaddsub_q_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
353*5c23704eSSong Gao                             void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
354*5c23704eSSong Gao                                          TCGv_i64, TCGv_i64, TCGv_i64))
355*5c23704eSSong Gao{
356*5c23704eSSong Gao    int i;
357*5c23704eSSong Gao    TCGv_i64 rh, rl, ah, al, bh, bl;
358*5c23704eSSong Gao
359*5c23704eSSong Gao    if (!check_vec(ctx, oprsz)) {
360*5c23704eSSong Gao        return true;
361*5c23704eSSong Gao    }
362*5c23704eSSong Gao
363*5c23704eSSong Gao    rh = tcg_temp_new_i64();
364*5c23704eSSong Gao    rl = tcg_temp_new_i64();
365*5c23704eSSong Gao    ah = tcg_temp_new_i64();
366*5c23704eSSong Gao    al = tcg_temp_new_i64();
367*5c23704eSSong Gao    bh = tcg_temp_new_i64();
368*5c23704eSSong Gao    bl = tcg_temp_new_i64();
369*5c23704eSSong Gao
370*5c23704eSSong Gao    for (i = 0; i < oprsz / 16; i++) {
371*5c23704eSSong Gao        get_vreg64(ah, a->vj, 1 + i * 2);
372*5c23704eSSong Gao        get_vreg64(al, a->vj, i * 2);
373*5c23704eSSong Gao        get_vreg64(bh, a->vk, 1 + i * 2);
374*5c23704eSSong Gao        get_vreg64(bl, a->vk, i * 2);
375*5c23704eSSong Gao
376*5c23704eSSong Gao        func(rl, rh, al, ah, bl, bh);
377*5c23704eSSong Gao
378*5c23704eSSong Gao        set_vreg64(rh, a->vd, 1 + i * 2);
379*5c23704eSSong Gao        set_vreg64(rl, a->vd, i * 2);
380*5c23704eSSong Gao    }
381*5c23704eSSong Gao    return true;
382*5c23704eSSong Gao}
383*5c23704eSSong Gao
384*5c23704eSSong Gaostatic bool gen_vaddsub_q(DisasContext *ctx, arg_vvv *a,
385*5c23704eSSong Gao                          void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
386*5c23704eSSong Gao                                       TCGv_i64, TCGv_i64, TCGv_i64))
387*5c23704eSSong Gao{
388*5c23704eSSong Gao    return gen_vaddsub_q_vl(ctx, a, 16, func);
389*5c23704eSSong Gao}
390*5c23704eSSong Gao
391*5c23704eSSong Gaostatic bool gen_xvaddsub_q(DisasContext *ctx, arg_vvv *a,
392*5c23704eSSong Gao                           void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
393*5c23704eSSong Gao                                        TCGv_i64, TCGv_i64, TCGv_i64))
394*5c23704eSSong Gao{
395*5c23704eSSong Gao    return gen_vaddsub_q_vl(ctx, a, 32, func);
396*5c23704eSSong Gao}
397*5c23704eSSong Gao
/*
 * vsub_{b,h,w,d} and xvsub_{b,h,w,d}: expanded with tcg_gen_gvec_sub at
 * the listed element size (gvec_vvv for LSX, gvec_xxx for LASX).
 */
TRANS(vsub_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sub)
TRANS(vsub_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sub)
TRANS(vsub_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_sub)
TRANS(vsub_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_sub)
TRANS(xvsub_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sub)
TRANS(xvsub_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sub)
TRANS(xvsub_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sub)
TRANS(xvsub_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sub)
406*5c23704eSSong Gao
/*
 * 128-bit add/sub: built from tcg_gen_add2_i64 / tcg_gen_sub2_i64 through
 * gen_vaddsub_q (LSX) and gen_xvaddsub_q (LASX).
 */
TRANS(vadd_q, LSX, gen_vaddsub_q, tcg_gen_add2_i64)
TRANS(vsub_q, LSX, gen_vaddsub_q, tcg_gen_sub2_i64)
TRANS(xvadd_q, LASX, gen_xvaddsub_q, tcg_gen_add2_i64)
TRANS(xvsub_q, LASX, gen_xvaddsub_q, tcg_gen_sub2_i64)
411*5c23704eSSong Gao
/*
 * Add/subtract immediate.  The add forms pass tcg_gen_gvec_addi to
 * gvec_vv_i / gvec_xx_i; the subtract forms use gvec_subi / gvec_xsubi,
 * which call tcg_gen_gvec_addi with the negated immediate.
 */
TRANS(vaddi_bu, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_addi)
TRANS(vaddi_hu, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_addi)
TRANS(vaddi_wu, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_addi)
TRANS(vaddi_du, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_addi)
TRANS(vsubi_bu, LSX, gvec_subi, MO_8)
TRANS(vsubi_hu, LSX, gvec_subi, MO_16)
TRANS(vsubi_wu, LSX, gvec_subi, MO_32)
TRANS(vsubi_du, LSX, gvec_subi, MO_64)
TRANS(xvaddi_bu, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_addi)
TRANS(xvaddi_hu, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_addi)
TRANS(xvaddi_wu, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_addi)
TRANS(xvaddi_du, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_addi)
TRANS(xvsubi_bu, LASX, gvec_xsubi, MO_8)
TRANS(xvsubi_hu, LASX, gvec_xsubi, MO_16)
TRANS(xvsubi_wu, LASX, gvec_xsubi, MO_32)
TRANS(xvsubi_du, LASX, gvec_xsubi, MO_64)
428*5c23704eSSong Gao
/*
 * vneg_{b,h,w,d} and xvneg_{b,h,w,d}: expanded with tcg_gen_gvec_neg at
 * the listed element size (gvec_vv for LSX, gvec_xx for LASX).
 */
TRANS(vneg_b, LSX, gvec_vv, MO_8, tcg_gen_gvec_neg)
TRANS(vneg_h, LSX, gvec_vv, MO_16, tcg_gen_gvec_neg)
TRANS(vneg_w, LSX, gvec_vv, MO_32, tcg_gen_gvec_neg)
TRANS(vneg_d, LSX, gvec_vv, MO_64, tcg_gen_gvec_neg)
TRANS(xvneg_b, LASX, gvec_xx, MO_8, tcg_gen_gvec_neg)
TRANS(xvneg_h, LASX, gvec_xx, MO_16, tcg_gen_gvec_neg)
TRANS(xvneg_w, LASX, gvec_xx, MO_32, tcg_gen_gvec_neg)
TRANS(xvneg_d, LASX, gvec_xx, MO_64, tcg_gen_gvec_neg)
437*5c23704eSSong Gao
/*
 * vsadd_* / vssub_* (LSX): the plain-suffix forms use
 * tcg_gen_gvec_ssadd / tcg_gen_gvec_sssub, the "u"-suffix forms use
 * tcg_gen_gvec_usadd / tcg_gen_gvec_ussub.
 */
TRANS(vsadd_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_ssadd)
TRANS(vsadd_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_ssadd)
TRANS(vsadd_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_ssadd)
TRANS(vsadd_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_ssadd)
TRANS(vsadd_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_usadd)
TRANS(vsadd_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_usadd)
TRANS(vsadd_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_usadd)
TRANS(vsadd_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_usadd)
TRANS(vssub_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sssub)
TRANS(vssub_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sssub)
TRANS(vssub_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_sssub)
TRANS(vssub_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_sssub)
TRANS(vssub_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_ussub)
TRANS(vssub_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_ussub)
TRANS(vssub_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_ussub)
TRANS(vssub_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_ussub)

/* LASX counterparts: same expanders, routed through gvec_xxx (size 32). */
TRANS(xvsadd_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_ssadd)
TRANS(xvsadd_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_ssadd)
TRANS(xvsadd_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_ssadd)
TRANS(xvsadd_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_ssadd)
TRANS(xvsadd_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_usadd)
TRANS(xvsadd_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_usadd)
TRANS(xvsadd_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_usadd)
TRANS(xvsadd_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_usadd)
TRANS(xvssub_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sssub)
TRANS(xvssub_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sssub)
TRANS(xvssub_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sssub)
TRANS(xvssub_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sssub)
TRANS(xvssub_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_ussub)
TRANS(xvssub_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_ussub)
TRANS(xvssub_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_ussub)
TRANS(xvssub_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_ussub)
471*5c23704eSSong Gao
/*
 * vhaddw_* / vhsubw_* (LSX): each entry calls its own helper through
 * gen_vvv (size 16).
 */
TRANS(vhaddw_h_b, LSX, gen_vvv, gen_helper_vhaddw_h_b)
TRANS(vhaddw_w_h, LSX, gen_vvv, gen_helper_vhaddw_w_h)
TRANS(vhaddw_d_w, LSX, gen_vvv, gen_helper_vhaddw_d_w)
TRANS(vhaddw_q_d, LSX, gen_vvv, gen_helper_vhaddw_q_d)
TRANS(vhaddw_hu_bu, LSX, gen_vvv, gen_helper_vhaddw_hu_bu)
TRANS(vhaddw_wu_hu, LSX, gen_vvv, gen_helper_vhaddw_wu_hu)
TRANS(vhaddw_du_wu, LSX, gen_vvv, gen_helper_vhaddw_du_wu)
TRANS(vhaddw_qu_du, LSX, gen_vvv, gen_helper_vhaddw_qu_du)
TRANS(vhsubw_h_b, LSX, gen_vvv, gen_helper_vhsubw_h_b)
TRANS(vhsubw_w_h, LSX, gen_vvv, gen_helper_vhsubw_w_h)
TRANS(vhsubw_d_w, LSX, gen_vvv, gen_helper_vhsubw_d_w)
TRANS(vhsubw_q_d, LSX, gen_vvv, gen_helper_vhsubw_q_d)
TRANS(vhsubw_hu_bu, LSX, gen_vvv, gen_helper_vhsubw_hu_bu)
TRANS(vhsubw_wu_hu, LSX, gen_vvv, gen_helper_vhsubw_wu_hu)
TRANS(vhsubw_du_wu, LSX, gen_vvv, gen_helper_vhsubw_du_wu)
TRANS(vhsubw_qu_du, LSX, gen_vvv, gen_helper_vhsubw_qu_du)

/*
 * LASX counterparts: the same helpers as the LSX entries above, called
 * through gen_xxx (size 32).
 */
TRANS(xvhaddw_h_b, LASX, gen_xxx, gen_helper_vhaddw_h_b)
TRANS(xvhaddw_w_h, LASX, gen_xxx, gen_helper_vhaddw_w_h)
TRANS(xvhaddw_d_w, LASX, gen_xxx, gen_helper_vhaddw_d_w)
TRANS(xvhaddw_q_d, LASX, gen_xxx, gen_helper_vhaddw_q_d)
TRANS(xvhaddw_hu_bu, LASX, gen_xxx, gen_helper_vhaddw_hu_bu)
TRANS(xvhaddw_wu_hu, LASX, gen_xxx, gen_helper_vhaddw_wu_hu)
TRANS(xvhaddw_du_wu, LASX, gen_xxx, gen_helper_vhaddw_du_wu)
TRANS(xvhaddw_qu_du, LASX, gen_xxx, gen_helper_vhaddw_qu_du)
TRANS(xvhsubw_h_b, LASX, gen_xxx, gen_helper_vhsubw_h_b)
TRANS(xvhsubw_w_h, LASX, gen_xxx, gen_helper_vhsubw_w_h)
TRANS(xvhsubw_d_w, LASX, gen_xxx, gen_helper_vhsubw_d_w)
TRANS(xvhsubw_q_d, LASX, gen_xxx, gen_helper_vhsubw_q_d)
TRANS(xvhsubw_hu_bu, LASX, gen_xxx, gen_helper_vhsubw_hu_bu)
TRANS(xvhsubw_wu_hu, LASX, gen_xxx, gen_helper_vhsubw_wu_hu)
TRANS(xvhsubw_du_wu, LASX, gen_xxx, gen_helper_vhsubw_du_wu)
TRANS(xvhsubw_qu_du, LASX, gen_xxx, gen_helper_vhsubw_qu_du)
505*5c23704eSSong Gao
506*5c23704eSSong Gaostatic void gen_vaddwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
507*5c23704eSSong Gao{
508*5c23704eSSong Gao    TCGv_vec t1, t2;
509*5c23704eSSong Gao
510*5c23704eSSong Gao    int halfbits = 4 << vece;
511*5c23704eSSong Gao
512*5c23704eSSong Gao    t1 = tcg_temp_new_vec_matching(a);
513*5c23704eSSong Gao    t2 = tcg_temp_new_vec_matching(b);
514*5c23704eSSong Gao
515*5c23704eSSong Gao    /* Sign-extend the even elements from a */
516*5c23704eSSong Gao    tcg_gen_shli_vec(vece, t1, a, halfbits);
517*5c23704eSSong Gao    tcg_gen_sari_vec(vece, t1, t1, halfbits);
518*5c23704eSSong Gao
519*5c23704eSSong Gao    /* Sign-extend the even elements from b */
520*5c23704eSSong Gao    tcg_gen_shli_vec(vece, t2, b, halfbits);
521*5c23704eSSong Gao    tcg_gen_sari_vec(vece, t2, t2, halfbits);
522*5c23704eSSong Gao
523*5c23704eSSong Gao    tcg_gen_add_vec(vece, t, t1, t2);
524*5c23704eSSong Gao}
525*5c23704eSSong Gao
526*5c23704eSSong Gaostatic void gen_vaddwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
527*5c23704eSSong Gao{
528*5c23704eSSong Gao    TCGv_i32 t1, t2;
529*5c23704eSSong Gao
530*5c23704eSSong Gao    t1 = tcg_temp_new_i32();
531*5c23704eSSong Gao    t2 = tcg_temp_new_i32();
532*5c23704eSSong Gao    tcg_gen_ext16s_i32(t1, a);
533*5c23704eSSong Gao    tcg_gen_ext16s_i32(t2, b);
534*5c23704eSSong Gao    tcg_gen_add_i32(t, t1, t2);
535*5c23704eSSong Gao}
536*5c23704eSSong Gao
537*5c23704eSSong Gaostatic void gen_vaddwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
538*5c23704eSSong Gao{
539*5c23704eSSong Gao    TCGv_i64 t1, t2;
540*5c23704eSSong Gao
541*5c23704eSSong Gao    t1 = tcg_temp_new_i64();
542*5c23704eSSong Gao    t2 = tcg_temp_new_i64();
543*5c23704eSSong Gao    tcg_gen_ext32s_i64(t1, a);
544*5c23704eSSong Gao    tcg_gen_ext32s_i64(t2, b);
545*5c23704eSSong Gao    tcg_gen_add_i64(t, t1, t2);
546*5c23704eSSong Gao}
547*5c23704eSSong Gao
548*5c23704eSSong Gaostatic void do_vaddwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
549*5c23704eSSong Gao                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
550*5c23704eSSong Gao{
551*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
552*5c23704eSSong Gao        INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
553*5c23704eSSong Gao        };
554*5c23704eSSong Gao    static const GVecGen3 op[4] = {
555*5c23704eSSong Gao        {
556*5c23704eSSong Gao            .fniv = gen_vaddwev_s,
557*5c23704eSSong Gao            .fno = gen_helper_vaddwev_h_b,
558*5c23704eSSong Gao            .opt_opc = vecop_list,
559*5c23704eSSong Gao            .vece = MO_16
560*5c23704eSSong Gao        },
561*5c23704eSSong Gao        {
562*5c23704eSSong Gao            .fni4 = gen_vaddwev_w_h,
563*5c23704eSSong Gao            .fniv = gen_vaddwev_s,
564*5c23704eSSong Gao            .fno = gen_helper_vaddwev_w_h,
565*5c23704eSSong Gao            .opt_opc = vecop_list,
566*5c23704eSSong Gao            .vece = MO_32
567*5c23704eSSong Gao        },
568*5c23704eSSong Gao        {
569*5c23704eSSong Gao            .fni8 = gen_vaddwev_d_w,
570*5c23704eSSong Gao            .fniv = gen_vaddwev_s,
571*5c23704eSSong Gao            .fno = gen_helper_vaddwev_d_w,
572*5c23704eSSong Gao            .opt_opc = vecop_list,
573*5c23704eSSong Gao            .vece = MO_64
574*5c23704eSSong Gao        },
575*5c23704eSSong Gao        {
576*5c23704eSSong Gao            .fno = gen_helper_vaddwev_q_d,
577*5c23704eSSong Gao            .vece = MO_128
578*5c23704eSSong Gao        },
579*5c23704eSSong Gao    };
580*5c23704eSSong Gao
581*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
582*5c23704eSSong Gao}
583*5c23704eSSong Gao
/*
 * vaddwev_* and xvaddwev_*: expanded by do_vaddwev_s.  The MemOp given
 * here is used by do_vaddwev_s as the index into its descriptor table.
 */
TRANS(vaddwev_h_b, LSX, gvec_vvv, MO_8, do_vaddwev_s)
TRANS(vaddwev_w_h, LSX, gvec_vvv, MO_16, do_vaddwev_s)
TRANS(vaddwev_d_w, LSX, gvec_vvv, MO_32, do_vaddwev_s)
TRANS(vaddwev_q_d, LSX, gvec_vvv, MO_64, do_vaddwev_s)
TRANS(xvaddwev_h_b, LASX, gvec_xxx, MO_8, do_vaddwev_s)
TRANS(xvaddwev_w_h, LASX, gvec_xxx, MO_16, do_vaddwev_s)
TRANS(xvaddwev_d_w, LASX, gvec_xxx, MO_32, do_vaddwev_s)
TRANS(xvaddwev_q_d, LASX, gvec_xxx, MO_64, do_vaddwev_s)
592*5c23704eSSong Gao
593*5c23704eSSong Gaostatic void gen_vaddwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
594*5c23704eSSong Gao{
595*5c23704eSSong Gao    TCGv_i32 t1, t2;
596*5c23704eSSong Gao
597*5c23704eSSong Gao    t1 = tcg_temp_new_i32();
598*5c23704eSSong Gao    t2 = tcg_temp_new_i32();
599*5c23704eSSong Gao    tcg_gen_sari_i32(t1, a, 16);
600*5c23704eSSong Gao    tcg_gen_sari_i32(t2, b, 16);
601*5c23704eSSong Gao    tcg_gen_add_i32(t, t1, t2);
602*5c23704eSSong Gao}
603*5c23704eSSong Gao
604*5c23704eSSong Gaostatic void gen_vaddwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
605*5c23704eSSong Gao{
606*5c23704eSSong Gao    TCGv_i64 t1, t2;
607*5c23704eSSong Gao
608*5c23704eSSong Gao    t1 = tcg_temp_new_i64();
609*5c23704eSSong Gao    t2 = tcg_temp_new_i64();
610*5c23704eSSong Gao    tcg_gen_sari_i64(t1, a, 32);
611*5c23704eSSong Gao    tcg_gen_sari_i64(t2, b, 32);
612*5c23704eSSong Gao    tcg_gen_add_i64(t, t1, t2);
613*5c23704eSSong Gao}
614*5c23704eSSong Gao
615*5c23704eSSong Gaostatic void gen_vaddwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
616*5c23704eSSong Gao{
617*5c23704eSSong Gao    TCGv_vec t1, t2;
618*5c23704eSSong Gao
619*5c23704eSSong Gao    int halfbits = 4 << vece;
620*5c23704eSSong Gao
621*5c23704eSSong Gao    t1 = tcg_temp_new_vec_matching(a);
622*5c23704eSSong Gao    t2 = tcg_temp_new_vec_matching(b);
623*5c23704eSSong Gao
624*5c23704eSSong Gao    /* Sign-extend the odd elements for vector */
625*5c23704eSSong Gao    tcg_gen_sari_vec(vece, t1, a, halfbits);
626*5c23704eSSong Gao    tcg_gen_sari_vec(vece, t2, b, halfbits);
627*5c23704eSSong Gao
628*5c23704eSSong Gao    tcg_gen_add_vec(vece, t, t1, t2);
629*5c23704eSSong Gao}
630*5c23704eSSong Gao
631*5c23704eSSong Gaostatic void do_vaddwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
632*5c23704eSSong Gao                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
633*5c23704eSSong Gao{
634*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
635*5c23704eSSong Gao        INDEX_op_sari_vec, INDEX_op_add_vec, 0
636*5c23704eSSong Gao        };
637*5c23704eSSong Gao    static const GVecGen3 op[4] = {
638*5c23704eSSong Gao        {
639*5c23704eSSong Gao            .fniv = gen_vaddwod_s,
640*5c23704eSSong Gao            .fno = gen_helper_vaddwod_h_b,
641*5c23704eSSong Gao            .opt_opc = vecop_list,
642*5c23704eSSong Gao            .vece = MO_16
643*5c23704eSSong Gao        },
644*5c23704eSSong Gao        {
645*5c23704eSSong Gao            .fni4 = gen_vaddwod_w_h,
646*5c23704eSSong Gao            .fniv = gen_vaddwod_s,
647*5c23704eSSong Gao            .fno = gen_helper_vaddwod_w_h,
648*5c23704eSSong Gao            .opt_opc = vecop_list,
649*5c23704eSSong Gao            .vece = MO_32
650*5c23704eSSong Gao        },
651*5c23704eSSong Gao        {
652*5c23704eSSong Gao            .fni8 = gen_vaddwod_d_w,
653*5c23704eSSong Gao            .fniv = gen_vaddwod_s,
654*5c23704eSSong Gao            .fno = gen_helper_vaddwod_d_w,
655*5c23704eSSong Gao            .opt_opc = vecop_list,
656*5c23704eSSong Gao            .vece = MO_64
657*5c23704eSSong Gao        },
658*5c23704eSSong Gao        {
659*5c23704eSSong Gao            .fno = gen_helper_vaddwod_q_d,
660*5c23704eSSong Gao            .vece = MO_128
661*5c23704eSSong Gao        },
662*5c23704eSSong Gao    };
663*5c23704eSSong Gao
664*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
665*5c23704eSSong Gao}
666*5c23704eSSong Gao
667*5c23704eSSong GaoTRANS(vaddwod_h_b, LSX, gvec_vvv, MO_8, do_vaddwod_s)
668*5c23704eSSong GaoTRANS(vaddwod_w_h, LSX, gvec_vvv, MO_16, do_vaddwod_s)
669*5c23704eSSong GaoTRANS(vaddwod_d_w, LSX, gvec_vvv, MO_32, do_vaddwod_s)
670*5c23704eSSong GaoTRANS(vaddwod_q_d, LSX, gvec_vvv, MO_64, do_vaddwod_s)
671*5c23704eSSong GaoTRANS(xvaddwod_h_b, LASX, gvec_xxx, MO_8, do_vaddwod_s)
672*5c23704eSSong GaoTRANS(xvaddwod_w_h, LASX, gvec_xxx, MO_16, do_vaddwod_s)
673*5c23704eSSong GaoTRANS(xvaddwod_d_w, LASX, gvec_xxx, MO_32, do_vaddwod_s)
674*5c23704eSSong GaoTRANS(xvaddwod_q_d, LASX, gvec_xxx, MO_64, do_vaddwod_s)
675*5c23704eSSong Gao
676*5c23704eSSong Gao
677*5c23704eSSong Gaostatic void gen_vsubwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
678*5c23704eSSong Gao{
679*5c23704eSSong Gao    TCGv_vec t1, t2;
680*5c23704eSSong Gao
681*5c23704eSSong Gao    int halfbits = 4 << vece;
682*5c23704eSSong Gao
683*5c23704eSSong Gao    t1 = tcg_temp_new_vec_matching(a);
684*5c23704eSSong Gao    t2 = tcg_temp_new_vec_matching(b);
685*5c23704eSSong Gao
686*5c23704eSSong Gao    /* Sign-extend the even elements from a */
687*5c23704eSSong Gao    tcg_gen_shli_vec(vece, t1, a, halfbits);
688*5c23704eSSong Gao    tcg_gen_sari_vec(vece, t1, t1, halfbits);
689*5c23704eSSong Gao
690*5c23704eSSong Gao    /* Sign-extend the even elements from b */
691*5c23704eSSong Gao    tcg_gen_shli_vec(vece, t2, b, halfbits);
692*5c23704eSSong Gao    tcg_gen_sari_vec(vece, t2, t2, halfbits);
693*5c23704eSSong Gao
694*5c23704eSSong Gao    tcg_gen_sub_vec(vece, t, t1, t2);
695*5c23704eSSong Gao}
696*5c23704eSSong Gao
697*5c23704eSSong Gaostatic void gen_vsubwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
698*5c23704eSSong Gao{
699*5c23704eSSong Gao    TCGv_i32 t1, t2;
700*5c23704eSSong Gao
701*5c23704eSSong Gao    t1 = tcg_temp_new_i32();
702*5c23704eSSong Gao    t2 = tcg_temp_new_i32();
703*5c23704eSSong Gao    tcg_gen_ext16s_i32(t1, a);
704*5c23704eSSong Gao    tcg_gen_ext16s_i32(t2, b);
705*5c23704eSSong Gao    tcg_gen_sub_i32(t, t1, t2);
706*5c23704eSSong Gao}
707*5c23704eSSong Gao
708*5c23704eSSong Gaostatic void gen_vsubwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
709*5c23704eSSong Gao{
710*5c23704eSSong Gao    TCGv_i64 t1, t2;
711*5c23704eSSong Gao
712*5c23704eSSong Gao    t1 = tcg_temp_new_i64();
713*5c23704eSSong Gao    t2 = tcg_temp_new_i64();
714*5c23704eSSong Gao    tcg_gen_ext32s_i64(t1, a);
715*5c23704eSSong Gao    tcg_gen_ext32s_i64(t2, b);
716*5c23704eSSong Gao    tcg_gen_sub_i64(t, t1, t2);
717*5c23704eSSong Gao}
718*5c23704eSSong Gao
719*5c23704eSSong Gaostatic void do_vsubwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
720*5c23704eSSong Gao                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
721*5c23704eSSong Gao{
722*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
723*5c23704eSSong Gao        INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_sub_vec, 0
724*5c23704eSSong Gao        };
725*5c23704eSSong Gao    static const GVecGen3 op[4] = {
726*5c23704eSSong Gao        {
727*5c23704eSSong Gao            .fniv = gen_vsubwev_s,
728*5c23704eSSong Gao            .fno = gen_helper_vsubwev_h_b,
729*5c23704eSSong Gao            .opt_opc = vecop_list,
730*5c23704eSSong Gao            .vece = MO_16
731*5c23704eSSong Gao        },
732*5c23704eSSong Gao        {
733*5c23704eSSong Gao            .fni4 = gen_vsubwev_w_h,
734*5c23704eSSong Gao            .fniv = gen_vsubwev_s,
735*5c23704eSSong Gao            .fno = gen_helper_vsubwev_w_h,
736*5c23704eSSong Gao            .opt_opc = vecop_list,
737*5c23704eSSong Gao            .vece = MO_32
738*5c23704eSSong Gao        },
739*5c23704eSSong Gao        {
740*5c23704eSSong Gao            .fni8 = gen_vsubwev_d_w,
741*5c23704eSSong Gao            .fniv = gen_vsubwev_s,
742*5c23704eSSong Gao            .fno = gen_helper_vsubwev_d_w,
743*5c23704eSSong Gao            .opt_opc = vecop_list,
744*5c23704eSSong Gao            .vece = MO_64
745*5c23704eSSong Gao        },
746*5c23704eSSong Gao        {
747*5c23704eSSong Gao            .fno = gen_helper_vsubwev_q_d,
748*5c23704eSSong Gao            .vece = MO_128
749*5c23704eSSong Gao        },
750*5c23704eSSong Gao    };
751*5c23704eSSong Gao
752*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
753*5c23704eSSong Gao}
754*5c23704eSSong Gao
755*5c23704eSSong GaoTRANS(vsubwev_h_b, LSX, gvec_vvv, MO_8, do_vsubwev_s)
756*5c23704eSSong GaoTRANS(vsubwev_w_h, LSX, gvec_vvv, MO_16, do_vsubwev_s)
757*5c23704eSSong GaoTRANS(vsubwev_d_w, LSX, gvec_vvv, MO_32, do_vsubwev_s)
758*5c23704eSSong GaoTRANS(vsubwev_q_d, LSX, gvec_vvv, MO_64, do_vsubwev_s)
759*5c23704eSSong GaoTRANS(xvsubwev_h_b, LASX, gvec_xxx, MO_8, do_vsubwev_s)
760*5c23704eSSong GaoTRANS(xvsubwev_w_h, LASX, gvec_xxx, MO_16, do_vsubwev_s)
761*5c23704eSSong GaoTRANS(xvsubwev_d_w, LASX, gvec_xxx, MO_32, do_vsubwev_s)
762*5c23704eSSong GaoTRANS(xvsubwev_q_d, LASX, gvec_xxx, MO_64, do_vsubwev_s)
763*5c23704eSSong Gao
764*5c23704eSSong Gaostatic void gen_vsubwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
765*5c23704eSSong Gao{
766*5c23704eSSong Gao    TCGv_vec t1, t2;
767*5c23704eSSong Gao
768*5c23704eSSong Gao    int halfbits = 4 << vece;
769*5c23704eSSong Gao
770*5c23704eSSong Gao    t1 = tcg_temp_new_vec_matching(a);
771*5c23704eSSong Gao    t2 = tcg_temp_new_vec_matching(b);
772*5c23704eSSong Gao
773*5c23704eSSong Gao    /* Sign-extend the odd elements for vector */
774*5c23704eSSong Gao    tcg_gen_sari_vec(vece, t1, a, halfbits);
775*5c23704eSSong Gao    tcg_gen_sari_vec(vece, t2, b, halfbits);
776*5c23704eSSong Gao
777*5c23704eSSong Gao    tcg_gen_sub_vec(vece, t, t1, t2);
778*5c23704eSSong Gao}
779*5c23704eSSong Gao
780*5c23704eSSong Gaostatic void gen_vsubwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
781*5c23704eSSong Gao{
782*5c23704eSSong Gao    TCGv_i32 t1, t2;
783*5c23704eSSong Gao
784*5c23704eSSong Gao    t1 = tcg_temp_new_i32();
785*5c23704eSSong Gao    t2 = tcg_temp_new_i32();
786*5c23704eSSong Gao    tcg_gen_sari_i32(t1, a, 16);
787*5c23704eSSong Gao    tcg_gen_sari_i32(t2, b, 16);
788*5c23704eSSong Gao    tcg_gen_sub_i32(t, t1, t2);
789*5c23704eSSong Gao}
790*5c23704eSSong Gao
791*5c23704eSSong Gaostatic void gen_vsubwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
792*5c23704eSSong Gao{
793*5c23704eSSong Gao    TCGv_i64 t1, t2;
794*5c23704eSSong Gao
795*5c23704eSSong Gao    t1 = tcg_temp_new_i64();
796*5c23704eSSong Gao    t2 = tcg_temp_new_i64();
797*5c23704eSSong Gao    tcg_gen_sari_i64(t1, a, 32);
798*5c23704eSSong Gao    tcg_gen_sari_i64(t2, b, 32);
799*5c23704eSSong Gao    tcg_gen_sub_i64(t, t1, t2);
800*5c23704eSSong Gao}
801*5c23704eSSong Gao
802*5c23704eSSong Gaostatic void do_vsubwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
803*5c23704eSSong Gao                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
804*5c23704eSSong Gao{
805*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
806*5c23704eSSong Gao        INDEX_op_sari_vec, INDEX_op_sub_vec, 0
807*5c23704eSSong Gao        };
808*5c23704eSSong Gao    static const GVecGen3 op[4] = {
809*5c23704eSSong Gao        {
810*5c23704eSSong Gao            .fniv = gen_vsubwod_s,
811*5c23704eSSong Gao            .fno = gen_helper_vsubwod_h_b,
812*5c23704eSSong Gao            .opt_opc = vecop_list,
813*5c23704eSSong Gao            .vece = MO_16
814*5c23704eSSong Gao        },
815*5c23704eSSong Gao        {
816*5c23704eSSong Gao            .fni4 = gen_vsubwod_w_h,
817*5c23704eSSong Gao            .fniv = gen_vsubwod_s,
818*5c23704eSSong Gao            .fno = gen_helper_vsubwod_w_h,
819*5c23704eSSong Gao            .opt_opc = vecop_list,
820*5c23704eSSong Gao            .vece = MO_32
821*5c23704eSSong Gao        },
822*5c23704eSSong Gao        {
823*5c23704eSSong Gao            .fni8 = gen_vsubwod_d_w,
824*5c23704eSSong Gao            .fniv = gen_vsubwod_s,
825*5c23704eSSong Gao            .fno = gen_helper_vsubwod_d_w,
826*5c23704eSSong Gao            .opt_opc = vecop_list,
827*5c23704eSSong Gao            .vece = MO_64
828*5c23704eSSong Gao        },
829*5c23704eSSong Gao        {
830*5c23704eSSong Gao            .fno = gen_helper_vsubwod_q_d,
831*5c23704eSSong Gao            .vece = MO_128
832*5c23704eSSong Gao        },
833*5c23704eSSong Gao    };
834*5c23704eSSong Gao
835*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
836*5c23704eSSong Gao}
837*5c23704eSSong Gao
838*5c23704eSSong GaoTRANS(vsubwod_h_b, LSX, gvec_vvv, MO_8, do_vsubwod_s)
839*5c23704eSSong GaoTRANS(vsubwod_w_h, LSX, gvec_vvv, MO_16, do_vsubwod_s)
840*5c23704eSSong GaoTRANS(vsubwod_d_w, LSX, gvec_vvv, MO_32, do_vsubwod_s)
841*5c23704eSSong GaoTRANS(vsubwod_q_d, LSX, gvec_vvv, MO_64, do_vsubwod_s)
842*5c23704eSSong GaoTRANS(xvsubwod_h_b, LASX, gvec_xxx, MO_8, do_vsubwod_s)
843*5c23704eSSong GaoTRANS(xvsubwod_w_h, LASX, gvec_xxx, MO_16, do_vsubwod_s)
844*5c23704eSSong GaoTRANS(xvsubwod_d_w, LASX, gvec_xxx, MO_32, do_vsubwod_s)
845*5c23704eSSong GaoTRANS(xvsubwod_q_d, LASX, gvec_xxx, MO_64, do_vsubwod_s)
846*5c23704eSSong Gao
847*5c23704eSSong Gaostatic void gen_vaddwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
848*5c23704eSSong Gao{
849*5c23704eSSong Gao    TCGv_vec t1, t2, t3;
850*5c23704eSSong Gao
851*5c23704eSSong Gao    t1 = tcg_temp_new_vec_matching(a);
852*5c23704eSSong Gao    t2 = tcg_temp_new_vec_matching(b);
853*5c23704eSSong Gao    t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
854*5c23704eSSong Gao    tcg_gen_and_vec(vece, t1, a, t3);
855*5c23704eSSong Gao    tcg_gen_and_vec(vece, t2, b, t3);
856*5c23704eSSong Gao    tcg_gen_add_vec(vece, t, t1, t2);
857*5c23704eSSong Gao}
858*5c23704eSSong Gao
859*5c23704eSSong Gaostatic void gen_vaddwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
860*5c23704eSSong Gao{
861*5c23704eSSong Gao    TCGv_i32 t1, t2;
862*5c23704eSSong Gao
863*5c23704eSSong Gao    t1 = tcg_temp_new_i32();
864*5c23704eSSong Gao    t2 = tcg_temp_new_i32();
865*5c23704eSSong Gao    tcg_gen_ext16u_i32(t1, a);
866*5c23704eSSong Gao    tcg_gen_ext16u_i32(t2, b);
867*5c23704eSSong Gao    tcg_gen_add_i32(t, t1, t2);
868*5c23704eSSong Gao}
869*5c23704eSSong Gao
870*5c23704eSSong Gaostatic void gen_vaddwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
871*5c23704eSSong Gao{
872*5c23704eSSong Gao    TCGv_i64 t1, t2;
873*5c23704eSSong Gao
874*5c23704eSSong Gao    t1 = tcg_temp_new_i64();
875*5c23704eSSong Gao    t2 = tcg_temp_new_i64();
876*5c23704eSSong Gao    tcg_gen_ext32u_i64(t1, a);
877*5c23704eSSong Gao    tcg_gen_ext32u_i64(t2, b);
878*5c23704eSSong Gao    tcg_gen_add_i64(t, t1, t2);
879*5c23704eSSong Gao}
880*5c23704eSSong Gao
881*5c23704eSSong Gaostatic void do_vaddwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
882*5c23704eSSong Gao                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
883*5c23704eSSong Gao{
884*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
885*5c23704eSSong Gao        INDEX_op_add_vec, 0
886*5c23704eSSong Gao        };
887*5c23704eSSong Gao    static const GVecGen3 op[4] = {
888*5c23704eSSong Gao        {
889*5c23704eSSong Gao            .fniv = gen_vaddwev_u,
890*5c23704eSSong Gao            .fno = gen_helper_vaddwev_h_bu,
891*5c23704eSSong Gao            .opt_opc = vecop_list,
892*5c23704eSSong Gao            .vece = MO_16
893*5c23704eSSong Gao        },
894*5c23704eSSong Gao        {
895*5c23704eSSong Gao            .fni4 = gen_vaddwev_w_hu,
896*5c23704eSSong Gao            .fniv = gen_vaddwev_u,
897*5c23704eSSong Gao            .fno = gen_helper_vaddwev_w_hu,
898*5c23704eSSong Gao            .opt_opc = vecop_list,
899*5c23704eSSong Gao            .vece = MO_32
900*5c23704eSSong Gao        },
901*5c23704eSSong Gao        {
902*5c23704eSSong Gao            .fni8 = gen_vaddwev_d_wu,
903*5c23704eSSong Gao            .fniv = gen_vaddwev_u,
904*5c23704eSSong Gao            .fno = gen_helper_vaddwev_d_wu,
905*5c23704eSSong Gao            .opt_opc = vecop_list,
906*5c23704eSSong Gao            .vece = MO_64
907*5c23704eSSong Gao        },
908*5c23704eSSong Gao        {
909*5c23704eSSong Gao            .fno = gen_helper_vaddwev_q_du,
910*5c23704eSSong Gao            .vece = MO_128
911*5c23704eSSong Gao        },
912*5c23704eSSong Gao    };
913*5c23704eSSong Gao
914*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
915*5c23704eSSong Gao}
916*5c23704eSSong Gao
917*5c23704eSSong GaoTRANS(vaddwev_h_bu, LSX, gvec_vvv, MO_8, do_vaddwev_u)
918*5c23704eSSong GaoTRANS(vaddwev_w_hu, LSX, gvec_vvv, MO_16, do_vaddwev_u)
919*5c23704eSSong GaoTRANS(vaddwev_d_wu, LSX, gvec_vvv, MO_32, do_vaddwev_u)
920*5c23704eSSong GaoTRANS(vaddwev_q_du, LSX, gvec_vvv, MO_64, do_vaddwev_u)
921*5c23704eSSong GaoTRANS(xvaddwev_h_bu, LASX, gvec_xxx, MO_8, do_vaddwev_u)
922*5c23704eSSong GaoTRANS(xvaddwev_w_hu, LASX, gvec_xxx, MO_16, do_vaddwev_u)
923*5c23704eSSong GaoTRANS(xvaddwev_d_wu, LASX, gvec_xxx, MO_32, do_vaddwev_u)
924*5c23704eSSong GaoTRANS(xvaddwev_q_du, LASX, gvec_xxx, MO_64, do_vaddwev_u)
925*5c23704eSSong Gao
926*5c23704eSSong Gaostatic void gen_vaddwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
927*5c23704eSSong Gao{
928*5c23704eSSong Gao    TCGv_vec t1, t2;
929*5c23704eSSong Gao
930*5c23704eSSong Gao    int halfbits = 4 << vece;
931*5c23704eSSong Gao
932*5c23704eSSong Gao    t1 = tcg_temp_new_vec_matching(a);
933*5c23704eSSong Gao    t2 = tcg_temp_new_vec_matching(b);
934*5c23704eSSong Gao
935*5c23704eSSong Gao    /* Zero-extend the odd elements for vector */
936*5c23704eSSong Gao    tcg_gen_shri_vec(vece, t1, a, halfbits);
937*5c23704eSSong Gao    tcg_gen_shri_vec(vece, t2, b, halfbits);
938*5c23704eSSong Gao
939*5c23704eSSong Gao    tcg_gen_add_vec(vece, t, t1, t2);
940*5c23704eSSong Gao}
941*5c23704eSSong Gao
942*5c23704eSSong Gaostatic void gen_vaddwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
943*5c23704eSSong Gao{
944*5c23704eSSong Gao    TCGv_i32 t1, t2;
945*5c23704eSSong Gao
946*5c23704eSSong Gao    t1 = tcg_temp_new_i32();
947*5c23704eSSong Gao    t2 = tcg_temp_new_i32();
948*5c23704eSSong Gao    tcg_gen_shri_i32(t1, a, 16);
949*5c23704eSSong Gao    tcg_gen_shri_i32(t2, b, 16);
950*5c23704eSSong Gao    tcg_gen_add_i32(t, t1, t2);
951*5c23704eSSong Gao}
952*5c23704eSSong Gao
953*5c23704eSSong Gaostatic void gen_vaddwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
954*5c23704eSSong Gao{
955*5c23704eSSong Gao    TCGv_i64 t1, t2;
956*5c23704eSSong Gao
957*5c23704eSSong Gao    t1 = tcg_temp_new_i64();
958*5c23704eSSong Gao    t2 = tcg_temp_new_i64();
959*5c23704eSSong Gao    tcg_gen_shri_i64(t1, a, 32);
960*5c23704eSSong Gao    tcg_gen_shri_i64(t2, b, 32);
961*5c23704eSSong Gao    tcg_gen_add_i64(t, t1, t2);
962*5c23704eSSong Gao}
963*5c23704eSSong Gao
964*5c23704eSSong Gaostatic void do_vaddwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
965*5c23704eSSong Gao                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
966*5c23704eSSong Gao{
967*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
968*5c23704eSSong Gao        INDEX_op_shri_vec, INDEX_op_add_vec, 0
969*5c23704eSSong Gao        };
970*5c23704eSSong Gao    static const GVecGen3 op[4] = {
971*5c23704eSSong Gao        {
972*5c23704eSSong Gao            .fniv = gen_vaddwod_u,
973*5c23704eSSong Gao            .fno = gen_helper_vaddwod_h_bu,
974*5c23704eSSong Gao            .opt_opc = vecop_list,
975*5c23704eSSong Gao            .vece = MO_16
976*5c23704eSSong Gao        },
977*5c23704eSSong Gao        {
978*5c23704eSSong Gao            .fni4 = gen_vaddwod_w_hu,
979*5c23704eSSong Gao            .fniv = gen_vaddwod_u,
980*5c23704eSSong Gao            .fno = gen_helper_vaddwod_w_hu,
981*5c23704eSSong Gao            .opt_opc = vecop_list,
982*5c23704eSSong Gao            .vece = MO_32
983*5c23704eSSong Gao        },
984*5c23704eSSong Gao        {
985*5c23704eSSong Gao            .fni8 = gen_vaddwod_d_wu,
986*5c23704eSSong Gao            .fniv = gen_vaddwod_u,
987*5c23704eSSong Gao            .fno = gen_helper_vaddwod_d_wu,
988*5c23704eSSong Gao            .opt_opc = vecop_list,
989*5c23704eSSong Gao            .vece = MO_64
990*5c23704eSSong Gao        },
991*5c23704eSSong Gao        {
992*5c23704eSSong Gao            .fno = gen_helper_vaddwod_q_du,
993*5c23704eSSong Gao            .vece = MO_128
994*5c23704eSSong Gao        },
995*5c23704eSSong Gao    };
996*5c23704eSSong Gao
997*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
998*5c23704eSSong Gao}
999*5c23704eSSong Gao
1000*5c23704eSSong GaoTRANS(vaddwod_h_bu, LSX, gvec_vvv, MO_8, do_vaddwod_u)
1001*5c23704eSSong GaoTRANS(vaddwod_w_hu, LSX, gvec_vvv, MO_16, do_vaddwod_u)
1002*5c23704eSSong GaoTRANS(vaddwod_d_wu, LSX, gvec_vvv, MO_32, do_vaddwod_u)
1003*5c23704eSSong GaoTRANS(vaddwod_q_du, LSX, gvec_vvv, MO_64, do_vaddwod_u)
1004*5c23704eSSong GaoTRANS(xvaddwod_h_bu, LASX, gvec_xxx, MO_8, do_vaddwod_u)
1005*5c23704eSSong GaoTRANS(xvaddwod_w_hu, LASX, gvec_xxx, MO_16, do_vaddwod_u)
1006*5c23704eSSong GaoTRANS(xvaddwod_d_wu, LASX, gvec_xxx, MO_32, do_vaddwod_u)
1007*5c23704eSSong GaoTRANS(xvaddwod_q_du, LASX, gvec_xxx, MO_64, do_vaddwod_u)
1008*5c23704eSSong Gao
1009*5c23704eSSong Gaostatic void gen_vsubwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1010*5c23704eSSong Gao{
1011*5c23704eSSong Gao    TCGv_vec t1, t2, t3;
1012*5c23704eSSong Gao
1013*5c23704eSSong Gao    t1 = tcg_temp_new_vec_matching(a);
1014*5c23704eSSong Gao    t2 = tcg_temp_new_vec_matching(b);
1015*5c23704eSSong Gao    t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
1016*5c23704eSSong Gao    tcg_gen_and_vec(vece, t1, a, t3);
1017*5c23704eSSong Gao    tcg_gen_and_vec(vece, t2, b, t3);
1018*5c23704eSSong Gao    tcg_gen_sub_vec(vece, t, t1, t2);
1019*5c23704eSSong Gao}
1020*5c23704eSSong Gao
1021*5c23704eSSong Gaostatic void gen_vsubwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
1022*5c23704eSSong Gao{
1023*5c23704eSSong Gao    TCGv_i32 t1, t2;
1024*5c23704eSSong Gao
1025*5c23704eSSong Gao    t1 = tcg_temp_new_i32();
1026*5c23704eSSong Gao    t2 = tcg_temp_new_i32();
1027*5c23704eSSong Gao    tcg_gen_ext16u_i32(t1, a);
1028*5c23704eSSong Gao    tcg_gen_ext16u_i32(t2, b);
1029*5c23704eSSong Gao    tcg_gen_sub_i32(t, t1, t2);
1030*5c23704eSSong Gao}
1031*5c23704eSSong Gao
1032*5c23704eSSong Gaostatic void gen_vsubwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1033*5c23704eSSong Gao{
1034*5c23704eSSong Gao    TCGv_i64 t1, t2;
1035*5c23704eSSong Gao
1036*5c23704eSSong Gao    t1 = tcg_temp_new_i64();
1037*5c23704eSSong Gao    t2 = tcg_temp_new_i64();
1038*5c23704eSSong Gao    tcg_gen_ext32u_i64(t1, a);
1039*5c23704eSSong Gao    tcg_gen_ext32u_i64(t2, b);
1040*5c23704eSSong Gao    tcg_gen_sub_i64(t, t1, t2);
1041*5c23704eSSong Gao}
1042*5c23704eSSong Gao
1043*5c23704eSSong Gaostatic void do_vsubwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1044*5c23704eSSong Gao                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1045*5c23704eSSong Gao{
1046*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
1047*5c23704eSSong Gao        INDEX_op_sub_vec, 0
1048*5c23704eSSong Gao        };
1049*5c23704eSSong Gao    static const GVecGen3 op[4] = {
1050*5c23704eSSong Gao        {
1051*5c23704eSSong Gao            .fniv = gen_vsubwev_u,
1052*5c23704eSSong Gao            .fno = gen_helper_vsubwev_h_bu,
1053*5c23704eSSong Gao            .opt_opc = vecop_list,
1054*5c23704eSSong Gao            .vece = MO_16
1055*5c23704eSSong Gao        },
1056*5c23704eSSong Gao        {
1057*5c23704eSSong Gao            .fni4 = gen_vsubwev_w_hu,
1058*5c23704eSSong Gao            .fniv = gen_vsubwev_u,
1059*5c23704eSSong Gao            .fno = gen_helper_vsubwev_w_hu,
1060*5c23704eSSong Gao            .opt_opc = vecop_list,
1061*5c23704eSSong Gao            .vece = MO_32
1062*5c23704eSSong Gao        },
1063*5c23704eSSong Gao        {
1064*5c23704eSSong Gao            .fni8 = gen_vsubwev_d_wu,
1065*5c23704eSSong Gao            .fniv = gen_vsubwev_u,
1066*5c23704eSSong Gao            .fno = gen_helper_vsubwev_d_wu,
1067*5c23704eSSong Gao            .opt_opc = vecop_list,
1068*5c23704eSSong Gao            .vece = MO_64
1069*5c23704eSSong Gao        },
1070*5c23704eSSong Gao        {
1071*5c23704eSSong Gao            .fno = gen_helper_vsubwev_q_du,
1072*5c23704eSSong Gao            .vece = MO_128
1073*5c23704eSSong Gao        },
1074*5c23704eSSong Gao    };
1075*5c23704eSSong Gao
1076*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1077*5c23704eSSong Gao}
1078*5c23704eSSong Gao
1079*5c23704eSSong GaoTRANS(vsubwev_h_bu, LSX, gvec_vvv, MO_8, do_vsubwev_u)
1080*5c23704eSSong GaoTRANS(vsubwev_w_hu, LSX, gvec_vvv, MO_16, do_vsubwev_u)
1081*5c23704eSSong GaoTRANS(vsubwev_d_wu, LSX, gvec_vvv, MO_32, do_vsubwev_u)
1082*5c23704eSSong GaoTRANS(vsubwev_q_du, LSX, gvec_vvv, MO_64, do_vsubwev_u)
1083*5c23704eSSong GaoTRANS(xvsubwev_h_bu, LASX, gvec_xxx, MO_8, do_vsubwev_u)
1084*5c23704eSSong GaoTRANS(xvsubwev_w_hu, LASX, gvec_xxx, MO_16, do_vsubwev_u)
1085*5c23704eSSong GaoTRANS(xvsubwev_d_wu, LASX, gvec_xxx, MO_32, do_vsubwev_u)
1086*5c23704eSSong GaoTRANS(xvsubwev_q_du, LASX, gvec_xxx, MO_64, do_vsubwev_u)
1087*5c23704eSSong Gao
1088*5c23704eSSong Gaostatic void gen_vsubwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1089*5c23704eSSong Gao{
1090*5c23704eSSong Gao    TCGv_vec t1, t2;
1091*5c23704eSSong Gao
1092*5c23704eSSong Gao    int halfbits = 4 << vece;
1093*5c23704eSSong Gao
1094*5c23704eSSong Gao    t1 = tcg_temp_new_vec_matching(a);
1095*5c23704eSSong Gao    t2 = tcg_temp_new_vec_matching(b);
1096*5c23704eSSong Gao
1097*5c23704eSSong Gao    /* Zero-extend the odd elements for vector */
1098*5c23704eSSong Gao    tcg_gen_shri_vec(vece, t1, a, halfbits);
1099*5c23704eSSong Gao    tcg_gen_shri_vec(vece, t2, b, halfbits);
1100*5c23704eSSong Gao
1101*5c23704eSSong Gao    tcg_gen_sub_vec(vece, t, t1, t2);
1102*5c23704eSSong Gao}
1103*5c23704eSSong Gao
1104*5c23704eSSong Gaostatic void gen_vsubwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
1105*5c23704eSSong Gao{
1106*5c23704eSSong Gao    TCGv_i32 t1, t2;
1107*5c23704eSSong Gao
1108*5c23704eSSong Gao    t1 = tcg_temp_new_i32();
1109*5c23704eSSong Gao    t2 = tcg_temp_new_i32();
1110*5c23704eSSong Gao    tcg_gen_shri_i32(t1, a, 16);
1111*5c23704eSSong Gao    tcg_gen_shri_i32(t2, b, 16);
1112*5c23704eSSong Gao    tcg_gen_sub_i32(t, t1, t2);
1113*5c23704eSSong Gao}
1114*5c23704eSSong Gao
1115*5c23704eSSong Gaostatic void gen_vsubwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1116*5c23704eSSong Gao{
1117*5c23704eSSong Gao    TCGv_i64 t1, t2;
1118*5c23704eSSong Gao
1119*5c23704eSSong Gao    t1 = tcg_temp_new_i64();
1120*5c23704eSSong Gao    t2 = tcg_temp_new_i64();
1121*5c23704eSSong Gao    tcg_gen_shri_i64(t1, a, 32);
1122*5c23704eSSong Gao    tcg_gen_shri_i64(t2, b, 32);
1123*5c23704eSSong Gao    tcg_gen_sub_i64(t, t1, t2);
1124*5c23704eSSong Gao}
1125*5c23704eSSong Gao
1126*5c23704eSSong Gaostatic void do_vsubwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1127*5c23704eSSong Gao                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1128*5c23704eSSong Gao{
1129*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
1130*5c23704eSSong Gao        INDEX_op_shri_vec, INDEX_op_sub_vec, 0
1131*5c23704eSSong Gao        };
1132*5c23704eSSong Gao    static const GVecGen3 op[4] = {
1133*5c23704eSSong Gao        {
1134*5c23704eSSong Gao            .fniv = gen_vsubwod_u,
1135*5c23704eSSong Gao            .fno = gen_helper_vsubwod_h_bu,
1136*5c23704eSSong Gao            .opt_opc = vecop_list,
1137*5c23704eSSong Gao            .vece = MO_16
1138*5c23704eSSong Gao        },
1139*5c23704eSSong Gao        {
1140*5c23704eSSong Gao            .fni4 = gen_vsubwod_w_hu,
1141*5c23704eSSong Gao            .fniv = gen_vsubwod_u,
1142*5c23704eSSong Gao            .fno = gen_helper_vsubwod_w_hu,
1143*5c23704eSSong Gao            .opt_opc = vecop_list,
1144*5c23704eSSong Gao            .vece = MO_32
1145*5c23704eSSong Gao        },
1146*5c23704eSSong Gao        {
1147*5c23704eSSong Gao            .fni8 = gen_vsubwod_d_wu,
1148*5c23704eSSong Gao            .fniv = gen_vsubwod_u,
1149*5c23704eSSong Gao            .fno = gen_helper_vsubwod_d_wu,
1150*5c23704eSSong Gao            .opt_opc = vecop_list,
1151*5c23704eSSong Gao            .vece = MO_64
1152*5c23704eSSong Gao        },
1153*5c23704eSSong Gao        {
1154*5c23704eSSong Gao            .fno = gen_helper_vsubwod_q_du,
1155*5c23704eSSong Gao            .vece = MO_128
1156*5c23704eSSong Gao        },
1157*5c23704eSSong Gao    };
1158*5c23704eSSong Gao
1159*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1160*5c23704eSSong Gao}
1161*5c23704eSSong Gao
1162*5c23704eSSong GaoTRANS(vsubwod_h_bu, LSX, gvec_vvv, MO_8, do_vsubwod_u)
1163*5c23704eSSong GaoTRANS(vsubwod_w_hu, LSX, gvec_vvv, MO_16, do_vsubwod_u)
1164*5c23704eSSong GaoTRANS(vsubwod_d_wu, LSX, gvec_vvv, MO_32, do_vsubwod_u)
1165*5c23704eSSong GaoTRANS(vsubwod_q_du, LSX, gvec_vvv, MO_64, do_vsubwod_u)
1166*5c23704eSSong GaoTRANS(xvsubwod_h_bu, LASX, gvec_xxx, MO_8, do_vsubwod_u)
1167*5c23704eSSong GaoTRANS(xvsubwod_w_hu, LASX, gvec_xxx, MO_16, do_vsubwod_u)
1168*5c23704eSSong GaoTRANS(xvsubwod_d_wu, LASX, gvec_xxx, MO_32, do_vsubwod_u)
1169*5c23704eSSong GaoTRANS(xvsubwod_q_du, LASX, gvec_xxx, MO_64, do_vsubwod_u)
1170*5c23704eSSong Gao
1171*5c23704eSSong Gaostatic void gen_vaddwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1172*5c23704eSSong Gao{
1173*5c23704eSSong Gao    TCGv_vec t1, t2, t3;
1174*5c23704eSSong Gao
1175*5c23704eSSong Gao    int halfbits = 4 << vece;
1176*5c23704eSSong Gao
1177*5c23704eSSong Gao    t1 = tcg_temp_new_vec_matching(a);
1178*5c23704eSSong Gao    t2 = tcg_temp_new_vec_matching(b);
1179*5c23704eSSong Gao    t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, halfbits));
1180*5c23704eSSong Gao
1181*5c23704eSSong Gao    /* Zero-extend the even elements from a */
1182*5c23704eSSong Gao    tcg_gen_and_vec(vece, t1, a, t3);
1183*5c23704eSSong Gao
1184*5c23704eSSong Gao    /* Sign-extend the even elements from b */
1185*5c23704eSSong Gao    tcg_gen_shli_vec(vece, t2, b, halfbits);
1186*5c23704eSSong Gao    tcg_gen_sari_vec(vece, t2, t2, halfbits);
1187*5c23704eSSong Gao
1188*5c23704eSSong Gao    tcg_gen_add_vec(vece, t, t1, t2);
1189*5c23704eSSong Gao}
1190*5c23704eSSong Gao
1191*5c23704eSSong Gaostatic void gen_vaddwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
1192*5c23704eSSong Gao{
1193*5c23704eSSong Gao    TCGv_i32 t1, t2;
1194*5c23704eSSong Gao
1195*5c23704eSSong Gao    t1 = tcg_temp_new_i32();
1196*5c23704eSSong Gao    t2 = tcg_temp_new_i32();
1197*5c23704eSSong Gao    tcg_gen_ext16u_i32(t1, a);
1198*5c23704eSSong Gao    tcg_gen_ext16s_i32(t2, b);
1199*5c23704eSSong Gao    tcg_gen_add_i32(t, t1, t2);
1200*5c23704eSSong Gao}
1201*5c23704eSSong Gao
1202*5c23704eSSong Gaostatic void gen_vaddwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1203*5c23704eSSong Gao{
1204*5c23704eSSong Gao    TCGv_i64 t1, t2;
1205*5c23704eSSong Gao
1206*5c23704eSSong Gao    t1 = tcg_temp_new_i64();
1207*5c23704eSSong Gao    t2 = tcg_temp_new_i64();
1208*5c23704eSSong Gao    tcg_gen_ext32u_i64(t1, a);
1209*5c23704eSSong Gao    tcg_gen_ext32s_i64(t2, b);
1210*5c23704eSSong Gao    tcg_gen_add_i64(t, t1, t2);
1211*5c23704eSSong Gao}
1212*5c23704eSSong Gao
1213*5c23704eSSong Gaostatic void do_vaddwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1214*5c23704eSSong Gao                           uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1215*5c23704eSSong Gao{
1216*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
1217*5c23704eSSong Gao        INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
1218*5c23704eSSong Gao        };
1219*5c23704eSSong Gao    static const GVecGen3 op[4] = {
1220*5c23704eSSong Gao        {
1221*5c23704eSSong Gao            .fniv = gen_vaddwev_u_s,
1222*5c23704eSSong Gao            .fno = gen_helper_vaddwev_h_bu_b,
1223*5c23704eSSong Gao            .opt_opc = vecop_list,
1224*5c23704eSSong Gao            .vece = MO_16
1225*5c23704eSSong Gao        },
1226*5c23704eSSong Gao        {
1227*5c23704eSSong Gao            .fni4 = gen_vaddwev_w_hu_h,
1228*5c23704eSSong Gao            .fniv = gen_vaddwev_u_s,
1229*5c23704eSSong Gao            .fno = gen_helper_vaddwev_w_hu_h,
1230*5c23704eSSong Gao            .opt_opc = vecop_list,
1231*5c23704eSSong Gao            .vece = MO_32
1232*5c23704eSSong Gao        },
1233*5c23704eSSong Gao        {
1234*5c23704eSSong Gao            .fni8 = gen_vaddwev_d_wu_w,
1235*5c23704eSSong Gao            .fniv = gen_vaddwev_u_s,
1236*5c23704eSSong Gao            .fno = gen_helper_vaddwev_d_wu_w,
1237*5c23704eSSong Gao            .opt_opc = vecop_list,
1238*5c23704eSSong Gao            .vece = MO_64
1239*5c23704eSSong Gao        },
1240*5c23704eSSong Gao        {
1241*5c23704eSSong Gao            .fno = gen_helper_vaddwev_q_du_d,
1242*5c23704eSSong Gao            .vece = MO_128
1243*5c23704eSSong Gao        },
1244*5c23704eSSong Gao    };
1245*5c23704eSSong Gao
1246*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1247*5c23704eSSong Gao}
1248*5c23704eSSong Gao
1249*5c23704eSSong GaoTRANS(vaddwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vaddwev_u_s)
1250*5c23704eSSong GaoTRANS(vaddwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vaddwev_u_s)
1251*5c23704eSSong GaoTRANS(vaddwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vaddwev_u_s)
1252*5c23704eSSong GaoTRANS(vaddwev_q_du_d, LSX, gvec_vvv, MO_64, do_vaddwev_u_s)
1253*5c23704eSSong GaoTRANS(xvaddwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vaddwev_u_s)
1254*5c23704eSSong GaoTRANS(xvaddwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vaddwev_u_s)
1255*5c23704eSSong GaoTRANS(xvaddwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vaddwev_u_s)
1256*5c23704eSSong GaoTRANS(xvaddwev_q_du_d, LASX, gvec_xxx, MO_64, do_vaddwev_u_s)
1257*5c23704eSSong Gao
1258*5c23704eSSong Gaostatic void gen_vaddwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1259*5c23704eSSong Gao{
1260*5c23704eSSong Gao    TCGv_vec t1, t2;
1261*5c23704eSSong Gao
1262*5c23704eSSong Gao    int halfbits = 4 << vece;
1263*5c23704eSSong Gao
1264*5c23704eSSong Gao    t1 = tcg_temp_new_vec_matching(a);
1265*5c23704eSSong Gao    t2 = tcg_temp_new_vec_matching(b);
1266*5c23704eSSong Gao
1267*5c23704eSSong Gao    /* Zero-extend the odd elements from a */
1268*5c23704eSSong Gao    tcg_gen_shri_vec(vece, t1, a, halfbits);
1269*5c23704eSSong Gao    /* Sign-extend the odd elements from b */
1270*5c23704eSSong Gao    tcg_gen_sari_vec(vece, t2, b, halfbits);
1271*5c23704eSSong Gao
1272*5c23704eSSong Gao    tcg_gen_add_vec(vece, t, t1, t2);
1273*5c23704eSSong Gao}
1274*5c23704eSSong Gao
1275*5c23704eSSong Gaostatic void gen_vaddwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
1276*5c23704eSSong Gao{
1277*5c23704eSSong Gao    TCGv_i32 t1, t2;
1278*5c23704eSSong Gao
1279*5c23704eSSong Gao    t1 = tcg_temp_new_i32();
1280*5c23704eSSong Gao    t2 = tcg_temp_new_i32();
1281*5c23704eSSong Gao    tcg_gen_shri_i32(t1, a, 16);
1282*5c23704eSSong Gao    tcg_gen_sari_i32(t2, b, 16);
1283*5c23704eSSong Gao    tcg_gen_add_i32(t, t1, t2);
1284*5c23704eSSong Gao}
1285*5c23704eSSong Gao
1286*5c23704eSSong Gaostatic void gen_vaddwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1287*5c23704eSSong Gao{
1288*5c23704eSSong Gao    TCGv_i64 t1, t2;
1289*5c23704eSSong Gao
1290*5c23704eSSong Gao    t1 = tcg_temp_new_i64();
1291*5c23704eSSong Gao    t2 = tcg_temp_new_i64();
1292*5c23704eSSong Gao    tcg_gen_shri_i64(t1, a, 32);
1293*5c23704eSSong Gao    tcg_gen_sari_i64(t2, b, 32);
1294*5c23704eSSong Gao    tcg_gen_add_i64(t, t1, t2);
1295*5c23704eSSong Gao}
1296*5c23704eSSong Gao
1297*5c23704eSSong Gaostatic void do_vaddwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1298*5c23704eSSong Gao                           uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1299*5c23704eSSong Gao{
1300*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
1301*5c23704eSSong Gao        INDEX_op_shri_vec, INDEX_op_sari_vec,  INDEX_op_add_vec, 0
1302*5c23704eSSong Gao        };
1303*5c23704eSSong Gao    static const GVecGen3 op[4] = {
1304*5c23704eSSong Gao        {
1305*5c23704eSSong Gao            .fniv = gen_vaddwod_u_s,
1306*5c23704eSSong Gao            .fno = gen_helper_vaddwod_h_bu_b,
1307*5c23704eSSong Gao            .opt_opc = vecop_list,
1308*5c23704eSSong Gao            .vece = MO_16
1309*5c23704eSSong Gao        },
1310*5c23704eSSong Gao        {
1311*5c23704eSSong Gao            .fni4 = gen_vaddwod_w_hu_h,
1312*5c23704eSSong Gao            .fniv = gen_vaddwod_u_s,
1313*5c23704eSSong Gao            .fno = gen_helper_vaddwod_w_hu_h,
1314*5c23704eSSong Gao            .opt_opc = vecop_list,
1315*5c23704eSSong Gao            .vece = MO_32
1316*5c23704eSSong Gao        },
1317*5c23704eSSong Gao        {
1318*5c23704eSSong Gao            .fni8 = gen_vaddwod_d_wu_w,
1319*5c23704eSSong Gao            .fniv = gen_vaddwod_u_s,
1320*5c23704eSSong Gao            .fno = gen_helper_vaddwod_d_wu_w,
1321*5c23704eSSong Gao            .opt_opc = vecop_list,
1322*5c23704eSSong Gao            .vece = MO_64
1323*5c23704eSSong Gao        },
1324*5c23704eSSong Gao        {
1325*5c23704eSSong Gao            .fno = gen_helper_vaddwod_q_du_d,
1326*5c23704eSSong Gao            .vece = MO_128
1327*5c23704eSSong Gao        },
1328*5c23704eSSong Gao    };
1329*5c23704eSSong Gao
1330*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1331*5c23704eSSong Gao}
1332*5c23704eSSong Gao
1333*5c23704eSSong GaoTRANS(vaddwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vaddwod_u_s)
1334*5c23704eSSong GaoTRANS(vaddwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vaddwod_u_s)
1335*5c23704eSSong GaoTRANS(vaddwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vaddwod_u_s)
1336*5c23704eSSong GaoTRANS(vaddwod_q_du_d, LSX, gvec_vvv, MO_64, do_vaddwod_u_s)
1337*5c23704eSSong GaoTRANS(xvaddwod_h_bu_b, LSX, gvec_xxx, MO_8, do_vaddwod_u_s)
1338*5c23704eSSong GaoTRANS(xvaddwod_w_hu_h, LSX, gvec_xxx, MO_16, do_vaddwod_u_s)
1339*5c23704eSSong GaoTRANS(xvaddwod_d_wu_w, LSX, gvec_xxx, MO_32, do_vaddwod_u_s)
1340*5c23704eSSong GaoTRANS(xvaddwod_q_du_d, LSX, gvec_xxx, MO_64, do_vaddwod_u_s)
1341*5c23704eSSong Gao
1342*5c23704eSSong Gaostatic void do_vavg(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b,
1343*5c23704eSSong Gao                    void (*gen_shr_vec)(unsigned, TCGv_vec,
1344*5c23704eSSong Gao                                        TCGv_vec, int64_t),
1345*5c23704eSSong Gao                    void (*gen_round_vec)(unsigned, TCGv_vec,
1346*5c23704eSSong Gao                                          TCGv_vec, TCGv_vec))
1347*5c23704eSSong Gao{
1348*5c23704eSSong Gao    TCGv_vec tmp = tcg_temp_new_vec_matching(t);
1349*5c23704eSSong Gao    gen_round_vec(vece, tmp, a, b);
1350*5c23704eSSong Gao    tcg_gen_and_vec(vece, tmp, tmp, tcg_constant_vec_matching(t, vece, 1));
1351*5c23704eSSong Gao    gen_shr_vec(vece, a, a, 1);
1352*5c23704eSSong Gao    gen_shr_vec(vece, b, b, 1);
1353*5c23704eSSong Gao    tcg_gen_add_vec(vece, t, a, b);
1354*5c23704eSSong Gao    tcg_gen_add_vec(vece, t, t, tmp);
1355*5c23704eSSong Gao}
1356*5c23704eSSong Gao
1357*5c23704eSSong Gaostatic void gen_vavg_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1358*5c23704eSSong Gao{
1359*5c23704eSSong Gao    do_vavg(vece, t, a, b, tcg_gen_sari_vec, tcg_gen_and_vec);
1360*5c23704eSSong Gao}
1361*5c23704eSSong Gao
1362*5c23704eSSong Gaostatic void gen_vavg_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1363*5c23704eSSong Gao{
1364*5c23704eSSong Gao    do_vavg(vece, t, a, b, tcg_gen_shri_vec, tcg_gen_and_vec);
1365*5c23704eSSong Gao}
1366*5c23704eSSong Gao
1367*5c23704eSSong Gaostatic void gen_vavgr_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1368*5c23704eSSong Gao{
1369*5c23704eSSong Gao    do_vavg(vece, t, a, b, tcg_gen_sari_vec, tcg_gen_or_vec);
1370*5c23704eSSong Gao}
1371*5c23704eSSong Gao
1372*5c23704eSSong Gaostatic void gen_vavgr_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1373*5c23704eSSong Gao{
1374*5c23704eSSong Gao    do_vavg(vece, t, a, b, tcg_gen_shri_vec, tcg_gen_or_vec);
1375*5c23704eSSong Gao}
1376*5c23704eSSong Gao
1377*5c23704eSSong Gaostatic void do_vavg_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1378*5c23704eSSong Gao                      uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1379*5c23704eSSong Gao{
1380*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
1381*5c23704eSSong Gao        INDEX_op_sari_vec, INDEX_op_add_vec, 0
1382*5c23704eSSong Gao        };
1383*5c23704eSSong Gao    static const GVecGen3 op[4] = {
1384*5c23704eSSong Gao        {
1385*5c23704eSSong Gao            .fniv = gen_vavg_s,
1386*5c23704eSSong Gao            .fno = gen_helper_vavg_b,
1387*5c23704eSSong Gao            .opt_opc = vecop_list,
1388*5c23704eSSong Gao            .vece = MO_8
1389*5c23704eSSong Gao        },
1390*5c23704eSSong Gao        {
1391*5c23704eSSong Gao            .fniv = gen_vavg_s,
1392*5c23704eSSong Gao            .fno = gen_helper_vavg_h,
1393*5c23704eSSong Gao            .opt_opc = vecop_list,
1394*5c23704eSSong Gao            .vece = MO_16
1395*5c23704eSSong Gao        },
1396*5c23704eSSong Gao        {
1397*5c23704eSSong Gao            .fniv = gen_vavg_s,
1398*5c23704eSSong Gao            .fno = gen_helper_vavg_w,
1399*5c23704eSSong Gao            .opt_opc = vecop_list,
1400*5c23704eSSong Gao            .vece = MO_32
1401*5c23704eSSong Gao        },
1402*5c23704eSSong Gao        {
1403*5c23704eSSong Gao            .fniv = gen_vavg_s,
1404*5c23704eSSong Gao            .fno = gen_helper_vavg_d,
1405*5c23704eSSong Gao            .opt_opc = vecop_list,
1406*5c23704eSSong Gao            .vece = MO_64
1407*5c23704eSSong Gao        },
1408*5c23704eSSong Gao    };
1409*5c23704eSSong Gao
1410*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1411*5c23704eSSong Gao}
1412*5c23704eSSong Gao
1413*5c23704eSSong Gaostatic void do_vavg_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1414*5c23704eSSong Gao                      uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1415*5c23704eSSong Gao{
1416*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
1417*5c23704eSSong Gao        INDEX_op_shri_vec, INDEX_op_add_vec, 0
1418*5c23704eSSong Gao        };
1419*5c23704eSSong Gao    static const GVecGen3 op[4] = {
1420*5c23704eSSong Gao        {
1421*5c23704eSSong Gao            .fniv = gen_vavg_u,
1422*5c23704eSSong Gao            .fno = gen_helper_vavg_bu,
1423*5c23704eSSong Gao            .opt_opc = vecop_list,
1424*5c23704eSSong Gao            .vece = MO_8
1425*5c23704eSSong Gao        },
1426*5c23704eSSong Gao        {
1427*5c23704eSSong Gao            .fniv = gen_vavg_u,
1428*5c23704eSSong Gao            .fno = gen_helper_vavg_hu,
1429*5c23704eSSong Gao            .opt_opc = vecop_list,
1430*5c23704eSSong Gao            .vece = MO_16
1431*5c23704eSSong Gao        },
1432*5c23704eSSong Gao        {
1433*5c23704eSSong Gao            .fniv = gen_vavg_u,
1434*5c23704eSSong Gao            .fno = gen_helper_vavg_wu,
1435*5c23704eSSong Gao            .opt_opc = vecop_list,
1436*5c23704eSSong Gao            .vece = MO_32
1437*5c23704eSSong Gao        },
1438*5c23704eSSong Gao        {
1439*5c23704eSSong Gao            .fniv = gen_vavg_u,
1440*5c23704eSSong Gao            .fno = gen_helper_vavg_du,
1441*5c23704eSSong Gao            .opt_opc = vecop_list,
1442*5c23704eSSong Gao            .vece = MO_64
1443*5c23704eSSong Gao        },
1444*5c23704eSSong Gao    };
1445*5c23704eSSong Gao
1446*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1447*5c23704eSSong Gao}
1448*5c23704eSSong Gao
1449*5c23704eSSong GaoTRANS(vavg_b, LSX, gvec_vvv, MO_8, do_vavg_s)
1450*5c23704eSSong GaoTRANS(vavg_h, LSX, gvec_vvv, MO_16, do_vavg_s)
1451*5c23704eSSong GaoTRANS(vavg_w, LSX, gvec_vvv, MO_32, do_vavg_s)
1452*5c23704eSSong GaoTRANS(vavg_d, LSX, gvec_vvv, MO_64, do_vavg_s)
1453*5c23704eSSong GaoTRANS(vavg_bu, LSX, gvec_vvv, MO_8, do_vavg_u)
1454*5c23704eSSong GaoTRANS(vavg_hu, LSX, gvec_vvv, MO_16, do_vavg_u)
1455*5c23704eSSong GaoTRANS(vavg_wu, LSX, gvec_vvv, MO_32, do_vavg_u)
1456*5c23704eSSong GaoTRANS(vavg_du, LSX, gvec_vvv, MO_64, do_vavg_u)
1457*5c23704eSSong GaoTRANS(xvavg_b, LASX, gvec_xxx, MO_8, do_vavg_s)
1458*5c23704eSSong GaoTRANS(xvavg_h, LASX, gvec_xxx, MO_16, do_vavg_s)
1459*5c23704eSSong GaoTRANS(xvavg_w, LASX, gvec_xxx, MO_32, do_vavg_s)
1460*5c23704eSSong GaoTRANS(xvavg_d, LASX, gvec_xxx, MO_64, do_vavg_s)
1461*5c23704eSSong GaoTRANS(xvavg_bu, LASX, gvec_xxx, MO_8, do_vavg_u)
1462*5c23704eSSong GaoTRANS(xvavg_hu, LASX, gvec_xxx, MO_16, do_vavg_u)
1463*5c23704eSSong GaoTRANS(xvavg_wu, LASX, gvec_xxx, MO_32, do_vavg_u)
1464*5c23704eSSong GaoTRANS(xvavg_du, LASX, gvec_xxx, MO_64, do_vavg_u)
1465*5c23704eSSong Gao
1466*5c23704eSSong Gaostatic void do_vavgr_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1467*5c23704eSSong Gao                       uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1468*5c23704eSSong Gao{
1469*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
1470*5c23704eSSong Gao        INDEX_op_sari_vec, INDEX_op_add_vec, 0
1471*5c23704eSSong Gao        };
1472*5c23704eSSong Gao    static const GVecGen3 op[4] = {
1473*5c23704eSSong Gao        {
1474*5c23704eSSong Gao            .fniv = gen_vavgr_s,
1475*5c23704eSSong Gao            .fno = gen_helper_vavgr_b,
1476*5c23704eSSong Gao            .opt_opc = vecop_list,
1477*5c23704eSSong Gao            .vece = MO_8
1478*5c23704eSSong Gao        },
1479*5c23704eSSong Gao        {
1480*5c23704eSSong Gao            .fniv = gen_vavgr_s,
1481*5c23704eSSong Gao            .fno = gen_helper_vavgr_h,
1482*5c23704eSSong Gao            .opt_opc = vecop_list,
1483*5c23704eSSong Gao            .vece = MO_16
1484*5c23704eSSong Gao        },
1485*5c23704eSSong Gao        {
1486*5c23704eSSong Gao            .fniv = gen_vavgr_s,
1487*5c23704eSSong Gao            .fno = gen_helper_vavgr_w,
1488*5c23704eSSong Gao            .opt_opc = vecop_list,
1489*5c23704eSSong Gao            .vece = MO_32
1490*5c23704eSSong Gao        },
1491*5c23704eSSong Gao        {
1492*5c23704eSSong Gao            .fniv = gen_vavgr_s,
1493*5c23704eSSong Gao            .fno = gen_helper_vavgr_d,
1494*5c23704eSSong Gao            .opt_opc = vecop_list,
1495*5c23704eSSong Gao            .vece = MO_64
1496*5c23704eSSong Gao        },
1497*5c23704eSSong Gao    };
1498*5c23704eSSong Gao
1499*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1500*5c23704eSSong Gao}
1501*5c23704eSSong Gao
1502*5c23704eSSong Gaostatic void do_vavgr_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1503*5c23704eSSong Gao                       uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1504*5c23704eSSong Gao{
1505*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
1506*5c23704eSSong Gao        INDEX_op_shri_vec, INDEX_op_add_vec, 0
1507*5c23704eSSong Gao        };
1508*5c23704eSSong Gao    static const GVecGen3 op[4] = {
1509*5c23704eSSong Gao        {
1510*5c23704eSSong Gao            .fniv = gen_vavgr_u,
1511*5c23704eSSong Gao            .fno = gen_helper_vavgr_bu,
1512*5c23704eSSong Gao            .opt_opc = vecop_list,
1513*5c23704eSSong Gao            .vece = MO_8
1514*5c23704eSSong Gao        },
1515*5c23704eSSong Gao        {
1516*5c23704eSSong Gao            .fniv = gen_vavgr_u,
1517*5c23704eSSong Gao            .fno = gen_helper_vavgr_hu,
1518*5c23704eSSong Gao            .opt_opc = vecop_list,
1519*5c23704eSSong Gao            .vece = MO_16
1520*5c23704eSSong Gao        },
1521*5c23704eSSong Gao        {
1522*5c23704eSSong Gao            .fniv = gen_vavgr_u,
1523*5c23704eSSong Gao            .fno = gen_helper_vavgr_wu,
1524*5c23704eSSong Gao            .opt_opc = vecop_list,
1525*5c23704eSSong Gao            .vece = MO_32
1526*5c23704eSSong Gao        },
1527*5c23704eSSong Gao        {
1528*5c23704eSSong Gao            .fniv = gen_vavgr_u,
1529*5c23704eSSong Gao            .fno = gen_helper_vavgr_du,
1530*5c23704eSSong Gao            .opt_opc = vecop_list,
1531*5c23704eSSong Gao            .vece = MO_64
1532*5c23704eSSong Gao        },
1533*5c23704eSSong Gao    };
1534*5c23704eSSong Gao
1535*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1536*5c23704eSSong Gao}
1537*5c23704eSSong Gao
1538*5c23704eSSong GaoTRANS(vavgr_b, LSX, gvec_vvv, MO_8, do_vavgr_s)
1539*5c23704eSSong GaoTRANS(vavgr_h, LSX, gvec_vvv, MO_16, do_vavgr_s)
1540*5c23704eSSong GaoTRANS(vavgr_w, LSX, gvec_vvv, MO_32, do_vavgr_s)
1541*5c23704eSSong GaoTRANS(vavgr_d, LSX, gvec_vvv, MO_64, do_vavgr_s)
1542*5c23704eSSong GaoTRANS(vavgr_bu, LSX, gvec_vvv, MO_8, do_vavgr_u)
1543*5c23704eSSong GaoTRANS(vavgr_hu, LSX, gvec_vvv, MO_16, do_vavgr_u)
1544*5c23704eSSong GaoTRANS(vavgr_wu, LSX, gvec_vvv, MO_32, do_vavgr_u)
1545*5c23704eSSong GaoTRANS(vavgr_du, LSX, gvec_vvv, MO_64, do_vavgr_u)
1546*5c23704eSSong GaoTRANS(xvavgr_b, LASX, gvec_xxx, MO_8, do_vavgr_s)
1547*5c23704eSSong GaoTRANS(xvavgr_h, LASX, gvec_xxx, MO_16, do_vavgr_s)
1548*5c23704eSSong GaoTRANS(xvavgr_w, LASX, gvec_xxx, MO_32, do_vavgr_s)
1549*5c23704eSSong GaoTRANS(xvavgr_d, LASX, gvec_xxx, MO_64, do_vavgr_s)
1550*5c23704eSSong GaoTRANS(xvavgr_bu, LASX, gvec_xxx, MO_8, do_vavgr_u)
1551*5c23704eSSong GaoTRANS(xvavgr_hu, LASX, gvec_xxx, MO_16, do_vavgr_u)
1552*5c23704eSSong GaoTRANS(xvavgr_wu, LASX, gvec_xxx, MO_32, do_vavgr_u)
1553*5c23704eSSong GaoTRANS(xvavgr_du, LASX, gvec_xxx, MO_64, do_vavgr_u)
1554*5c23704eSSong Gao
1555*5c23704eSSong Gaostatic void gen_vabsd_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1556*5c23704eSSong Gao{
1557*5c23704eSSong Gao    tcg_gen_smax_vec(vece, t, a, b);
1558*5c23704eSSong Gao    tcg_gen_smin_vec(vece, a, a, b);
1559*5c23704eSSong Gao    tcg_gen_sub_vec(vece, t, t, a);
1560*5c23704eSSong Gao}
1561*5c23704eSSong Gao
1562*5c23704eSSong Gaostatic void do_vabsd_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1563*5c23704eSSong Gao                       uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1564*5c23704eSSong Gao{
1565*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
1566*5c23704eSSong Gao        INDEX_op_smax_vec, INDEX_op_smin_vec, INDEX_op_sub_vec, 0
1567*5c23704eSSong Gao        };
1568*5c23704eSSong Gao    static const GVecGen3 op[4] = {
1569*5c23704eSSong Gao        {
1570*5c23704eSSong Gao            .fniv = gen_vabsd_s,
1571*5c23704eSSong Gao            .fno = gen_helper_vabsd_b,
1572*5c23704eSSong Gao            .opt_opc = vecop_list,
1573*5c23704eSSong Gao            .vece = MO_8
1574*5c23704eSSong Gao        },
1575*5c23704eSSong Gao        {
1576*5c23704eSSong Gao            .fniv = gen_vabsd_s,
1577*5c23704eSSong Gao            .fno = gen_helper_vabsd_h,
1578*5c23704eSSong Gao            .opt_opc = vecop_list,
1579*5c23704eSSong Gao            .vece = MO_16
1580*5c23704eSSong Gao        },
1581*5c23704eSSong Gao        {
1582*5c23704eSSong Gao            .fniv = gen_vabsd_s,
1583*5c23704eSSong Gao            .fno = gen_helper_vabsd_w,
1584*5c23704eSSong Gao            .opt_opc = vecop_list,
1585*5c23704eSSong Gao            .vece = MO_32
1586*5c23704eSSong Gao        },
1587*5c23704eSSong Gao        {
1588*5c23704eSSong Gao            .fniv = gen_vabsd_s,
1589*5c23704eSSong Gao            .fno = gen_helper_vabsd_d,
1590*5c23704eSSong Gao            .opt_opc = vecop_list,
1591*5c23704eSSong Gao            .vece = MO_64
1592*5c23704eSSong Gao        },
1593*5c23704eSSong Gao    };
1594*5c23704eSSong Gao
1595*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1596*5c23704eSSong Gao}
1597*5c23704eSSong Gao
1598*5c23704eSSong Gaostatic void gen_vabsd_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1599*5c23704eSSong Gao{
1600*5c23704eSSong Gao    tcg_gen_umax_vec(vece, t, a, b);
1601*5c23704eSSong Gao    tcg_gen_umin_vec(vece, a, a, b);
1602*5c23704eSSong Gao    tcg_gen_sub_vec(vece, t, t, a);
1603*5c23704eSSong Gao}
1604*5c23704eSSong Gao
1605*5c23704eSSong Gaostatic void do_vabsd_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1606*5c23704eSSong Gao                       uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1607*5c23704eSSong Gao{
1608*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
1609*5c23704eSSong Gao        INDEX_op_umax_vec, INDEX_op_umin_vec, INDEX_op_sub_vec, 0
1610*5c23704eSSong Gao        };
1611*5c23704eSSong Gao    static const GVecGen3 op[4] = {
1612*5c23704eSSong Gao        {
1613*5c23704eSSong Gao            .fniv = gen_vabsd_u,
1614*5c23704eSSong Gao            .fno = gen_helper_vabsd_bu,
1615*5c23704eSSong Gao            .opt_opc = vecop_list,
1616*5c23704eSSong Gao            .vece = MO_8
1617*5c23704eSSong Gao        },
1618*5c23704eSSong Gao        {
1619*5c23704eSSong Gao            .fniv = gen_vabsd_u,
1620*5c23704eSSong Gao            .fno = gen_helper_vabsd_hu,
1621*5c23704eSSong Gao            .opt_opc = vecop_list,
1622*5c23704eSSong Gao            .vece = MO_16
1623*5c23704eSSong Gao        },
1624*5c23704eSSong Gao        {
1625*5c23704eSSong Gao            .fniv = gen_vabsd_u,
1626*5c23704eSSong Gao            .fno = gen_helper_vabsd_wu,
1627*5c23704eSSong Gao            .opt_opc = vecop_list,
1628*5c23704eSSong Gao            .vece = MO_32
1629*5c23704eSSong Gao        },
1630*5c23704eSSong Gao        {
1631*5c23704eSSong Gao            .fniv = gen_vabsd_u,
1632*5c23704eSSong Gao            .fno = gen_helper_vabsd_du,
1633*5c23704eSSong Gao            .opt_opc = vecop_list,
1634*5c23704eSSong Gao            .vece = MO_64
1635*5c23704eSSong Gao        },
1636*5c23704eSSong Gao    };
1637*5c23704eSSong Gao
1638*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1639*5c23704eSSong Gao}
1640*5c23704eSSong Gao
1641*5c23704eSSong GaoTRANS(vabsd_b, LSX, gvec_vvv, MO_8, do_vabsd_s)
1642*5c23704eSSong GaoTRANS(vabsd_h, LSX, gvec_vvv, MO_16, do_vabsd_s)
1643*5c23704eSSong GaoTRANS(vabsd_w, LSX, gvec_vvv, MO_32, do_vabsd_s)
1644*5c23704eSSong GaoTRANS(vabsd_d, LSX, gvec_vvv, MO_64, do_vabsd_s)
1645*5c23704eSSong GaoTRANS(vabsd_bu, LSX, gvec_vvv, MO_8, do_vabsd_u)
1646*5c23704eSSong GaoTRANS(vabsd_hu, LSX, gvec_vvv, MO_16, do_vabsd_u)
1647*5c23704eSSong GaoTRANS(vabsd_wu, LSX, gvec_vvv, MO_32, do_vabsd_u)
1648*5c23704eSSong GaoTRANS(vabsd_du, LSX, gvec_vvv, MO_64, do_vabsd_u)
1649*5c23704eSSong GaoTRANS(xvabsd_b, LASX, gvec_xxx, MO_8, do_vabsd_s)
1650*5c23704eSSong GaoTRANS(xvabsd_h, LASX, gvec_xxx, MO_16, do_vabsd_s)
1651*5c23704eSSong GaoTRANS(xvabsd_w, LASX, gvec_xxx, MO_32, do_vabsd_s)
1652*5c23704eSSong GaoTRANS(xvabsd_d, LASX, gvec_xxx, MO_64, do_vabsd_s)
1653*5c23704eSSong GaoTRANS(xvabsd_bu, LASX, gvec_xxx, MO_8, do_vabsd_u)
1654*5c23704eSSong GaoTRANS(xvabsd_hu, LASX, gvec_xxx, MO_16, do_vabsd_u)
1655*5c23704eSSong GaoTRANS(xvabsd_wu, LASX, gvec_xxx, MO_32, do_vabsd_u)
1656*5c23704eSSong GaoTRANS(xvabsd_du, LASX, gvec_xxx, MO_64, do_vabsd_u)
1657*5c23704eSSong Gao
1658*5c23704eSSong Gaostatic void gen_vadda(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1659*5c23704eSSong Gao{
1660*5c23704eSSong Gao    TCGv_vec t1, t2;
1661*5c23704eSSong Gao
1662*5c23704eSSong Gao    t1 = tcg_temp_new_vec_matching(a);
1663*5c23704eSSong Gao    t2 = tcg_temp_new_vec_matching(b);
1664*5c23704eSSong Gao
1665*5c23704eSSong Gao    tcg_gen_abs_vec(vece, t1, a);
1666*5c23704eSSong Gao    tcg_gen_abs_vec(vece, t2, b);
1667*5c23704eSSong Gao    tcg_gen_add_vec(vece, t, t1, t2);
1668*5c23704eSSong Gao}
1669*5c23704eSSong Gao
1670*5c23704eSSong Gaostatic void do_vadda(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1671*5c23704eSSong Gao                     uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1672*5c23704eSSong Gao{
1673*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
1674*5c23704eSSong Gao        INDEX_op_abs_vec, INDEX_op_add_vec, 0
1675*5c23704eSSong Gao        };
1676*5c23704eSSong Gao    static const GVecGen3 op[4] = {
1677*5c23704eSSong Gao        {
1678*5c23704eSSong Gao            .fniv = gen_vadda,
1679*5c23704eSSong Gao            .fno = gen_helper_vadda_b,
1680*5c23704eSSong Gao            .opt_opc = vecop_list,
1681*5c23704eSSong Gao            .vece = MO_8
1682*5c23704eSSong Gao        },
1683*5c23704eSSong Gao        {
1684*5c23704eSSong Gao            .fniv = gen_vadda,
1685*5c23704eSSong Gao            .fno = gen_helper_vadda_h,
1686*5c23704eSSong Gao            .opt_opc = vecop_list,
1687*5c23704eSSong Gao            .vece = MO_16
1688*5c23704eSSong Gao        },
1689*5c23704eSSong Gao        {
1690*5c23704eSSong Gao            .fniv = gen_vadda,
1691*5c23704eSSong Gao            .fno = gen_helper_vadda_w,
1692*5c23704eSSong Gao            .opt_opc = vecop_list,
1693*5c23704eSSong Gao            .vece = MO_32
1694*5c23704eSSong Gao        },
1695*5c23704eSSong Gao        {
1696*5c23704eSSong Gao            .fniv = gen_vadda,
1697*5c23704eSSong Gao            .fno = gen_helper_vadda_d,
1698*5c23704eSSong Gao            .opt_opc = vecop_list,
1699*5c23704eSSong Gao            .vece = MO_64
1700*5c23704eSSong Gao        },
1701*5c23704eSSong Gao    };
1702*5c23704eSSong Gao
1703*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1704*5c23704eSSong Gao}
1705*5c23704eSSong Gao
1706*5c23704eSSong GaoTRANS(vadda_b, LSX, gvec_vvv, MO_8, do_vadda)
1707*5c23704eSSong GaoTRANS(vadda_h, LSX, gvec_vvv, MO_16, do_vadda)
1708*5c23704eSSong GaoTRANS(vadda_w, LSX, gvec_vvv, MO_32, do_vadda)
1709*5c23704eSSong GaoTRANS(vadda_d, LSX, gvec_vvv, MO_64, do_vadda)
1710*5c23704eSSong GaoTRANS(xvadda_b, LASX, gvec_xxx, MO_8, do_vadda)
1711*5c23704eSSong GaoTRANS(xvadda_h, LASX, gvec_xxx, MO_16, do_vadda)
1712*5c23704eSSong GaoTRANS(xvadda_w, LASX, gvec_xxx, MO_32, do_vadda)
1713*5c23704eSSong GaoTRANS(xvadda_d, LASX, gvec_xxx, MO_64, do_vadda)
1714*5c23704eSSong Gao
1715*5c23704eSSong GaoTRANS(vmax_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_smax)
1716*5c23704eSSong GaoTRANS(vmax_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_smax)
1717*5c23704eSSong GaoTRANS(vmax_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_smax)
1718*5c23704eSSong GaoTRANS(vmax_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_smax)
1719*5c23704eSSong GaoTRANS(vmax_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_umax)
1720*5c23704eSSong GaoTRANS(vmax_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_umax)
1721*5c23704eSSong GaoTRANS(vmax_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_umax)
1722*5c23704eSSong GaoTRANS(vmax_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_umax)
1723*5c23704eSSong GaoTRANS(xvmax_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_smax)
1724*5c23704eSSong GaoTRANS(xvmax_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_smax)
1725*5c23704eSSong GaoTRANS(xvmax_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_smax)
1726*5c23704eSSong GaoTRANS(xvmax_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_smax)
1727*5c23704eSSong GaoTRANS(xvmax_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_umax)
1728*5c23704eSSong GaoTRANS(xvmax_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_umax)
1729*5c23704eSSong GaoTRANS(xvmax_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_umax)
1730*5c23704eSSong GaoTRANS(xvmax_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_umax)
1731*5c23704eSSong Gao
1732*5c23704eSSong GaoTRANS(vmin_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_smin)
1733*5c23704eSSong GaoTRANS(vmin_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_smin)
1734*5c23704eSSong GaoTRANS(vmin_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_smin)
1735*5c23704eSSong GaoTRANS(vmin_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_smin)
1736*5c23704eSSong GaoTRANS(vmin_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_umin)
1737*5c23704eSSong GaoTRANS(vmin_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_umin)
1738*5c23704eSSong GaoTRANS(vmin_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_umin)
1739*5c23704eSSong GaoTRANS(vmin_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_umin)
1740*5c23704eSSong GaoTRANS(xvmin_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_smin)
1741*5c23704eSSong GaoTRANS(xvmin_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_smin)
1742*5c23704eSSong GaoTRANS(xvmin_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_smin)
1743*5c23704eSSong GaoTRANS(xvmin_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_smin)
1744*5c23704eSSong GaoTRANS(xvmin_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_umin)
1745*5c23704eSSong GaoTRANS(xvmin_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_umin)
1746*5c23704eSSong GaoTRANS(xvmin_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_umin)
1747*5c23704eSSong GaoTRANS(xvmin_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_umin)
1748*5c23704eSSong Gao
1749*5c23704eSSong Gaostatic void gen_vmini_s(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
1750*5c23704eSSong Gao{
1751*5c23704eSSong Gao    tcg_gen_smin_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
1752*5c23704eSSong Gao}
1753*5c23704eSSong Gao
1754*5c23704eSSong Gaostatic void gen_vmini_u(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
1755*5c23704eSSong Gao{
1756*5c23704eSSong Gao    tcg_gen_umin_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
1757*5c23704eSSong Gao}
1758*5c23704eSSong Gao
1759*5c23704eSSong Gaostatic void gen_vmaxi_s(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
1760*5c23704eSSong Gao{
1761*5c23704eSSong Gao    tcg_gen_smax_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
1762*5c23704eSSong Gao}
1763*5c23704eSSong Gao
1764*5c23704eSSong Gaostatic void gen_vmaxi_u(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
1765*5c23704eSSong Gao{
1766*5c23704eSSong Gao    tcg_gen_umax_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
1767*5c23704eSSong Gao}
1768*5c23704eSSong Gao
1769*5c23704eSSong Gaostatic void do_vmini_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1770*5c23704eSSong Gao                       int64_t imm, uint32_t oprsz, uint32_t maxsz)
1771*5c23704eSSong Gao{
1772*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
1773*5c23704eSSong Gao        INDEX_op_smin_vec, 0
1774*5c23704eSSong Gao        };
1775*5c23704eSSong Gao    static const GVecGen2i op[4] = {
1776*5c23704eSSong Gao        {
1777*5c23704eSSong Gao            .fniv = gen_vmini_s,
1778*5c23704eSSong Gao            .fnoi = gen_helper_vmini_b,
1779*5c23704eSSong Gao            .opt_opc = vecop_list,
1780*5c23704eSSong Gao            .vece = MO_8
1781*5c23704eSSong Gao        },
1782*5c23704eSSong Gao        {
1783*5c23704eSSong Gao            .fniv = gen_vmini_s,
1784*5c23704eSSong Gao            .fnoi = gen_helper_vmini_h,
1785*5c23704eSSong Gao            .opt_opc = vecop_list,
1786*5c23704eSSong Gao            .vece = MO_16
1787*5c23704eSSong Gao        },
1788*5c23704eSSong Gao        {
1789*5c23704eSSong Gao            .fniv = gen_vmini_s,
1790*5c23704eSSong Gao            .fnoi = gen_helper_vmini_w,
1791*5c23704eSSong Gao            .opt_opc = vecop_list,
1792*5c23704eSSong Gao            .vece = MO_32
1793*5c23704eSSong Gao        },
1794*5c23704eSSong Gao        {
1795*5c23704eSSong Gao            .fniv = gen_vmini_s,
1796*5c23704eSSong Gao            .fnoi = gen_helper_vmini_d,
1797*5c23704eSSong Gao            .opt_opc = vecop_list,
1798*5c23704eSSong Gao            .vece = MO_64
1799*5c23704eSSong Gao        },
1800*5c23704eSSong Gao    };
1801*5c23704eSSong Gao
1802*5c23704eSSong Gao    tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
1803*5c23704eSSong Gao}
1804*5c23704eSSong Gao
1805*5c23704eSSong Gaostatic void do_vmini_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1806*5c23704eSSong Gao                       int64_t imm, uint32_t oprsz, uint32_t maxsz)
1807*5c23704eSSong Gao{
1808*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
1809*5c23704eSSong Gao        INDEX_op_umin_vec, 0
1810*5c23704eSSong Gao        };
1811*5c23704eSSong Gao    static const GVecGen2i op[4] = {
1812*5c23704eSSong Gao        {
1813*5c23704eSSong Gao            .fniv = gen_vmini_u,
1814*5c23704eSSong Gao            .fnoi = gen_helper_vmini_bu,
1815*5c23704eSSong Gao            .opt_opc = vecop_list,
1816*5c23704eSSong Gao            .vece = MO_8
1817*5c23704eSSong Gao        },
1818*5c23704eSSong Gao        {
1819*5c23704eSSong Gao            .fniv = gen_vmini_u,
1820*5c23704eSSong Gao            .fnoi = gen_helper_vmini_hu,
1821*5c23704eSSong Gao            .opt_opc = vecop_list,
1822*5c23704eSSong Gao            .vece = MO_16
1823*5c23704eSSong Gao        },
1824*5c23704eSSong Gao        {
1825*5c23704eSSong Gao            .fniv = gen_vmini_u,
1826*5c23704eSSong Gao            .fnoi = gen_helper_vmini_wu,
1827*5c23704eSSong Gao            .opt_opc = vecop_list,
1828*5c23704eSSong Gao            .vece = MO_32
1829*5c23704eSSong Gao        },
1830*5c23704eSSong Gao        {
1831*5c23704eSSong Gao            .fniv = gen_vmini_u,
1832*5c23704eSSong Gao            .fnoi = gen_helper_vmini_du,
1833*5c23704eSSong Gao            .opt_opc = vecop_list,
1834*5c23704eSSong Gao            .vece = MO_64
1835*5c23704eSSong Gao        },
1836*5c23704eSSong Gao    };
1837*5c23704eSSong Gao
1838*5c23704eSSong Gao    tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
1839*5c23704eSSong Gao}
1840*5c23704eSSong Gao
1841*5c23704eSSong GaoTRANS(vmini_b, LSX, gvec_vv_i, MO_8, do_vmini_s)
1842*5c23704eSSong GaoTRANS(vmini_h, LSX, gvec_vv_i, MO_16, do_vmini_s)
1843*5c23704eSSong GaoTRANS(vmini_w, LSX, gvec_vv_i, MO_32, do_vmini_s)
1844*5c23704eSSong GaoTRANS(vmini_d, LSX, gvec_vv_i, MO_64, do_vmini_s)
1845*5c23704eSSong GaoTRANS(vmini_bu, LSX, gvec_vv_i, MO_8, do_vmini_u)
1846*5c23704eSSong GaoTRANS(vmini_hu, LSX, gvec_vv_i, MO_16, do_vmini_u)
1847*5c23704eSSong GaoTRANS(vmini_wu, LSX, gvec_vv_i, MO_32, do_vmini_u)
1848*5c23704eSSong GaoTRANS(vmini_du, LSX, gvec_vv_i, MO_64, do_vmini_u)
1849*5c23704eSSong GaoTRANS(xvmini_b, LASX, gvec_xx_i, MO_8, do_vmini_s)
1850*5c23704eSSong GaoTRANS(xvmini_h, LASX, gvec_xx_i, MO_16, do_vmini_s)
1851*5c23704eSSong GaoTRANS(xvmini_w, LASX, gvec_xx_i, MO_32, do_vmini_s)
1852*5c23704eSSong GaoTRANS(xvmini_d, LASX, gvec_xx_i, MO_64, do_vmini_s)
1853*5c23704eSSong GaoTRANS(xvmini_bu, LASX, gvec_xx_i, MO_8, do_vmini_u)
1854*5c23704eSSong GaoTRANS(xvmini_hu, LASX, gvec_xx_i, MO_16, do_vmini_u)
1855*5c23704eSSong GaoTRANS(xvmini_wu, LASX, gvec_xx_i, MO_32, do_vmini_u)
1856*5c23704eSSong GaoTRANS(xvmini_du, LASX, gvec_xx_i, MO_64, do_vmini_u)
1857*5c23704eSSong Gao
1858*5c23704eSSong Gaostatic void do_vmaxi_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1859*5c23704eSSong Gao                       int64_t imm, uint32_t oprsz, uint32_t maxsz)
1860*5c23704eSSong Gao{
1861*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
1862*5c23704eSSong Gao        INDEX_op_smax_vec, 0
1863*5c23704eSSong Gao        };
1864*5c23704eSSong Gao    static const GVecGen2i op[4] = {
1865*5c23704eSSong Gao        {
1866*5c23704eSSong Gao            .fniv = gen_vmaxi_s,
1867*5c23704eSSong Gao            .fnoi = gen_helper_vmaxi_b,
1868*5c23704eSSong Gao            .opt_opc = vecop_list,
1869*5c23704eSSong Gao            .vece = MO_8
1870*5c23704eSSong Gao        },
1871*5c23704eSSong Gao        {
1872*5c23704eSSong Gao            .fniv = gen_vmaxi_s,
1873*5c23704eSSong Gao            .fnoi = gen_helper_vmaxi_h,
1874*5c23704eSSong Gao            .opt_opc = vecop_list,
1875*5c23704eSSong Gao            .vece = MO_16
1876*5c23704eSSong Gao        },
1877*5c23704eSSong Gao        {
1878*5c23704eSSong Gao            .fniv = gen_vmaxi_s,
1879*5c23704eSSong Gao            .fnoi = gen_helper_vmaxi_w,
1880*5c23704eSSong Gao            .opt_opc = vecop_list,
1881*5c23704eSSong Gao            .vece = MO_32
1882*5c23704eSSong Gao        },
1883*5c23704eSSong Gao        {
1884*5c23704eSSong Gao            .fniv = gen_vmaxi_s,
1885*5c23704eSSong Gao            .fnoi = gen_helper_vmaxi_d,
1886*5c23704eSSong Gao            .opt_opc = vecop_list,
1887*5c23704eSSong Gao            .vece = MO_64
1888*5c23704eSSong Gao        },
1889*5c23704eSSong Gao    };
1890*5c23704eSSong Gao
1891*5c23704eSSong Gao    tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
1892*5c23704eSSong Gao}
1893*5c23704eSSong Gao
1894*5c23704eSSong Gaostatic void do_vmaxi_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1895*5c23704eSSong Gao                       int64_t imm, uint32_t oprsz, uint32_t maxsz)
1896*5c23704eSSong Gao{
1897*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
1898*5c23704eSSong Gao        INDEX_op_umax_vec, 0
1899*5c23704eSSong Gao        };
1900*5c23704eSSong Gao    static const GVecGen2i op[4] = {
1901*5c23704eSSong Gao        {
1902*5c23704eSSong Gao            .fniv = gen_vmaxi_u,
1903*5c23704eSSong Gao            .fnoi = gen_helper_vmaxi_bu,
1904*5c23704eSSong Gao            .opt_opc = vecop_list,
1905*5c23704eSSong Gao            .vece = MO_8
1906*5c23704eSSong Gao        },
1907*5c23704eSSong Gao        {
1908*5c23704eSSong Gao            .fniv = gen_vmaxi_u,
1909*5c23704eSSong Gao            .fnoi = gen_helper_vmaxi_hu,
1910*5c23704eSSong Gao            .opt_opc = vecop_list,
1911*5c23704eSSong Gao            .vece = MO_16
1912*5c23704eSSong Gao        },
1913*5c23704eSSong Gao        {
1914*5c23704eSSong Gao            .fniv = gen_vmaxi_u,
1915*5c23704eSSong Gao            .fnoi = gen_helper_vmaxi_wu,
1916*5c23704eSSong Gao            .opt_opc = vecop_list,
1917*5c23704eSSong Gao            .vece = MO_32
1918*5c23704eSSong Gao        },
1919*5c23704eSSong Gao        {
1920*5c23704eSSong Gao            .fniv = gen_vmaxi_u,
1921*5c23704eSSong Gao            .fnoi = gen_helper_vmaxi_du,
1922*5c23704eSSong Gao            .opt_opc = vecop_list,
1923*5c23704eSSong Gao            .vece = MO_64
1924*5c23704eSSong Gao        },
1925*5c23704eSSong Gao    };
1926*5c23704eSSong Gao
1927*5c23704eSSong Gao    tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
1928*5c23704eSSong Gao}
1929*5c23704eSSong Gao
1930*5c23704eSSong GaoTRANS(vmaxi_b, LSX, gvec_vv_i, MO_8, do_vmaxi_s)
1931*5c23704eSSong GaoTRANS(vmaxi_h, LSX, gvec_vv_i, MO_16, do_vmaxi_s)
1932*5c23704eSSong GaoTRANS(vmaxi_w, LSX, gvec_vv_i, MO_32, do_vmaxi_s)
1933*5c23704eSSong GaoTRANS(vmaxi_d, LSX, gvec_vv_i, MO_64, do_vmaxi_s)
1934*5c23704eSSong GaoTRANS(vmaxi_bu, LSX, gvec_vv_i, MO_8, do_vmaxi_u)
1935*5c23704eSSong GaoTRANS(vmaxi_hu, LSX, gvec_vv_i, MO_16, do_vmaxi_u)
1936*5c23704eSSong GaoTRANS(vmaxi_wu, LSX, gvec_vv_i, MO_32, do_vmaxi_u)
1937*5c23704eSSong GaoTRANS(vmaxi_du, LSX, gvec_vv_i, MO_64, do_vmaxi_u)
1938*5c23704eSSong GaoTRANS(xvmaxi_b, LASX, gvec_xx_i, MO_8, do_vmaxi_s)
1939*5c23704eSSong GaoTRANS(xvmaxi_h, LASX, gvec_xx_i, MO_16, do_vmaxi_s)
1940*5c23704eSSong GaoTRANS(xvmaxi_w, LASX, gvec_xx_i, MO_32, do_vmaxi_s)
1941*5c23704eSSong GaoTRANS(xvmaxi_d, LASX, gvec_xx_i, MO_64, do_vmaxi_s)
1942*5c23704eSSong GaoTRANS(xvmaxi_bu, LASX, gvec_xx_i, MO_8, do_vmaxi_u)
1943*5c23704eSSong GaoTRANS(xvmaxi_hu, LASX, gvec_xx_i, MO_16, do_vmaxi_u)
1944*5c23704eSSong GaoTRANS(xvmaxi_wu, LASX, gvec_xx_i, MO_32, do_vmaxi_u)
1945*5c23704eSSong GaoTRANS(xvmaxi_du, LASX, gvec_xx_i, MO_64, do_vmaxi_u)
1946*5c23704eSSong Gao
1947*5c23704eSSong GaoTRANS(vmul_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_mul)
1948*5c23704eSSong GaoTRANS(vmul_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_mul)
1949*5c23704eSSong GaoTRANS(vmul_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_mul)
1950*5c23704eSSong GaoTRANS(vmul_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_mul)
1951*5c23704eSSong GaoTRANS(xvmul_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_mul)
1952*5c23704eSSong GaoTRANS(xvmul_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_mul)
1953*5c23704eSSong GaoTRANS(xvmul_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_mul)
1954*5c23704eSSong GaoTRANS(xvmul_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_mul)
1955*5c23704eSSong Gao
1956*5c23704eSSong Gaostatic void gen_vmuh_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
1957*5c23704eSSong Gao{
1958*5c23704eSSong Gao    TCGv_i32 discard = tcg_temp_new_i32();
1959*5c23704eSSong Gao    tcg_gen_muls2_i32(discard, t, a, b);
1960*5c23704eSSong Gao}
1961*5c23704eSSong Gao
1962*5c23704eSSong Gaostatic void gen_vmuh_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1963*5c23704eSSong Gao{
1964*5c23704eSSong Gao    TCGv_i64 discard = tcg_temp_new_i64();
1965*5c23704eSSong Gao    tcg_gen_muls2_i64(discard, t, a, b);
1966*5c23704eSSong Gao}
1967*5c23704eSSong Gao
1968*5c23704eSSong Gaostatic void do_vmuh_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1969*5c23704eSSong Gao                      uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1970*5c23704eSSong Gao{
1971*5c23704eSSong Gao    static const GVecGen3 op[4] = {
1972*5c23704eSSong Gao        {
1973*5c23704eSSong Gao            .fno = gen_helper_vmuh_b,
1974*5c23704eSSong Gao            .vece = MO_8
1975*5c23704eSSong Gao        },
1976*5c23704eSSong Gao        {
1977*5c23704eSSong Gao            .fno = gen_helper_vmuh_h,
1978*5c23704eSSong Gao            .vece = MO_16
1979*5c23704eSSong Gao        },
1980*5c23704eSSong Gao        {
1981*5c23704eSSong Gao            .fni4 = gen_vmuh_w,
1982*5c23704eSSong Gao            .fno = gen_helper_vmuh_w,
1983*5c23704eSSong Gao            .vece = MO_32
1984*5c23704eSSong Gao        },
1985*5c23704eSSong Gao        {
1986*5c23704eSSong Gao            .fni8 = gen_vmuh_d,
1987*5c23704eSSong Gao            .fno = gen_helper_vmuh_d,
1988*5c23704eSSong Gao            .vece = MO_64
1989*5c23704eSSong Gao        },
1990*5c23704eSSong Gao    };
1991*5c23704eSSong Gao
1992*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1993*5c23704eSSong Gao}
1994*5c23704eSSong Gao
1995*5c23704eSSong GaoTRANS(vmuh_b, LSX, gvec_vvv, MO_8, do_vmuh_s)
1996*5c23704eSSong GaoTRANS(vmuh_h, LSX, gvec_vvv, MO_16, do_vmuh_s)
1997*5c23704eSSong GaoTRANS(vmuh_w, LSX, gvec_vvv, MO_32, do_vmuh_s)
1998*5c23704eSSong GaoTRANS(vmuh_d, LSX, gvec_vvv, MO_64, do_vmuh_s)
1999*5c23704eSSong GaoTRANS(xvmuh_b, LASX, gvec_xxx, MO_8, do_vmuh_s)
2000*5c23704eSSong GaoTRANS(xvmuh_h, LASX, gvec_xxx, MO_16, do_vmuh_s)
2001*5c23704eSSong GaoTRANS(xvmuh_w, LASX, gvec_xxx, MO_32, do_vmuh_s)
2002*5c23704eSSong GaoTRANS(xvmuh_d, LASX, gvec_xxx, MO_64, do_vmuh_s)
2003*5c23704eSSong Gao
2004*5c23704eSSong Gaostatic void gen_vmuh_wu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2005*5c23704eSSong Gao{
2006*5c23704eSSong Gao    TCGv_i32 discard = tcg_temp_new_i32();
2007*5c23704eSSong Gao    tcg_gen_mulu2_i32(discard, t, a, b);
2008*5c23704eSSong Gao}
2009*5c23704eSSong Gao
2010*5c23704eSSong Gaostatic void gen_vmuh_du(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2011*5c23704eSSong Gao{
2012*5c23704eSSong Gao    TCGv_i64 discard = tcg_temp_new_i64();
2013*5c23704eSSong Gao    tcg_gen_mulu2_i64(discard, t, a, b);
2014*5c23704eSSong Gao}
2015*5c23704eSSong Gao
2016*5c23704eSSong Gaostatic void do_vmuh_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2017*5c23704eSSong Gao                      uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2018*5c23704eSSong Gao{
2019*5c23704eSSong Gao    static const GVecGen3 op[4] = {
2020*5c23704eSSong Gao        {
2021*5c23704eSSong Gao            .fno = gen_helper_vmuh_bu,
2022*5c23704eSSong Gao            .vece = MO_8
2023*5c23704eSSong Gao        },
2024*5c23704eSSong Gao        {
2025*5c23704eSSong Gao            .fno = gen_helper_vmuh_hu,
2026*5c23704eSSong Gao            .vece = MO_16
2027*5c23704eSSong Gao        },
2028*5c23704eSSong Gao        {
2029*5c23704eSSong Gao            .fni4 = gen_vmuh_wu,
2030*5c23704eSSong Gao            .fno = gen_helper_vmuh_wu,
2031*5c23704eSSong Gao            .vece = MO_32
2032*5c23704eSSong Gao        },
2033*5c23704eSSong Gao        {
2034*5c23704eSSong Gao            .fni8 = gen_vmuh_du,
2035*5c23704eSSong Gao            .fno = gen_helper_vmuh_du,
2036*5c23704eSSong Gao            .vece = MO_64
2037*5c23704eSSong Gao        },
2038*5c23704eSSong Gao    };
2039*5c23704eSSong Gao
2040*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2041*5c23704eSSong Gao}
2042*5c23704eSSong Gao
2043*5c23704eSSong GaoTRANS(vmuh_bu, LSX, gvec_vvv, MO_8,  do_vmuh_u)
2044*5c23704eSSong GaoTRANS(vmuh_hu, LSX, gvec_vvv, MO_16, do_vmuh_u)
2045*5c23704eSSong GaoTRANS(vmuh_wu, LSX, gvec_vvv, MO_32, do_vmuh_u)
2046*5c23704eSSong GaoTRANS(vmuh_du, LSX, gvec_vvv, MO_64, do_vmuh_u)
2047*5c23704eSSong GaoTRANS(xvmuh_bu, LASX, gvec_xxx, MO_8,  do_vmuh_u)
2048*5c23704eSSong GaoTRANS(xvmuh_hu, LASX, gvec_xxx, MO_16, do_vmuh_u)
2049*5c23704eSSong GaoTRANS(xvmuh_wu, LASX, gvec_xxx, MO_32, do_vmuh_u)
2050*5c23704eSSong GaoTRANS(xvmuh_du, LASX, gvec_xxx, MO_64, do_vmuh_u)
2051*5c23704eSSong Gao
2052*5c23704eSSong Gaostatic void gen_vmulwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2053*5c23704eSSong Gao{
2054*5c23704eSSong Gao    TCGv_vec t1, t2;
2055*5c23704eSSong Gao    int halfbits = 4 << vece;
2056*5c23704eSSong Gao
2057*5c23704eSSong Gao    t1 = tcg_temp_new_vec_matching(a);
2058*5c23704eSSong Gao    t2 = tcg_temp_new_vec_matching(b);
2059*5c23704eSSong Gao    tcg_gen_shli_vec(vece, t1, a, halfbits);
2060*5c23704eSSong Gao    tcg_gen_sari_vec(vece, t1, t1, halfbits);
2061*5c23704eSSong Gao    tcg_gen_shli_vec(vece, t2, b, halfbits);
2062*5c23704eSSong Gao    tcg_gen_sari_vec(vece, t2, t2, halfbits);
2063*5c23704eSSong Gao    tcg_gen_mul_vec(vece, t, t1, t2);
2064*5c23704eSSong Gao}
2065*5c23704eSSong Gao
2066*5c23704eSSong Gaostatic void gen_vmulwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2067*5c23704eSSong Gao{
2068*5c23704eSSong Gao    TCGv_i32 t1, t2;
2069*5c23704eSSong Gao
2070*5c23704eSSong Gao    t1 = tcg_temp_new_i32();
2071*5c23704eSSong Gao    t2 = tcg_temp_new_i32();
2072*5c23704eSSong Gao    tcg_gen_ext16s_i32(t1, a);
2073*5c23704eSSong Gao    tcg_gen_ext16s_i32(t2, b);
2074*5c23704eSSong Gao    tcg_gen_mul_i32(t, t1, t2);
2075*5c23704eSSong Gao}
2076*5c23704eSSong Gao
2077*5c23704eSSong Gaostatic void gen_vmulwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2078*5c23704eSSong Gao{
2079*5c23704eSSong Gao    TCGv_i64 t1, t2;
2080*5c23704eSSong Gao
2081*5c23704eSSong Gao    t1 = tcg_temp_new_i64();
2082*5c23704eSSong Gao    t2 = tcg_temp_new_i64();
2083*5c23704eSSong Gao    tcg_gen_ext32s_i64(t1, a);
2084*5c23704eSSong Gao    tcg_gen_ext32s_i64(t2, b);
2085*5c23704eSSong Gao    tcg_gen_mul_i64(t, t1, t2);
2086*5c23704eSSong Gao}
2087*5c23704eSSong Gao
2088*5c23704eSSong Gaostatic void do_vmulwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2089*5c23704eSSong Gao                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2090*5c23704eSSong Gao{
2091*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
2092*5c23704eSSong Gao        INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0
2093*5c23704eSSong Gao        };
2094*5c23704eSSong Gao    static const GVecGen3 op[3] = {
2095*5c23704eSSong Gao        {
2096*5c23704eSSong Gao            .fniv = gen_vmulwev_s,
2097*5c23704eSSong Gao            .fno = gen_helper_vmulwev_h_b,
2098*5c23704eSSong Gao            .opt_opc = vecop_list,
2099*5c23704eSSong Gao            .vece = MO_16
2100*5c23704eSSong Gao        },
2101*5c23704eSSong Gao        {
2102*5c23704eSSong Gao            .fni4 = gen_vmulwev_w_h,
2103*5c23704eSSong Gao            .fniv = gen_vmulwev_s,
2104*5c23704eSSong Gao            .fno = gen_helper_vmulwev_w_h,
2105*5c23704eSSong Gao            .opt_opc = vecop_list,
2106*5c23704eSSong Gao            .vece = MO_32
2107*5c23704eSSong Gao        },
2108*5c23704eSSong Gao        {
2109*5c23704eSSong Gao            .fni8 = gen_vmulwev_d_w,
2110*5c23704eSSong Gao            .fniv = gen_vmulwev_s,
2111*5c23704eSSong Gao            .fno = gen_helper_vmulwev_d_w,
2112*5c23704eSSong Gao            .opt_opc = vecop_list,
2113*5c23704eSSong Gao            .vece = MO_64
2114*5c23704eSSong Gao        },
2115*5c23704eSSong Gao    };
2116*5c23704eSSong Gao
2117*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2118*5c23704eSSong Gao}
2119*5c23704eSSong Gao
2120*5c23704eSSong GaoTRANS(vmulwev_h_b, LSX, gvec_vvv, MO_8, do_vmulwev_s)
2121*5c23704eSSong GaoTRANS(vmulwev_w_h, LSX, gvec_vvv, MO_16, do_vmulwev_s)
2122*5c23704eSSong GaoTRANS(vmulwev_d_w, LSX, gvec_vvv, MO_32, do_vmulwev_s)
2123*5c23704eSSong GaoTRANS(xvmulwev_h_b, LASX, gvec_xxx, MO_8, do_vmulwev_s)
2124*5c23704eSSong GaoTRANS(xvmulwev_w_h, LASX, gvec_xxx, MO_16, do_vmulwev_s)
2125*5c23704eSSong GaoTRANS(xvmulwev_d_w, LASX, gvec_xxx, MO_32, do_vmulwev_s)
2126*5c23704eSSong Gao
2127*5c23704eSSong Gaostatic void tcg_gen_mulus2_i64(TCGv_i64 rl, TCGv_i64 rh,
2128*5c23704eSSong Gao                               TCGv_i64 arg1, TCGv_i64 arg2)
2129*5c23704eSSong Gao{
2130*5c23704eSSong Gao    tcg_gen_mulsu2_i64(rl, rh, arg2, arg1);
2131*5c23704eSSong Gao}
2132*5c23704eSSong Gao
2133*5c23704eSSong Gaostatic bool gen_vmul_q_vl(DisasContext *ctx,
2134*5c23704eSSong Gao                          arg_vvv *a, uint32_t oprsz, int idx1, int idx2,
2135*5c23704eSSong Gao                          void (*func)(TCGv_i64, TCGv_i64,
2136*5c23704eSSong Gao                                       TCGv_i64, TCGv_i64))
2137*5c23704eSSong Gao{
2138*5c23704eSSong Gao    TCGv_i64 rh, rl, arg1, arg2;
2139*5c23704eSSong Gao    int i;
2140*5c23704eSSong Gao
2141*5c23704eSSong Gao    if (!check_vec(ctx, oprsz)) {
2142*5c23704eSSong Gao        return true;
2143*5c23704eSSong Gao    }
2144*5c23704eSSong Gao
2145*5c23704eSSong Gao    rh = tcg_temp_new_i64();
2146*5c23704eSSong Gao    rl = tcg_temp_new_i64();
2147*5c23704eSSong Gao    arg1 = tcg_temp_new_i64();
2148*5c23704eSSong Gao    arg2 = tcg_temp_new_i64();
2149*5c23704eSSong Gao
2150*5c23704eSSong Gao    for (i = 0; i < oprsz / 16; i++) {
2151*5c23704eSSong Gao        get_vreg64(arg1, a->vj, 2 * i + idx1);
2152*5c23704eSSong Gao        get_vreg64(arg2, a->vk, 2 * i + idx2);
2153*5c23704eSSong Gao
2154*5c23704eSSong Gao        func(rl, rh, arg1, arg2);
2155*5c23704eSSong Gao
2156*5c23704eSSong Gao        set_vreg64(rh, a->vd, 2 * i + 1);
2157*5c23704eSSong Gao        set_vreg64(rl, a->vd, 2 * i);
2158*5c23704eSSong Gao    }
2159*5c23704eSSong Gao
2160*5c23704eSSong Gao    return true;
2161*5c23704eSSong Gao}
2162*5c23704eSSong Gao
2163*5c23704eSSong Gaostatic bool gen_vmul_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2,
2164*5c23704eSSong Gao                       void (*func)(TCGv_i64, TCGv_i64,
2165*5c23704eSSong Gao                                    TCGv_i64, TCGv_i64))
2166*5c23704eSSong Gao{
2167*5c23704eSSong Gao    return gen_vmul_q_vl(ctx, a, 16, idx1, idx2, func);
2168*5c23704eSSong Gao}
2169*5c23704eSSong Gao
2170*5c23704eSSong Gaostatic bool gen_xvmul_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2,
2171*5c23704eSSong Gao                        void (*func)(TCGv_i64, TCGv_i64,
2172*5c23704eSSong Gao                                     TCGv_i64, TCGv_i64))
2173*5c23704eSSong Gao{
2174*5c23704eSSong Gao    return gen_vmul_q_vl(ctx, a, 32, idx1, idx2, func);
2175*5c23704eSSong Gao}
2176*5c23704eSSong Gao
2177*5c23704eSSong GaoTRANS(vmulwev_q_d, LSX, gen_vmul_q, 0, 0, tcg_gen_muls2_i64)
2178*5c23704eSSong GaoTRANS(vmulwod_q_d, LSX, gen_vmul_q, 1, 1, tcg_gen_muls2_i64)
2179*5c23704eSSong GaoTRANS(vmulwev_q_du, LSX, gen_vmul_q, 0, 0, tcg_gen_mulu2_i64)
2180*5c23704eSSong GaoTRANS(vmulwod_q_du, LSX, gen_vmul_q, 1, 1, tcg_gen_mulu2_i64)
2181*5c23704eSSong GaoTRANS(vmulwev_q_du_d, LSX, gen_vmul_q, 0, 0, tcg_gen_mulus2_i64)
2182*5c23704eSSong GaoTRANS(vmulwod_q_du_d, LSX, gen_vmul_q, 1, 1, tcg_gen_mulus2_i64)
2183*5c23704eSSong GaoTRANS(xvmulwev_q_d, LASX, gen_xvmul_q, 0, 0, tcg_gen_muls2_i64)
2184*5c23704eSSong GaoTRANS(xvmulwod_q_d, LASX, gen_xvmul_q, 1, 1, tcg_gen_muls2_i64)
2185*5c23704eSSong GaoTRANS(xvmulwev_q_du, LASX, gen_xvmul_q, 0, 0, tcg_gen_mulu2_i64)
2186*5c23704eSSong GaoTRANS(xvmulwod_q_du, LASX, gen_xvmul_q, 1, 1, tcg_gen_mulu2_i64)
2187*5c23704eSSong GaoTRANS(xvmulwev_q_du_d, LASX, gen_xvmul_q, 0, 0, tcg_gen_mulus2_i64)
2188*5c23704eSSong GaoTRANS(xvmulwod_q_du_d, LASX, gen_xvmul_q, 1, 1, tcg_gen_mulus2_i64)
2189*5c23704eSSong Gao
2190*5c23704eSSong Gaostatic void gen_vmulwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2191*5c23704eSSong Gao{
2192*5c23704eSSong Gao    TCGv_vec t1, t2;
2193*5c23704eSSong Gao    int halfbits = 4 << vece;
2194*5c23704eSSong Gao
2195*5c23704eSSong Gao    t1 = tcg_temp_new_vec_matching(a);
2196*5c23704eSSong Gao    t2 = tcg_temp_new_vec_matching(b);
2197*5c23704eSSong Gao    tcg_gen_sari_vec(vece, t1, a, halfbits);
2198*5c23704eSSong Gao    tcg_gen_sari_vec(vece, t2, b, halfbits);
2199*5c23704eSSong Gao    tcg_gen_mul_vec(vece, t, t1, t2);
2200*5c23704eSSong Gao}
2201*5c23704eSSong Gao
2202*5c23704eSSong Gaostatic void gen_vmulwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2203*5c23704eSSong Gao{
2204*5c23704eSSong Gao    TCGv_i32 t1, t2;
2205*5c23704eSSong Gao
2206*5c23704eSSong Gao    t1 = tcg_temp_new_i32();
2207*5c23704eSSong Gao    t2 = tcg_temp_new_i32();
2208*5c23704eSSong Gao    tcg_gen_sari_i32(t1, a, 16);
2209*5c23704eSSong Gao    tcg_gen_sari_i32(t2, b, 16);
2210*5c23704eSSong Gao    tcg_gen_mul_i32(t, t1, t2);
2211*5c23704eSSong Gao}
2212*5c23704eSSong Gao
2213*5c23704eSSong Gaostatic void gen_vmulwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2214*5c23704eSSong Gao{
2215*5c23704eSSong Gao    TCGv_i64 t1, t2;
2216*5c23704eSSong Gao
2217*5c23704eSSong Gao    t1 = tcg_temp_new_i64();
2218*5c23704eSSong Gao    t2 = tcg_temp_new_i64();
2219*5c23704eSSong Gao    tcg_gen_sari_i64(t1, a, 32);
2220*5c23704eSSong Gao    tcg_gen_sari_i64(t2, b, 32);
2221*5c23704eSSong Gao    tcg_gen_mul_i64(t, t1, t2);
2222*5c23704eSSong Gao}
2223*5c23704eSSong Gao
2224*5c23704eSSong Gaostatic void do_vmulwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2225*5c23704eSSong Gao                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2226*5c23704eSSong Gao{
2227*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
2228*5c23704eSSong Gao        INDEX_op_sari_vec, INDEX_op_mul_vec, 0
2229*5c23704eSSong Gao        };
2230*5c23704eSSong Gao    static const GVecGen3 op[3] = {
2231*5c23704eSSong Gao        {
2232*5c23704eSSong Gao            .fniv = gen_vmulwod_s,
2233*5c23704eSSong Gao            .fno = gen_helper_vmulwod_h_b,
2234*5c23704eSSong Gao            .opt_opc = vecop_list,
2235*5c23704eSSong Gao            .vece = MO_16
2236*5c23704eSSong Gao        },
2237*5c23704eSSong Gao        {
2238*5c23704eSSong Gao            .fni4 = gen_vmulwod_w_h,
2239*5c23704eSSong Gao            .fniv = gen_vmulwod_s,
2240*5c23704eSSong Gao            .fno = gen_helper_vmulwod_w_h,
2241*5c23704eSSong Gao            .opt_opc = vecop_list,
2242*5c23704eSSong Gao            .vece = MO_32
2243*5c23704eSSong Gao        },
2244*5c23704eSSong Gao        {
2245*5c23704eSSong Gao            .fni8 = gen_vmulwod_d_w,
2246*5c23704eSSong Gao            .fniv = gen_vmulwod_s,
2247*5c23704eSSong Gao            .fno = gen_helper_vmulwod_d_w,
2248*5c23704eSSong Gao            .opt_opc = vecop_list,
2249*5c23704eSSong Gao            .vece = MO_64
2250*5c23704eSSong Gao        },
2251*5c23704eSSong Gao    };
2252*5c23704eSSong Gao
2253*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2254*5c23704eSSong Gao}
2255*5c23704eSSong Gao
2256*5c23704eSSong GaoTRANS(vmulwod_h_b, LSX, gvec_vvv, MO_8, do_vmulwod_s)
2257*5c23704eSSong GaoTRANS(vmulwod_w_h, LSX, gvec_vvv, MO_16, do_vmulwod_s)
2258*5c23704eSSong GaoTRANS(vmulwod_d_w, LSX, gvec_vvv, MO_32, do_vmulwod_s)
2259*5c23704eSSong GaoTRANS(xvmulwod_h_b, LASX, gvec_xxx, MO_8, do_vmulwod_s)
2260*5c23704eSSong GaoTRANS(xvmulwod_w_h, LASX, gvec_xxx, MO_16, do_vmulwod_s)
2261*5c23704eSSong GaoTRANS(xvmulwod_d_w, LASX, gvec_xxx, MO_32, do_vmulwod_s)
2262*5c23704eSSong Gao
2263*5c23704eSSong Gaostatic void gen_vmulwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2264*5c23704eSSong Gao{
2265*5c23704eSSong Gao    TCGv_vec t1, t2, mask;
2266*5c23704eSSong Gao
2267*5c23704eSSong Gao    t1 = tcg_temp_new_vec_matching(a);
2268*5c23704eSSong Gao    t2 = tcg_temp_new_vec_matching(b);
2269*5c23704eSSong Gao    mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
2270*5c23704eSSong Gao    tcg_gen_and_vec(vece, t1, a, mask);
2271*5c23704eSSong Gao    tcg_gen_and_vec(vece, t2, b, mask);
2272*5c23704eSSong Gao    tcg_gen_mul_vec(vece, t, t1, t2);
2273*5c23704eSSong Gao}
2274*5c23704eSSong Gao
2275*5c23704eSSong Gaostatic void gen_vmulwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2276*5c23704eSSong Gao{
2277*5c23704eSSong Gao    TCGv_i32 t1, t2;
2278*5c23704eSSong Gao
2279*5c23704eSSong Gao    t1 = tcg_temp_new_i32();
2280*5c23704eSSong Gao    t2 = tcg_temp_new_i32();
2281*5c23704eSSong Gao    tcg_gen_ext16u_i32(t1, a);
2282*5c23704eSSong Gao    tcg_gen_ext16u_i32(t2, b);
2283*5c23704eSSong Gao    tcg_gen_mul_i32(t, t1, t2);
2284*5c23704eSSong Gao}
2285*5c23704eSSong Gao
2286*5c23704eSSong Gaostatic void gen_vmulwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2287*5c23704eSSong Gao{
2288*5c23704eSSong Gao    TCGv_i64 t1, t2;
2289*5c23704eSSong Gao
2290*5c23704eSSong Gao    t1 = tcg_temp_new_i64();
2291*5c23704eSSong Gao    t2 = tcg_temp_new_i64();
2292*5c23704eSSong Gao    tcg_gen_ext32u_i64(t1, a);
2293*5c23704eSSong Gao    tcg_gen_ext32u_i64(t2, b);
2294*5c23704eSSong Gao    tcg_gen_mul_i64(t, t1, t2);
2295*5c23704eSSong Gao}
2296*5c23704eSSong Gao
2297*5c23704eSSong Gaostatic void do_vmulwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2298*5c23704eSSong Gao                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2299*5c23704eSSong Gao{
2300*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
2301*5c23704eSSong Gao        INDEX_op_mul_vec, 0
2302*5c23704eSSong Gao        };
2303*5c23704eSSong Gao    static const GVecGen3 op[3] = {
2304*5c23704eSSong Gao        {
2305*5c23704eSSong Gao            .fniv = gen_vmulwev_u,
2306*5c23704eSSong Gao            .fno = gen_helper_vmulwev_h_bu,
2307*5c23704eSSong Gao            .opt_opc = vecop_list,
2308*5c23704eSSong Gao            .vece = MO_16
2309*5c23704eSSong Gao        },
2310*5c23704eSSong Gao        {
2311*5c23704eSSong Gao            .fni4 = gen_vmulwev_w_hu,
2312*5c23704eSSong Gao            .fniv = gen_vmulwev_u,
2313*5c23704eSSong Gao            .fno = gen_helper_vmulwev_w_hu,
2314*5c23704eSSong Gao            .opt_opc = vecop_list,
2315*5c23704eSSong Gao            .vece = MO_32
2316*5c23704eSSong Gao        },
2317*5c23704eSSong Gao        {
2318*5c23704eSSong Gao            .fni8 = gen_vmulwev_d_wu,
2319*5c23704eSSong Gao            .fniv = gen_vmulwev_u,
2320*5c23704eSSong Gao            .fno = gen_helper_vmulwev_d_wu,
2321*5c23704eSSong Gao            .opt_opc = vecop_list,
2322*5c23704eSSong Gao            .vece = MO_64
2323*5c23704eSSong Gao        },
2324*5c23704eSSong Gao    };
2325*5c23704eSSong Gao
2326*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2327*5c23704eSSong Gao}
2328*5c23704eSSong Gao
2329*5c23704eSSong GaoTRANS(vmulwev_h_bu, LSX, gvec_vvv, MO_8, do_vmulwev_u)
2330*5c23704eSSong GaoTRANS(vmulwev_w_hu, LSX, gvec_vvv, MO_16, do_vmulwev_u)
2331*5c23704eSSong GaoTRANS(vmulwev_d_wu, LSX, gvec_vvv, MO_32, do_vmulwev_u)
2332*5c23704eSSong GaoTRANS(xvmulwev_h_bu, LASX, gvec_xxx, MO_8, do_vmulwev_u)
2333*5c23704eSSong GaoTRANS(xvmulwev_w_hu, LASX, gvec_xxx, MO_16, do_vmulwev_u)
2334*5c23704eSSong GaoTRANS(xvmulwev_d_wu, LASX, gvec_xxx, MO_32, do_vmulwev_u)
2335*5c23704eSSong Gao
2336*5c23704eSSong Gaostatic void gen_vmulwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2337*5c23704eSSong Gao{
2338*5c23704eSSong Gao    TCGv_vec t1, t2;
2339*5c23704eSSong Gao    int halfbits = 4 << vece;
2340*5c23704eSSong Gao
2341*5c23704eSSong Gao    t1 = tcg_temp_new_vec_matching(a);
2342*5c23704eSSong Gao    t2 = tcg_temp_new_vec_matching(b);
2343*5c23704eSSong Gao    tcg_gen_shri_vec(vece, t1, a, halfbits);
2344*5c23704eSSong Gao    tcg_gen_shri_vec(vece, t2, b, halfbits);
2345*5c23704eSSong Gao    tcg_gen_mul_vec(vece, t, t1, t2);
2346*5c23704eSSong Gao}
2347*5c23704eSSong Gao
2348*5c23704eSSong Gaostatic void gen_vmulwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2349*5c23704eSSong Gao{
2350*5c23704eSSong Gao    TCGv_i32 t1, t2;
2351*5c23704eSSong Gao
2352*5c23704eSSong Gao    t1 = tcg_temp_new_i32();
2353*5c23704eSSong Gao    t2 = tcg_temp_new_i32();
2354*5c23704eSSong Gao    tcg_gen_shri_i32(t1, a, 16);
2355*5c23704eSSong Gao    tcg_gen_shri_i32(t2, b, 16);
2356*5c23704eSSong Gao    tcg_gen_mul_i32(t, t1, t2);
2357*5c23704eSSong Gao}
2358*5c23704eSSong Gao
2359*5c23704eSSong Gaostatic void gen_vmulwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2360*5c23704eSSong Gao{
2361*5c23704eSSong Gao    TCGv_i64 t1, t2;
2362*5c23704eSSong Gao
2363*5c23704eSSong Gao    t1 = tcg_temp_new_i64();
2364*5c23704eSSong Gao    t2 = tcg_temp_new_i64();
2365*5c23704eSSong Gao    tcg_gen_shri_i64(t1, a, 32);
2366*5c23704eSSong Gao    tcg_gen_shri_i64(t2, b, 32);
2367*5c23704eSSong Gao    tcg_gen_mul_i64(t, t1, t2);
2368*5c23704eSSong Gao}
2369*5c23704eSSong Gao
2370*5c23704eSSong Gaostatic void do_vmulwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2371*5c23704eSSong Gao                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2372*5c23704eSSong Gao{
2373*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
2374*5c23704eSSong Gao        INDEX_op_shri_vec, INDEX_op_mul_vec, 0
2375*5c23704eSSong Gao        };
2376*5c23704eSSong Gao    static const GVecGen3 op[3] = {
2377*5c23704eSSong Gao        {
2378*5c23704eSSong Gao            .fniv = gen_vmulwod_u,
2379*5c23704eSSong Gao            .fno = gen_helper_vmulwod_h_bu,
2380*5c23704eSSong Gao            .opt_opc = vecop_list,
2381*5c23704eSSong Gao            .vece = MO_16
2382*5c23704eSSong Gao        },
2383*5c23704eSSong Gao        {
2384*5c23704eSSong Gao            .fni4 = gen_vmulwod_w_hu,
2385*5c23704eSSong Gao            .fniv = gen_vmulwod_u,
2386*5c23704eSSong Gao            .fno = gen_helper_vmulwod_w_hu,
2387*5c23704eSSong Gao            .opt_opc = vecop_list,
2388*5c23704eSSong Gao            .vece = MO_32
2389*5c23704eSSong Gao        },
2390*5c23704eSSong Gao        {
2391*5c23704eSSong Gao            .fni8 = gen_vmulwod_d_wu,
2392*5c23704eSSong Gao            .fniv = gen_vmulwod_u,
2393*5c23704eSSong Gao            .fno = gen_helper_vmulwod_d_wu,
2394*5c23704eSSong Gao            .opt_opc = vecop_list,
2395*5c23704eSSong Gao            .vece = MO_64
2396*5c23704eSSong Gao        },
2397*5c23704eSSong Gao    };
2398*5c23704eSSong Gao
2399*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2400*5c23704eSSong Gao}
2401*5c23704eSSong Gao
2402*5c23704eSSong GaoTRANS(vmulwod_h_bu, LSX, gvec_vvv, MO_8, do_vmulwod_u)
2403*5c23704eSSong GaoTRANS(vmulwod_w_hu, LSX, gvec_vvv, MO_16, do_vmulwod_u)
2404*5c23704eSSong GaoTRANS(vmulwod_d_wu, LSX, gvec_vvv, MO_32, do_vmulwod_u)
2405*5c23704eSSong GaoTRANS(xvmulwod_h_bu, LASX, gvec_xxx, MO_8, do_vmulwod_u)
2406*5c23704eSSong GaoTRANS(xvmulwod_w_hu, LASX, gvec_xxx, MO_16, do_vmulwod_u)
2407*5c23704eSSong GaoTRANS(xvmulwod_d_wu, LASX, gvec_xxx, MO_32, do_vmulwod_u)
2408*5c23704eSSong Gao
2409*5c23704eSSong Gaostatic void gen_vmulwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2410*5c23704eSSong Gao{
2411*5c23704eSSong Gao    TCGv_vec t1, t2, mask;
2412*5c23704eSSong Gao    int halfbits = 4 << vece;
2413*5c23704eSSong Gao
2414*5c23704eSSong Gao    t1 = tcg_temp_new_vec_matching(a);
2415*5c23704eSSong Gao    t2 = tcg_temp_new_vec_matching(b);
2416*5c23704eSSong Gao    mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
2417*5c23704eSSong Gao    tcg_gen_and_vec(vece, t1, a, mask);
2418*5c23704eSSong Gao    tcg_gen_shli_vec(vece, t2, b, halfbits);
2419*5c23704eSSong Gao    tcg_gen_sari_vec(vece, t2, t2, halfbits);
2420*5c23704eSSong Gao    tcg_gen_mul_vec(vece, t, t1, t2);
2421*5c23704eSSong Gao}
2422*5c23704eSSong Gao
2423*5c23704eSSong Gaostatic void gen_vmulwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2424*5c23704eSSong Gao{
2425*5c23704eSSong Gao    TCGv_i32 t1, t2;
2426*5c23704eSSong Gao
2427*5c23704eSSong Gao    t1 = tcg_temp_new_i32();
2428*5c23704eSSong Gao    t2 = tcg_temp_new_i32();
2429*5c23704eSSong Gao    tcg_gen_ext16u_i32(t1, a);
2430*5c23704eSSong Gao    tcg_gen_ext16s_i32(t2, b);
2431*5c23704eSSong Gao    tcg_gen_mul_i32(t, t1, t2);
2432*5c23704eSSong Gao}
2433*5c23704eSSong Gao
2434*5c23704eSSong Gaostatic void gen_vmulwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2435*5c23704eSSong Gao{
2436*5c23704eSSong Gao    TCGv_i64 t1, t2;
2437*5c23704eSSong Gao
2438*5c23704eSSong Gao    t1 = tcg_temp_new_i64();
2439*5c23704eSSong Gao    t2 = tcg_temp_new_i64();
2440*5c23704eSSong Gao    tcg_gen_ext32u_i64(t1, a);
2441*5c23704eSSong Gao    tcg_gen_ext32s_i64(t2, b);
2442*5c23704eSSong Gao    tcg_gen_mul_i64(t, t1, t2);
2443*5c23704eSSong Gao}
2444*5c23704eSSong Gao
2445*5c23704eSSong Gaostatic void do_vmulwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2446*5c23704eSSong Gao                           uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2447*5c23704eSSong Gao{
2448*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
2449*5c23704eSSong Gao        INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0
2450*5c23704eSSong Gao        };
2451*5c23704eSSong Gao    static const GVecGen3 op[3] = {
2452*5c23704eSSong Gao        {
2453*5c23704eSSong Gao            .fniv = gen_vmulwev_u_s,
2454*5c23704eSSong Gao            .fno = gen_helper_vmulwev_h_bu_b,
2455*5c23704eSSong Gao            .opt_opc = vecop_list,
2456*5c23704eSSong Gao            .vece = MO_16
2457*5c23704eSSong Gao        },
2458*5c23704eSSong Gao        {
2459*5c23704eSSong Gao            .fni4 = gen_vmulwev_w_hu_h,
2460*5c23704eSSong Gao            .fniv = gen_vmulwev_u_s,
2461*5c23704eSSong Gao            .fno = gen_helper_vmulwev_w_hu_h,
2462*5c23704eSSong Gao            .opt_opc = vecop_list,
2463*5c23704eSSong Gao            .vece = MO_32
2464*5c23704eSSong Gao        },
2465*5c23704eSSong Gao        {
2466*5c23704eSSong Gao            .fni8 = gen_vmulwev_d_wu_w,
2467*5c23704eSSong Gao            .fniv = gen_vmulwev_u_s,
2468*5c23704eSSong Gao            .fno = gen_helper_vmulwev_d_wu_w,
2469*5c23704eSSong Gao            .opt_opc = vecop_list,
2470*5c23704eSSong Gao            .vece = MO_64
2471*5c23704eSSong Gao        },
2472*5c23704eSSong Gao    };
2473*5c23704eSSong Gao
2474*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2475*5c23704eSSong Gao}
2476*5c23704eSSong Gao
/*
 * TRANS() entries for the vmulwev unsigned-by-signed forms.  The LSX
 * variants use gvec_vvv and the LASX variants use gvec_xxx; all of them
 * share do_vmulwev_u_s.  The MO_* argument appears to name the narrow
 * source element size (MO_8 for the h_bu_b forms, and so on).
 * NOTE(review): TRANS, gvec_vvv and gvec_xxx are defined outside this
 * chunk - confirm their exact expansion there.
 */
2477*5c23704eSSong GaoTRANS(vmulwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vmulwev_u_s)
2478*5c23704eSSong GaoTRANS(vmulwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vmulwev_u_s)
2479*5c23704eSSong GaoTRANS(vmulwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vmulwev_u_s)
2480*5c23704eSSong GaoTRANS(xvmulwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vmulwev_u_s)
2481*5c23704eSSong GaoTRANS(xvmulwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vmulwev_u_s)
2482*5c23704eSSong GaoTRANS(xvmulwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vmulwev_u_s)
2483*5c23704eSSong Gao
2484*5c23704eSSong Gaostatic void gen_vmulwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2485*5c23704eSSong Gao{
2486*5c23704eSSong Gao    TCGv_vec t1, t2;
2487*5c23704eSSong Gao    int halfbits = 4 << vece;
2488*5c23704eSSong Gao
2489*5c23704eSSong Gao    t1 = tcg_temp_new_vec_matching(a);
2490*5c23704eSSong Gao    t2 = tcg_temp_new_vec_matching(b);
2491*5c23704eSSong Gao    tcg_gen_shri_vec(vece, t1, a, halfbits);
2492*5c23704eSSong Gao    tcg_gen_sari_vec(vece, t2, b, halfbits);
2493*5c23704eSSong Gao    tcg_gen_mul_vec(vece, t, t1, t2);
2494*5c23704eSSong Gao}
2495*5c23704eSSong Gao
2496*5c23704eSSong Gaostatic void gen_vmulwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2497*5c23704eSSong Gao{
2498*5c23704eSSong Gao    TCGv_i32 t1, t2;
2499*5c23704eSSong Gao
2500*5c23704eSSong Gao    t1 = tcg_temp_new_i32();
2501*5c23704eSSong Gao    t2 = tcg_temp_new_i32();
2502*5c23704eSSong Gao    tcg_gen_shri_i32(t1, a, 16);
2503*5c23704eSSong Gao    tcg_gen_sari_i32(t2, b, 16);
2504*5c23704eSSong Gao    tcg_gen_mul_i32(t, t1, t2);
2505*5c23704eSSong Gao}
2506*5c23704eSSong Gaostatic void gen_vmulwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2507*5c23704eSSong Gao{
2508*5c23704eSSong Gao    TCGv_i64 t1, t2;
2509*5c23704eSSong Gao
2510*5c23704eSSong Gao    t1 = tcg_temp_new_i64();
2511*5c23704eSSong Gao    t2 = tcg_temp_new_i64();
2512*5c23704eSSong Gao    tcg_gen_shri_i64(t1, a, 32);
2513*5c23704eSSong Gao    tcg_gen_sari_i64(t2, b, 32);
2514*5c23704eSSong Gao    tcg_gen_mul_i64(t, t1, t2);
2515*5c23704eSSong Gao}
2516*5c23704eSSong Gao
2517*5c23704eSSong Gaostatic void do_vmulwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2518*5c23704eSSong Gao                           uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2519*5c23704eSSong Gao{
2520*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
2521*5c23704eSSong Gao        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0
2522*5c23704eSSong Gao        };
2523*5c23704eSSong Gao    static const GVecGen3 op[3] = {
2524*5c23704eSSong Gao        {
2525*5c23704eSSong Gao            .fniv = gen_vmulwod_u_s,
2526*5c23704eSSong Gao            .fno = gen_helper_vmulwod_h_bu_b,
2527*5c23704eSSong Gao            .opt_opc = vecop_list,
2528*5c23704eSSong Gao            .vece = MO_16
2529*5c23704eSSong Gao        },
2530*5c23704eSSong Gao        {
2531*5c23704eSSong Gao            .fni4 = gen_vmulwod_w_hu_h,
2532*5c23704eSSong Gao            .fniv = gen_vmulwod_u_s,
2533*5c23704eSSong Gao            .fno = gen_helper_vmulwod_w_hu_h,
2534*5c23704eSSong Gao            .opt_opc = vecop_list,
2535*5c23704eSSong Gao            .vece = MO_32
2536*5c23704eSSong Gao        },
2537*5c23704eSSong Gao        {
2538*5c23704eSSong Gao            .fni8 = gen_vmulwod_d_wu_w,
2539*5c23704eSSong Gao            .fniv = gen_vmulwod_u_s,
2540*5c23704eSSong Gao            .fno = gen_helper_vmulwod_d_wu_w,
2541*5c23704eSSong Gao            .opt_opc = vecop_list,
2542*5c23704eSSong Gao            .vece = MO_64
2543*5c23704eSSong Gao        },
2544*5c23704eSSong Gao    };
2545*5c23704eSSong Gao
2546*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2547*5c23704eSSong Gao}
2548*5c23704eSSong Gao
/*
 * TRANS() entries for the vmulwod unsigned-by-signed forms.  The LSX
 * variants use gvec_vvv and the LASX variants use gvec_xxx; all of them
 * share do_vmulwod_u_s, which indexes its descriptor table with the MO_*
 * value given here.
 * NOTE(review): TRANS, gvec_vvv and gvec_xxx are defined outside this
 * chunk - confirm their exact expansion there.
 */
2549*5c23704eSSong GaoTRANS(vmulwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vmulwod_u_s)
2550*5c23704eSSong GaoTRANS(vmulwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vmulwod_u_s)
2551*5c23704eSSong GaoTRANS(vmulwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vmulwod_u_s)
2552*5c23704eSSong GaoTRANS(xvmulwod_h_bu_b, LASX, gvec_xxx, MO_8, do_vmulwod_u_s)
2553*5c23704eSSong GaoTRANS(xvmulwod_w_hu_h, LASX, gvec_xxx, MO_16, do_vmulwod_u_s)
2554*5c23704eSSong GaoTRANS(xvmulwod_d_wu_w, LASX, gvec_xxx, MO_32, do_vmulwod_u_s)
2555*5c23704eSSong Gao
2556*5c23704eSSong Gaostatic void gen_vmadd(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2557*5c23704eSSong Gao{
2558*5c23704eSSong Gao    TCGv_vec t1;
2559*5c23704eSSong Gao
2560*5c23704eSSong Gao    t1 = tcg_temp_new_vec_matching(t);
2561*5c23704eSSong Gao    tcg_gen_mul_vec(vece, t1, a, b);
2562*5c23704eSSong Gao    tcg_gen_add_vec(vece, t, t, t1);
2563*5c23704eSSong Gao}
2564*5c23704eSSong Gao
2565*5c23704eSSong Gaostatic void gen_vmadd_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2566*5c23704eSSong Gao{
2567*5c23704eSSong Gao    TCGv_i32 t1;
2568*5c23704eSSong Gao
2569*5c23704eSSong Gao    t1 = tcg_temp_new_i32();
2570*5c23704eSSong Gao    tcg_gen_mul_i32(t1, a, b);
2571*5c23704eSSong Gao    tcg_gen_add_i32(t, t, t1);
2572*5c23704eSSong Gao}
2573*5c23704eSSong Gao
2574*5c23704eSSong Gaostatic void gen_vmadd_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2575*5c23704eSSong Gao{
2576*5c23704eSSong Gao    TCGv_i64 t1;
2577*5c23704eSSong Gao
2578*5c23704eSSong Gao    t1 = tcg_temp_new_i64();
2579*5c23704eSSong Gao    tcg_gen_mul_i64(t1, a, b);
2580*5c23704eSSong Gao    tcg_gen_add_i64(t, t, t1);
2581*5c23704eSSong Gao}
2582*5c23704eSSong Gao
2583*5c23704eSSong Gaostatic void do_vmadd(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2584*5c23704eSSong Gao                     uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2585*5c23704eSSong Gao{
2586*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
2587*5c23704eSSong Gao        INDEX_op_mul_vec, INDEX_op_add_vec, 0
2588*5c23704eSSong Gao        };
2589*5c23704eSSong Gao    static const GVecGen3 op[4] = {
2590*5c23704eSSong Gao        {
2591*5c23704eSSong Gao            .fniv = gen_vmadd,
2592*5c23704eSSong Gao            .fno = gen_helper_vmadd_b,
2593*5c23704eSSong Gao            .load_dest = true,
2594*5c23704eSSong Gao            .opt_opc = vecop_list,
2595*5c23704eSSong Gao            .vece = MO_8
2596*5c23704eSSong Gao        },
2597*5c23704eSSong Gao        {
2598*5c23704eSSong Gao            .fniv = gen_vmadd,
2599*5c23704eSSong Gao            .fno = gen_helper_vmadd_h,
2600*5c23704eSSong Gao            .load_dest = true,
2601*5c23704eSSong Gao            .opt_opc = vecop_list,
2602*5c23704eSSong Gao            .vece = MO_16
2603*5c23704eSSong Gao        },
2604*5c23704eSSong Gao        {
2605*5c23704eSSong Gao            .fni4 = gen_vmadd_w,
2606*5c23704eSSong Gao            .fniv = gen_vmadd,
2607*5c23704eSSong Gao            .fno = gen_helper_vmadd_w,
2608*5c23704eSSong Gao            .load_dest = true,
2609*5c23704eSSong Gao            .opt_opc = vecop_list,
2610*5c23704eSSong Gao            .vece = MO_32
2611*5c23704eSSong Gao        },
2612*5c23704eSSong Gao        {
2613*5c23704eSSong Gao            .fni8 = gen_vmadd_d,
2614*5c23704eSSong Gao            .fniv = gen_vmadd,
2615*5c23704eSSong Gao            .fno = gen_helper_vmadd_d,
2616*5c23704eSSong Gao            .load_dest = true,
2617*5c23704eSSong Gao            .opt_opc = vecop_list,
2618*5c23704eSSong Gao            .vece = MO_64
2619*5c23704eSSong Gao        },
2620*5c23704eSSong Gao    };
2621*5c23704eSSong Gao
2622*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2623*5c23704eSSong Gao}
2624*5c23704eSSong Gao
/*
 * TRANS() entries for vmadd/xvmadd at byte, half, word and double
 * element sizes.  The LSX variants use gvec_vvv and the LASX variants
 * use gvec_xxx; all of them share do_vmadd.
 * NOTE(review): TRANS, gvec_vvv and gvec_xxx are defined outside this
 * chunk - confirm their exact expansion there.
 */
2625*5c23704eSSong GaoTRANS(vmadd_b, LSX, gvec_vvv, MO_8, do_vmadd)
2626*5c23704eSSong GaoTRANS(vmadd_h, LSX, gvec_vvv, MO_16, do_vmadd)
2627*5c23704eSSong GaoTRANS(vmadd_w, LSX, gvec_vvv, MO_32, do_vmadd)
2628*5c23704eSSong GaoTRANS(vmadd_d, LSX, gvec_vvv, MO_64, do_vmadd)
2629*5c23704eSSong GaoTRANS(xvmadd_b, LASX, gvec_xxx, MO_8, do_vmadd)
2630*5c23704eSSong GaoTRANS(xvmadd_h, LASX, gvec_xxx, MO_16, do_vmadd)
2631*5c23704eSSong GaoTRANS(xvmadd_w, LASX, gvec_xxx, MO_32, do_vmadd)
2632*5c23704eSSong GaoTRANS(xvmadd_d, LASX, gvec_xxx, MO_64, do_vmadd)
2633*5c23704eSSong Gao
2634*5c23704eSSong Gaostatic void gen_vmsub(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2635*5c23704eSSong Gao{
2636*5c23704eSSong Gao    TCGv_vec t1;
2637*5c23704eSSong Gao
2638*5c23704eSSong Gao    t1 = tcg_temp_new_vec_matching(t);
2639*5c23704eSSong Gao    tcg_gen_mul_vec(vece, t1, a, b);
2640*5c23704eSSong Gao    tcg_gen_sub_vec(vece, t, t, t1);
2641*5c23704eSSong Gao}
2642*5c23704eSSong Gao
2643*5c23704eSSong Gaostatic void gen_vmsub_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2644*5c23704eSSong Gao{
2645*5c23704eSSong Gao    TCGv_i32 t1;
2646*5c23704eSSong Gao
2647*5c23704eSSong Gao    t1 = tcg_temp_new_i32();
2648*5c23704eSSong Gao    tcg_gen_mul_i32(t1, a, b);
2649*5c23704eSSong Gao    tcg_gen_sub_i32(t, t, t1);
2650*5c23704eSSong Gao}
2651*5c23704eSSong Gao
2652*5c23704eSSong Gaostatic void gen_vmsub_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2653*5c23704eSSong Gao{
2654*5c23704eSSong Gao    TCGv_i64 t1;
2655*5c23704eSSong Gao
2656*5c23704eSSong Gao    t1 = tcg_temp_new_i64();
2657*5c23704eSSong Gao    tcg_gen_mul_i64(t1, a, b);
2658*5c23704eSSong Gao    tcg_gen_sub_i64(t, t, t1);
2659*5c23704eSSong Gao}
2660*5c23704eSSong Gao
2661*5c23704eSSong Gaostatic void do_vmsub(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2662*5c23704eSSong Gao                     uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2663*5c23704eSSong Gao{
2664*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
2665*5c23704eSSong Gao        INDEX_op_mul_vec, INDEX_op_sub_vec, 0
2666*5c23704eSSong Gao        };
2667*5c23704eSSong Gao    static const GVecGen3 op[4] = {
2668*5c23704eSSong Gao        {
2669*5c23704eSSong Gao            .fniv = gen_vmsub,
2670*5c23704eSSong Gao            .fno = gen_helper_vmsub_b,
2671*5c23704eSSong Gao            .load_dest = true,
2672*5c23704eSSong Gao            .opt_opc = vecop_list,
2673*5c23704eSSong Gao            .vece = MO_8
2674*5c23704eSSong Gao        },
2675*5c23704eSSong Gao        {
2676*5c23704eSSong Gao            .fniv = gen_vmsub,
2677*5c23704eSSong Gao            .fno = gen_helper_vmsub_h,
2678*5c23704eSSong Gao            .load_dest = true,
2679*5c23704eSSong Gao            .opt_opc = vecop_list,
2680*5c23704eSSong Gao            .vece = MO_16
2681*5c23704eSSong Gao        },
2682*5c23704eSSong Gao        {
2683*5c23704eSSong Gao            .fni4 = gen_vmsub_w,
2684*5c23704eSSong Gao            .fniv = gen_vmsub,
2685*5c23704eSSong Gao            .fno = gen_helper_vmsub_w,
2686*5c23704eSSong Gao            .load_dest = true,
2687*5c23704eSSong Gao            .opt_opc = vecop_list,
2688*5c23704eSSong Gao            .vece = MO_32
2689*5c23704eSSong Gao        },
2690*5c23704eSSong Gao        {
2691*5c23704eSSong Gao            .fni8 = gen_vmsub_d,
2692*5c23704eSSong Gao            .fniv = gen_vmsub,
2693*5c23704eSSong Gao            .fno = gen_helper_vmsub_d,
2694*5c23704eSSong Gao            .load_dest = true,
2695*5c23704eSSong Gao            .opt_opc = vecop_list,
2696*5c23704eSSong Gao            .vece = MO_64
2697*5c23704eSSong Gao        },
2698*5c23704eSSong Gao    };
2699*5c23704eSSong Gao
2700*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2701*5c23704eSSong Gao}
2702*5c23704eSSong Gao
/*
 * TRANS() entries for vmsub/xvmsub at byte, half, word and double
 * element sizes.  The LSX variants use gvec_vvv and the LASX variants
 * use gvec_xxx; all of them share do_vmsub.
 * NOTE(review): TRANS, gvec_vvv and gvec_xxx are defined outside this
 * chunk - confirm their exact expansion there.
 */
2703*5c23704eSSong GaoTRANS(vmsub_b, LSX, gvec_vvv, MO_8, do_vmsub)
2704*5c23704eSSong GaoTRANS(vmsub_h, LSX, gvec_vvv, MO_16, do_vmsub)
2705*5c23704eSSong GaoTRANS(vmsub_w, LSX, gvec_vvv, MO_32, do_vmsub)
2706*5c23704eSSong GaoTRANS(vmsub_d, LSX, gvec_vvv, MO_64, do_vmsub)
2707*5c23704eSSong GaoTRANS(xvmsub_b, LASX, gvec_xxx, MO_8, do_vmsub)
2708*5c23704eSSong GaoTRANS(xvmsub_h, LASX, gvec_xxx, MO_16, do_vmsub)
2709*5c23704eSSong GaoTRANS(xvmsub_w, LASX, gvec_xxx, MO_32, do_vmsub)
2710*5c23704eSSong GaoTRANS(xvmsub_d, LASX, gvec_xxx, MO_64, do_vmsub)
2711*5c23704eSSong Gao
2712*5c23704eSSong Gaostatic void gen_vmaddwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2713*5c23704eSSong Gao{
2714*5c23704eSSong Gao    TCGv_vec t1, t2, t3;
2715*5c23704eSSong Gao    int halfbits = 4 << vece;
2716*5c23704eSSong Gao
2717*5c23704eSSong Gao    t1 = tcg_temp_new_vec_matching(a);
2718*5c23704eSSong Gao    t2 = tcg_temp_new_vec_matching(b);
2719*5c23704eSSong Gao    t3 = tcg_temp_new_vec_matching(t);
2720*5c23704eSSong Gao    tcg_gen_shli_vec(vece, t1, a, halfbits);
2721*5c23704eSSong Gao    tcg_gen_sari_vec(vece, t1, t1, halfbits);
2722*5c23704eSSong Gao    tcg_gen_shli_vec(vece, t2, b, halfbits);
2723*5c23704eSSong Gao    tcg_gen_sari_vec(vece, t2, t2, halfbits);
2724*5c23704eSSong Gao    tcg_gen_mul_vec(vece, t3, t1, t2);
2725*5c23704eSSong Gao    tcg_gen_add_vec(vece, t, t, t3);
2726*5c23704eSSong Gao}
2727*5c23704eSSong Gao
2728*5c23704eSSong Gaostatic void gen_vmaddwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2729*5c23704eSSong Gao{
2730*5c23704eSSong Gao    TCGv_i32 t1;
2731*5c23704eSSong Gao
2732*5c23704eSSong Gao    t1 = tcg_temp_new_i32();
2733*5c23704eSSong Gao    gen_vmulwev_w_h(t1, a, b);
2734*5c23704eSSong Gao    tcg_gen_add_i32(t, t, t1);
2735*5c23704eSSong Gao}
2736*5c23704eSSong Gao
2737*5c23704eSSong Gaostatic void gen_vmaddwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2738*5c23704eSSong Gao{
2739*5c23704eSSong Gao    TCGv_i64 t1;
2740*5c23704eSSong Gao
2741*5c23704eSSong Gao    t1 = tcg_temp_new_i64();
2742*5c23704eSSong Gao    gen_vmulwev_d_w(t1, a, b);
2743*5c23704eSSong Gao    tcg_gen_add_i64(t, t, t1);
2744*5c23704eSSong Gao}
2745*5c23704eSSong Gao
2746*5c23704eSSong Gaostatic void do_vmaddwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2747*5c23704eSSong Gao                          uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2748*5c23704eSSong Gao{
2749*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
2750*5c23704eSSong Gao        INDEX_op_shli_vec, INDEX_op_sari_vec,
2751*5c23704eSSong Gao        INDEX_op_mul_vec, INDEX_op_add_vec, 0
2752*5c23704eSSong Gao        };
2753*5c23704eSSong Gao    static const GVecGen3 op[3] = {
2754*5c23704eSSong Gao        {
2755*5c23704eSSong Gao            .fniv = gen_vmaddwev_s,
2756*5c23704eSSong Gao            .fno = gen_helper_vmaddwev_h_b,
2757*5c23704eSSong Gao            .load_dest = true,
2758*5c23704eSSong Gao            .opt_opc = vecop_list,
2759*5c23704eSSong Gao            .vece = MO_16
2760*5c23704eSSong Gao        },
2761*5c23704eSSong Gao        {
2762*5c23704eSSong Gao            .fni4 = gen_vmaddwev_w_h,
2763*5c23704eSSong Gao            .fniv = gen_vmaddwev_s,
2764*5c23704eSSong Gao            .fno = gen_helper_vmaddwev_w_h,
2765*5c23704eSSong Gao            .load_dest = true,
2766*5c23704eSSong Gao            .opt_opc = vecop_list,
2767*5c23704eSSong Gao            .vece = MO_32
2768*5c23704eSSong Gao        },
2769*5c23704eSSong Gao        {
2770*5c23704eSSong Gao            .fni8 = gen_vmaddwev_d_w,
2771*5c23704eSSong Gao            .fniv = gen_vmaddwev_s,
2772*5c23704eSSong Gao            .fno = gen_helper_vmaddwev_d_w,
2773*5c23704eSSong Gao            .load_dest = true,
2774*5c23704eSSong Gao            .opt_opc = vecop_list,
2775*5c23704eSSong Gao            .vece = MO_64
2776*5c23704eSSong Gao        },
2777*5c23704eSSong Gao    };
2778*5c23704eSSong Gao
2779*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2780*5c23704eSSong Gao}
2781*5c23704eSSong Gao
/*
 * TRANS() entries for the signed vmaddwev forms.  The LSX variants use
 * gvec_vvv and the LASX variants use gvec_xxx; all of them share
 * do_vmaddwev_s, which indexes its descriptor table with the MO_* value
 * given here.
 * NOTE(review): TRANS, gvec_vvv and gvec_xxx are defined outside this
 * chunk - confirm their exact expansion there.
 */
2782*5c23704eSSong GaoTRANS(vmaddwev_h_b, LSX, gvec_vvv, MO_8, do_vmaddwev_s)
2783*5c23704eSSong GaoTRANS(vmaddwev_w_h, LSX, gvec_vvv, MO_16, do_vmaddwev_s)
2784*5c23704eSSong GaoTRANS(vmaddwev_d_w, LSX, gvec_vvv, MO_32, do_vmaddwev_s)
2785*5c23704eSSong GaoTRANS(xvmaddwev_h_b, LASX, gvec_xxx, MO_8, do_vmaddwev_s)
2786*5c23704eSSong GaoTRANS(xvmaddwev_w_h, LASX, gvec_xxx, MO_16, do_vmaddwev_s)
2787*5c23704eSSong GaoTRANS(xvmaddwev_d_w, LASX, gvec_xxx, MO_32, do_vmaddwev_s)
2788*5c23704eSSong Gao
2789*5c23704eSSong Gaostatic bool gen_vmadd_q_vl(DisasContext * ctx,
2790*5c23704eSSong Gao                           arg_vvv *a, uint32_t oprsz, int idx1, int idx2,
2791*5c23704eSSong Gao                           void (*func)(TCGv_i64, TCGv_i64,
2792*5c23704eSSong Gao                                        TCGv_i64, TCGv_i64))
2793*5c23704eSSong Gao{
2794*5c23704eSSong Gao    TCGv_i64 rh, rl, arg1, arg2, th, tl;
2795*5c23704eSSong Gao    int i;
2796*5c23704eSSong Gao
2797*5c23704eSSong Gao    if (!check_vec(ctx, oprsz)) {
2798*5c23704eSSong Gao        return true;
2799*5c23704eSSong Gao    }
2800*5c23704eSSong Gao
2801*5c23704eSSong Gao    rh = tcg_temp_new_i64();
2802*5c23704eSSong Gao    rl = tcg_temp_new_i64();
2803*5c23704eSSong Gao    arg1 = tcg_temp_new_i64();
2804*5c23704eSSong Gao    arg2 = tcg_temp_new_i64();
2805*5c23704eSSong Gao    th = tcg_temp_new_i64();
2806*5c23704eSSong Gao    tl = tcg_temp_new_i64();
2807*5c23704eSSong Gao
2808*5c23704eSSong Gao    for (i = 0; i < oprsz / 16; i++) {
2809*5c23704eSSong Gao        get_vreg64(arg1, a->vj, 2 * i + idx1);
2810*5c23704eSSong Gao        get_vreg64(arg2, a->vk, 2 * i + idx2);
2811*5c23704eSSong Gao        get_vreg64(rh, a->vd, 2 * i + 1);
2812*5c23704eSSong Gao        get_vreg64(rl, a->vd, 2 * i);
2813*5c23704eSSong Gao
2814*5c23704eSSong Gao        func(tl, th, arg1, arg2);
2815*5c23704eSSong Gao        tcg_gen_add2_i64(rl, rh, rl, rh, tl, th);
2816*5c23704eSSong Gao
2817*5c23704eSSong Gao        set_vreg64(rh, a->vd, 2 * i + 1);
2818*5c23704eSSong Gao        set_vreg64(rl, a->vd, 2 * i);
2819*5c23704eSSong Gao    }
2820*5c23704eSSong Gao
2821*5c23704eSSong Gao    return true;
2822*5c23704eSSong Gao}
2823*5c23704eSSong Gao
2824*5c23704eSSong Gaostatic bool gen_vmadd_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2,
2825*5c23704eSSong Gao                        void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
2826*5c23704eSSong Gao{
2827*5c23704eSSong Gao    return gen_vmadd_q_vl(ctx, a, 16, idx1, idx2, func);
2828*5c23704eSSong Gao}
2829*5c23704eSSong Gao
2830*5c23704eSSong Gaostatic bool gen_xvmadd_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2,
2831*5c23704eSSong Gao                         void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
2832*5c23704eSSong Gao{
2833*5c23704eSSong Gao    return gen_vmadd_q_vl(ctx, a, 32, idx1, idx2, func);
2834*5c23704eSSong Gao}
2835*5c23704eSSong Gao
/*
 * TRANS() entries for the 128-bit accumulate forms.  The LSX variants go
 * through gen_vmadd_q and the LASX variants through gen_xvmadd_q.  The two
 * integers are the idx1/idx2 element selectors (0, 0 for the "wev" names
 * and 1, 1 for the "wod" names) and the last argument is the multiply
 * routine passed on as func.
 * NOTE(review): TRANS is defined outside this chunk - confirm its exact
 * expansion there.
 */
2836*5c23704eSSong GaoTRANS(vmaddwev_q_d, LSX, gen_vmadd_q, 0, 0, tcg_gen_muls2_i64)
2837*5c23704eSSong GaoTRANS(vmaddwod_q_d, LSX, gen_vmadd_q, 1, 1, tcg_gen_muls2_i64)
2838*5c23704eSSong GaoTRANS(vmaddwev_q_du, LSX, gen_vmadd_q, 0, 0, tcg_gen_mulu2_i64)
2839*5c23704eSSong GaoTRANS(vmaddwod_q_du, LSX, gen_vmadd_q, 1, 1, tcg_gen_mulu2_i64)
2840*5c23704eSSong GaoTRANS(vmaddwev_q_du_d, LSX, gen_vmadd_q, 0, 0, tcg_gen_mulus2_i64)
2841*5c23704eSSong GaoTRANS(vmaddwod_q_du_d, LSX, gen_vmadd_q, 1, 1, tcg_gen_mulus2_i64)
2842*5c23704eSSong GaoTRANS(xvmaddwev_q_d, LASX, gen_xvmadd_q, 0, 0, tcg_gen_muls2_i64)
2843*5c23704eSSong GaoTRANS(xvmaddwod_q_d, LASX, gen_xvmadd_q, 1, 1, tcg_gen_muls2_i64)
2844*5c23704eSSong GaoTRANS(xvmaddwev_q_du, LASX, gen_xvmadd_q, 0, 0, tcg_gen_mulu2_i64)
2845*5c23704eSSong GaoTRANS(xvmaddwod_q_du, LASX, gen_xvmadd_q, 1, 1, tcg_gen_mulu2_i64)
2846*5c23704eSSong GaoTRANS(xvmaddwev_q_du_d, LASX, gen_xvmadd_q, 0, 0, tcg_gen_mulus2_i64)
2847*5c23704eSSong GaoTRANS(xvmaddwod_q_du_d, LASX, gen_xvmadd_q, 1, 1, tcg_gen_mulus2_i64)
2848*5c23704eSSong Gao
2849*5c23704eSSong Gaostatic void gen_vmaddwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2850*5c23704eSSong Gao{
2851*5c23704eSSong Gao    TCGv_vec t1, t2, t3;
2852*5c23704eSSong Gao    int halfbits = 4 << vece;
2853*5c23704eSSong Gao
2854*5c23704eSSong Gao    t1 = tcg_temp_new_vec_matching(a);
2855*5c23704eSSong Gao    t2 = tcg_temp_new_vec_matching(b);
2856*5c23704eSSong Gao    t3 = tcg_temp_new_vec_matching(t);
2857*5c23704eSSong Gao    tcg_gen_sari_vec(vece, t1, a, halfbits);
2858*5c23704eSSong Gao    tcg_gen_sari_vec(vece, t2, b, halfbits);
2859*5c23704eSSong Gao    tcg_gen_mul_vec(vece, t3, t1, t2);
2860*5c23704eSSong Gao    tcg_gen_add_vec(vece, t, t, t3);
2861*5c23704eSSong Gao}
2862*5c23704eSSong Gao
2863*5c23704eSSong Gaostatic void gen_vmaddwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2864*5c23704eSSong Gao{
2865*5c23704eSSong Gao    TCGv_i32 t1;
2866*5c23704eSSong Gao
2867*5c23704eSSong Gao    t1 = tcg_temp_new_i32();
2868*5c23704eSSong Gao    gen_vmulwod_w_h(t1, a, b);
2869*5c23704eSSong Gao    tcg_gen_add_i32(t, t, t1);
2870*5c23704eSSong Gao}
2871*5c23704eSSong Gao
2872*5c23704eSSong Gaostatic void gen_vmaddwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2873*5c23704eSSong Gao{
2874*5c23704eSSong Gao    TCGv_i64 t1;
2875*5c23704eSSong Gao
2876*5c23704eSSong Gao    t1 = tcg_temp_new_i64();
2877*5c23704eSSong Gao    gen_vmulwod_d_w(t1, a, b);
2878*5c23704eSSong Gao    tcg_gen_add_i64(t, t, t1);
2879*5c23704eSSong Gao}
2880*5c23704eSSong Gao
2881*5c23704eSSong Gaostatic void do_vmaddwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2882*5c23704eSSong Gao                          uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2883*5c23704eSSong Gao{
2884*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
2885*5c23704eSSong Gao        INDEX_op_sari_vec, INDEX_op_mul_vec, INDEX_op_add_vec, 0
2886*5c23704eSSong Gao        };
2887*5c23704eSSong Gao    static const GVecGen3 op[3] = {
2888*5c23704eSSong Gao        {
2889*5c23704eSSong Gao            .fniv = gen_vmaddwod_s,
2890*5c23704eSSong Gao            .fno = gen_helper_vmaddwod_h_b,
2891*5c23704eSSong Gao            .load_dest = true,
2892*5c23704eSSong Gao            .opt_opc = vecop_list,
2893*5c23704eSSong Gao            .vece = MO_16
2894*5c23704eSSong Gao        },
2895*5c23704eSSong Gao        {
2896*5c23704eSSong Gao            .fni4 = gen_vmaddwod_w_h,
2897*5c23704eSSong Gao            .fniv = gen_vmaddwod_s,
2898*5c23704eSSong Gao            .fno = gen_helper_vmaddwod_w_h,
2899*5c23704eSSong Gao            .load_dest = true,
2900*5c23704eSSong Gao            .opt_opc = vecop_list,
2901*5c23704eSSong Gao            .vece = MO_32
2902*5c23704eSSong Gao        },
2903*5c23704eSSong Gao        {
2904*5c23704eSSong Gao            .fni8 = gen_vmaddwod_d_w,
2905*5c23704eSSong Gao            .fniv = gen_vmaddwod_s,
2906*5c23704eSSong Gao            .fno = gen_helper_vmaddwod_d_w,
2907*5c23704eSSong Gao            .load_dest = true,
2908*5c23704eSSong Gao            .opt_opc = vecop_list,
2909*5c23704eSSong Gao            .vece = MO_64
2910*5c23704eSSong Gao        },
2911*5c23704eSSong Gao    };
2912*5c23704eSSong Gao
2913*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2914*5c23704eSSong Gao}
2915*5c23704eSSong Gao
/*
 * TRANS() entries for the signed vmaddwod forms.  The LSX variants use
 * gvec_vvv and the LASX variants use gvec_xxx; all of them share
 * do_vmaddwod_s, which indexes its descriptor table with the MO_* value
 * given here.
 * NOTE(review): TRANS, gvec_vvv and gvec_xxx are defined outside this
 * chunk - confirm their exact expansion there.
 */
2916*5c23704eSSong GaoTRANS(vmaddwod_h_b, LSX, gvec_vvv, MO_8, do_vmaddwod_s)
2917*5c23704eSSong GaoTRANS(vmaddwod_w_h, LSX, gvec_vvv, MO_16, do_vmaddwod_s)
2918*5c23704eSSong GaoTRANS(vmaddwod_d_w, LSX, gvec_vvv, MO_32, do_vmaddwod_s)
2919*5c23704eSSong GaoTRANS(xvmaddwod_h_b, LASX, gvec_xxx, MO_8, do_vmaddwod_s)
2920*5c23704eSSong GaoTRANS(xvmaddwod_w_h, LASX, gvec_xxx, MO_16, do_vmaddwod_s)
2921*5c23704eSSong GaoTRANS(xvmaddwod_d_w, LASX, gvec_xxx, MO_32, do_vmaddwod_s)
2922*5c23704eSSong Gao
2923*5c23704eSSong Gaostatic void gen_vmaddwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2924*5c23704eSSong Gao{
2925*5c23704eSSong Gao    TCGv_vec t1, t2, mask;
2926*5c23704eSSong Gao
2927*5c23704eSSong Gao    t1 = tcg_temp_new_vec_matching(t);
2928*5c23704eSSong Gao    t2 = tcg_temp_new_vec_matching(b);
2929*5c23704eSSong Gao    mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
2930*5c23704eSSong Gao    tcg_gen_and_vec(vece, t1, a, mask);
2931*5c23704eSSong Gao    tcg_gen_and_vec(vece, t2, b, mask);
2932*5c23704eSSong Gao    tcg_gen_mul_vec(vece, t1, t1, t2);
2933*5c23704eSSong Gao    tcg_gen_add_vec(vece, t, t, t1);
2934*5c23704eSSong Gao}
2935*5c23704eSSong Gao
2936*5c23704eSSong Gaostatic void gen_vmaddwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2937*5c23704eSSong Gao{
2938*5c23704eSSong Gao    TCGv_i32 t1;
2939*5c23704eSSong Gao
2940*5c23704eSSong Gao    t1 = tcg_temp_new_i32();
2941*5c23704eSSong Gao    gen_vmulwev_w_hu(t1, a, b);
2942*5c23704eSSong Gao    tcg_gen_add_i32(t, t, t1);
2943*5c23704eSSong Gao}
2944*5c23704eSSong Gao
2945*5c23704eSSong Gaostatic void gen_vmaddwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2946*5c23704eSSong Gao{
2947*5c23704eSSong Gao    TCGv_i64 t1;
2948*5c23704eSSong Gao
2949*5c23704eSSong Gao    t1 = tcg_temp_new_i64();
2950*5c23704eSSong Gao    gen_vmulwev_d_wu(t1, a, b);
2951*5c23704eSSong Gao    tcg_gen_add_i64(t, t, t1);
2952*5c23704eSSong Gao}
2953*5c23704eSSong Gao
2954*5c23704eSSong Gaostatic void do_vmaddwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2955*5c23704eSSong Gao                          uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2956*5c23704eSSong Gao{
2957*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
2958*5c23704eSSong Gao        INDEX_op_mul_vec, INDEX_op_add_vec, 0
2959*5c23704eSSong Gao        };
2960*5c23704eSSong Gao    static const GVecGen3 op[3] = {
2961*5c23704eSSong Gao        {
2962*5c23704eSSong Gao            .fniv = gen_vmaddwev_u,
2963*5c23704eSSong Gao            .fno = gen_helper_vmaddwev_h_bu,
2964*5c23704eSSong Gao            .load_dest = true,
2965*5c23704eSSong Gao            .opt_opc = vecop_list,
2966*5c23704eSSong Gao            .vece = MO_16
2967*5c23704eSSong Gao        },
2968*5c23704eSSong Gao        {
2969*5c23704eSSong Gao            .fni4 = gen_vmaddwev_w_hu,
2970*5c23704eSSong Gao            .fniv = gen_vmaddwev_u,
2971*5c23704eSSong Gao            .fno = gen_helper_vmaddwev_w_hu,
2972*5c23704eSSong Gao            .load_dest = true,
2973*5c23704eSSong Gao            .opt_opc = vecop_list,
2974*5c23704eSSong Gao            .vece = MO_32
2975*5c23704eSSong Gao        },
2976*5c23704eSSong Gao        {
2977*5c23704eSSong Gao            .fni8 = gen_vmaddwev_d_wu,
2978*5c23704eSSong Gao            .fniv = gen_vmaddwev_u,
2979*5c23704eSSong Gao            .fno = gen_helper_vmaddwev_d_wu,
2980*5c23704eSSong Gao            .load_dest = true,
2981*5c23704eSSong Gao            .opt_opc = vecop_list,
2982*5c23704eSSong Gao            .vece = MO_64
2983*5c23704eSSong Gao        },
2984*5c23704eSSong Gao    };
2985*5c23704eSSong Gao
2986*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2987*5c23704eSSong Gao}
2988*5c23704eSSong Gao
/*
 * TRANS() entries for the unsigned vmaddwev forms.  The LSX variants use
 * gvec_vvv and the LASX variants use gvec_xxx; all of them share
 * do_vmaddwev_u, which indexes its descriptor table with the MO_* value
 * given here.
 * NOTE(review): TRANS, gvec_vvv and gvec_xxx are defined outside this
 * chunk - confirm their exact expansion there.
 */
2989*5c23704eSSong GaoTRANS(vmaddwev_h_bu, LSX, gvec_vvv, MO_8, do_vmaddwev_u)
2990*5c23704eSSong GaoTRANS(vmaddwev_w_hu, LSX, gvec_vvv, MO_16, do_vmaddwev_u)
2991*5c23704eSSong GaoTRANS(vmaddwev_d_wu, LSX, gvec_vvv, MO_32, do_vmaddwev_u)
2992*5c23704eSSong GaoTRANS(xvmaddwev_h_bu, LASX, gvec_xxx, MO_8, do_vmaddwev_u)
2993*5c23704eSSong GaoTRANS(xvmaddwev_w_hu, LASX, gvec_xxx, MO_16, do_vmaddwev_u)
2994*5c23704eSSong GaoTRANS(xvmaddwev_d_wu, LASX, gvec_xxx, MO_32, do_vmaddwev_u)
2995*5c23704eSSong Gao
2996*5c23704eSSong Gaostatic void gen_vmaddwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2997*5c23704eSSong Gao{
2998*5c23704eSSong Gao    TCGv_vec t1, t2, t3;
2999*5c23704eSSong Gao    int halfbits = 4 << vece;
3000*5c23704eSSong Gao
3001*5c23704eSSong Gao    t1 = tcg_temp_new_vec_matching(a);
3002*5c23704eSSong Gao    t2 = tcg_temp_new_vec_matching(b);
3003*5c23704eSSong Gao    t3 = tcg_temp_new_vec_matching(t);
3004*5c23704eSSong Gao    tcg_gen_shri_vec(vece, t1, a, halfbits);
3005*5c23704eSSong Gao    tcg_gen_shri_vec(vece, t2, b, halfbits);
3006*5c23704eSSong Gao    tcg_gen_mul_vec(vece, t3, t1, t2);
3007*5c23704eSSong Gao    tcg_gen_add_vec(vece, t, t, t3);
3008*5c23704eSSong Gao}
3009*5c23704eSSong Gao
3010*5c23704eSSong Gaostatic void gen_vmaddwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
3011*5c23704eSSong Gao{
3012*5c23704eSSong Gao    TCGv_i32 t1;
3013*5c23704eSSong Gao
3014*5c23704eSSong Gao    t1 = tcg_temp_new_i32();
3015*5c23704eSSong Gao    gen_vmulwod_w_hu(t1, a, b);
3016*5c23704eSSong Gao    tcg_gen_add_i32(t, t, t1);
3017*5c23704eSSong Gao}
3018*5c23704eSSong Gao
3019*5c23704eSSong Gaostatic void gen_vmaddwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
3020*5c23704eSSong Gao{
3021*5c23704eSSong Gao    TCGv_i64 t1;
3022*5c23704eSSong Gao
3023*5c23704eSSong Gao    t1 = tcg_temp_new_i64();
3024*5c23704eSSong Gao    gen_vmulwod_d_wu(t1, a, b);
3025*5c23704eSSong Gao    tcg_gen_add_i64(t, t, t1);
3026*5c23704eSSong Gao}
3027*5c23704eSSong Gao
3028*5c23704eSSong Gaostatic void do_vmaddwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3029*5c23704eSSong Gao                          uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
3030*5c23704eSSong Gao{
3031*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
3032*5c23704eSSong Gao        INDEX_op_shri_vec, INDEX_op_mul_vec, INDEX_op_add_vec, 0
3033*5c23704eSSong Gao        };
3034*5c23704eSSong Gao    static const GVecGen3 op[3] = {
3035*5c23704eSSong Gao        {
3036*5c23704eSSong Gao            .fniv = gen_vmaddwod_u,
3037*5c23704eSSong Gao            .fno = gen_helper_vmaddwod_h_bu,
3038*5c23704eSSong Gao            .load_dest = true,
3039*5c23704eSSong Gao            .opt_opc = vecop_list,
3040*5c23704eSSong Gao            .vece = MO_16
3041*5c23704eSSong Gao        },
3042*5c23704eSSong Gao        {
3043*5c23704eSSong Gao            .fni4 = gen_vmaddwod_w_hu,
3044*5c23704eSSong Gao            .fniv = gen_vmaddwod_u,
3045*5c23704eSSong Gao            .fno = gen_helper_vmaddwod_w_hu,
3046*5c23704eSSong Gao            .load_dest = true,
3047*5c23704eSSong Gao            .opt_opc = vecop_list,
3048*5c23704eSSong Gao            .vece = MO_32
3049*5c23704eSSong Gao        },
3050*5c23704eSSong Gao        {
3051*5c23704eSSong Gao            .fni8 = gen_vmaddwod_d_wu,
3052*5c23704eSSong Gao            .fniv = gen_vmaddwod_u,
3053*5c23704eSSong Gao            .fno = gen_helper_vmaddwod_d_wu,
3054*5c23704eSSong Gao            .load_dest = true,
3055*5c23704eSSong Gao            .opt_opc = vecop_list,
3056*5c23704eSSong Gao            .vece = MO_64
3057*5c23704eSSong Gao        },
3058*5c23704eSSong Gao    };
3059*5c23704eSSong Gao
3060*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
3061*5c23704eSSong Gao}
3062*5c23704eSSong Gao
/*
 * TRANS() entries for the unsigned vmaddwod forms.  The LSX variants use
 * gvec_vvv and the LASX variants use gvec_xxx; all of them share
 * do_vmaddwod_u, which indexes its descriptor table with the MO_* value
 * given here.
 * NOTE(review): TRANS, gvec_vvv and gvec_xxx are defined outside this
 * chunk - confirm their exact expansion there.
 */
3063*5c23704eSSong GaoTRANS(vmaddwod_h_bu, LSX, gvec_vvv, MO_8, do_vmaddwod_u)
3064*5c23704eSSong GaoTRANS(vmaddwod_w_hu, LSX, gvec_vvv, MO_16, do_vmaddwod_u)
3065*5c23704eSSong GaoTRANS(vmaddwod_d_wu, LSX, gvec_vvv, MO_32, do_vmaddwod_u)
3066*5c23704eSSong GaoTRANS(xvmaddwod_h_bu, LASX, gvec_xxx, MO_8, do_vmaddwod_u)
3067*5c23704eSSong GaoTRANS(xvmaddwod_w_hu, LASX, gvec_xxx, MO_16, do_vmaddwod_u)
3068*5c23704eSSong GaoTRANS(xvmaddwod_d_wu, LASX, gvec_xxx, MO_32, do_vmaddwod_u)
3069*5c23704eSSong Gao
3070*5c23704eSSong Gaostatic void gen_vmaddwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
3071*5c23704eSSong Gao{
3072*5c23704eSSong Gao    TCGv_vec t1, t2, mask;
3073*5c23704eSSong Gao    int halfbits = 4 << vece;
3074*5c23704eSSong Gao
3075*5c23704eSSong Gao    t1 = tcg_temp_new_vec_matching(a);
3076*5c23704eSSong Gao    t2 = tcg_temp_new_vec_matching(b);
3077*5c23704eSSong Gao    mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
3078*5c23704eSSong Gao    tcg_gen_and_vec(vece, t1, a, mask);
3079*5c23704eSSong Gao    tcg_gen_shli_vec(vece, t2, b, halfbits);
3080*5c23704eSSong Gao    tcg_gen_sari_vec(vece, t2, t2, halfbits);
3081*5c23704eSSong Gao    tcg_gen_mul_vec(vece, t1, t1, t2);
3082*5c23704eSSong Gao    tcg_gen_add_vec(vece, t, t, t1);
3083*5c23704eSSong Gao}
3084*5c23704eSSong Gao
3085*5c23704eSSong Gaostatic void gen_vmaddwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
3086*5c23704eSSong Gao{
3087*5c23704eSSong Gao    TCGv_i32 t1;
3088*5c23704eSSong Gao
3089*5c23704eSSong Gao    t1 = tcg_temp_new_i32();
3090*5c23704eSSong Gao    gen_vmulwev_w_hu_h(t1, a, b);
3091*5c23704eSSong Gao    tcg_gen_add_i32(t, t, t1);
3092*5c23704eSSong Gao}
3093*5c23704eSSong Gao
3094*5c23704eSSong Gaostatic void gen_vmaddwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
3095*5c23704eSSong Gao{
3096*5c23704eSSong Gao    TCGv_i64 t1;
3097*5c23704eSSong Gao
3098*5c23704eSSong Gao    t1 = tcg_temp_new_i64();
3099*5c23704eSSong Gao    gen_vmulwev_d_wu_w(t1, a, b);
3100*5c23704eSSong Gao    tcg_gen_add_i64(t, t, t1);
3101*5c23704eSSong Gao}
3102*5c23704eSSong Gao
3103*5c23704eSSong Gaostatic void do_vmaddwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3104*5c23704eSSong Gao                            uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
3105*5c23704eSSong Gao{
3106*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
3107*5c23704eSSong Gao        INDEX_op_shli_vec, INDEX_op_sari_vec,
3108*5c23704eSSong Gao        INDEX_op_mul_vec, INDEX_op_add_vec, 0
3109*5c23704eSSong Gao        };
3110*5c23704eSSong Gao    static const GVecGen3 op[3] = {
3111*5c23704eSSong Gao        {
3112*5c23704eSSong Gao            .fniv = gen_vmaddwev_u_s,
3113*5c23704eSSong Gao            .fno = gen_helper_vmaddwev_h_bu_b,
3114*5c23704eSSong Gao            .load_dest = true,
3115*5c23704eSSong Gao            .opt_opc = vecop_list,
3116*5c23704eSSong Gao            .vece = MO_16
3117*5c23704eSSong Gao        },
3118*5c23704eSSong Gao        {
3119*5c23704eSSong Gao            .fni4 = gen_vmaddwev_w_hu_h,
3120*5c23704eSSong Gao            .fniv = gen_vmaddwev_u_s,
3121*5c23704eSSong Gao            .fno = gen_helper_vmaddwev_w_hu_h,
3122*5c23704eSSong Gao            .load_dest = true,
3123*5c23704eSSong Gao            .opt_opc = vecop_list,
3124*5c23704eSSong Gao            .vece = MO_32
3125*5c23704eSSong Gao        },
3126*5c23704eSSong Gao        {
3127*5c23704eSSong Gao            .fni8 = gen_vmaddwev_d_wu_w,
3128*5c23704eSSong Gao            .fniv = gen_vmaddwev_u_s,
3129*5c23704eSSong Gao            .fno = gen_helper_vmaddwev_d_wu_w,
3130*5c23704eSSong Gao            .load_dest = true,
3131*5c23704eSSong Gao            .opt_opc = vecop_list,
3132*5c23704eSSong Gao            .vece = MO_64
3133*5c23704eSSong Gao        },
3134*5c23704eSSong Gao    };
3135*5c23704eSSong Gao
3136*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
3137*5c23704eSSong Gao}
3138*5c23704eSSong Gao
3139*5c23704eSSong GaoTRANS(vmaddwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vmaddwev_u_s)
3140*5c23704eSSong GaoTRANS(vmaddwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vmaddwev_u_s)
3141*5c23704eSSong GaoTRANS(vmaddwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vmaddwev_u_s)
3142*5c23704eSSong GaoTRANS(xvmaddwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vmaddwev_u_s)
3143*5c23704eSSong GaoTRANS(xvmaddwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vmaddwev_u_s)
3144*5c23704eSSong GaoTRANS(xvmaddwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vmaddwev_u_s)
3145*5c23704eSSong Gao
3146*5c23704eSSong Gaostatic void gen_vmaddwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
3147*5c23704eSSong Gao{
3148*5c23704eSSong Gao    TCGv_vec t1, t2, t3;
3149*5c23704eSSong Gao    int halfbits = 4 << vece;
3150*5c23704eSSong Gao
3151*5c23704eSSong Gao    t1 = tcg_temp_new_vec_matching(a);
3152*5c23704eSSong Gao    t2 = tcg_temp_new_vec_matching(b);
3153*5c23704eSSong Gao    t3 = tcg_temp_new_vec_matching(t);
3154*5c23704eSSong Gao    tcg_gen_shri_vec(vece, t1, a, halfbits);
3155*5c23704eSSong Gao    tcg_gen_sari_vec(vece, t2, b, halfbits);
3156*5c23704eSSong Gao    tcg_gen_mul_vec(vece, t3, t1, t2);
3157*5c23704eSSong Gao    tcg_gen_add_vec(vece, t, t, t3);
3158*5c23704eSSong Gao}
3159*5c23704eSSong Gao
3160*5c23704eSSong Gaostatic void gen_vmaddwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
3161*5c23704eSSong Gao{
3162*5c23704eSSong Gao    TCGv_i32 t1;
3163*5c23704eSSong Gao
3164*5c23704eSSong Gao    t1 = tcg_temp_new_i32();
3165*5c23704eSSong Gao    gen_vmulwod_w_hu_h(t1, a, b);
3166*5c23704eSSong Gao    tcg_gen_add_i32(t, t, t1);
3167*5c23704eSSong Gao}
3168*5c23704eSSong Gao
3169*5c23704eSSong Gaostatic void gen_vmaddwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
3170*5c23704eSSong Gao{
3171*5c23704eSSong Gao    TCGv_i64 t1;
3172*5c23704eSSong Gao
3173*5c23704eSSong Gao    t1 = tcg_temp_new_i64();
3174*5c23704eSSong Gao    gen_vmulwod_d_wu_w(t1, a, b);
3175*5c23704eSSong Gao    tcg_gen_add_i64(t, t, t1);
3176*5c23704eSSong Gao}
3177*5c23704eSSong Gao
3178*5c23704eSSong Gaostatic void do_vmaddwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3179*5c23704eSSong Gao                            uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
3180*5c23704eSSong Gao{
3181*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
3182*5c23704eSSong Gao        INDEX_op_shri_vec, INDEX_op_sari_vec,
3183*5c23704eSSong Gao        INDEX_op_mul_vec, INDEX_op_add_vec, 0
3184*5c23704eSSong Gao        };
3185*5c23704eSSong Gao    static const GVecGen3 op[3] = {
3186*5c23704eSSong Gao        {
3187*5c23704eSSong Gao            .fniv = gen_vmaddwod_u_s,
3188*5c23704eSSong Gao            .fno = gen_helper_vmaddwod_h_bu_b,
3189*5c23704eSSong Gao            .load_dest = true,
3190*5c23704eSSong Gao            .opt_opc = vecop_list,
3191*5c23704eSSong Gao            .vece = MO_16
3192*5c23704eSSong Gao        },
3193*5c23704eSSong Gao        {
3194*5c23704eSSong Gao            .fni4 = gen_vmaddwod_w_hu_h,
3195*5c23704eSSong Gao            .fniv = gen_vmaddwod_u_s,
3196*5c23704eSSong Gao            .fno = gen_helper_vmaddwod_w_hu_h,
3197*5c23704eSSong Gao            .load_dest = true,
3198*5c23704eSSong Gao            .opt_opc = vecop_list,
3199*5c23704eSSong Gao            .vece = MO_32
3200*5c23704eSSong Gao        },
3201*5c23704eSSong Gao        {
3202*5c23704eSSong Gao            .fni8 = gen_vmaddwod_d_wu_w,
3203*5c23704eSSong Gao            .fniv = gen_vmaddwod_u_s,
3204*5c23704eSSong Gao            .fno = gen_helper_vmaddwod_d_wu_w,
3205*5c23704eSSong Gao            .load_dest = true,
3206*5c23704eSSong Gao            .opt_opc = vecop_list,
3207*5c23704eSSong Gao            .vece = MO_64
3208*5c23704eSSong Gao        },
3209*5c23704eSSong Gao    };
3210*5c23704eSSong Gao
3211*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
3212*5c23704eSSong Gao}
3213*5c23704eSSong Gao
3214*5c23704eSSong GaoTRANS(vmaddwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vmaddwod_u_s)
3215*5c23704eSSong GaoTRANS(vmaddwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vmaddwod_u_s)
3216*5c23704eSSong GaoTRANS(vmaddwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vmaddwod_u_s)
3217*5c23704eSSong GaoTRANS(xvmaddwod_h_bu_b, LASX, gvec_xxx, MO_8, do_vmaddwod_u_s)
3218*5c23704eSSong GaoTRANS(xvmaddwod_w_hu_h, LASX, gvec_xxx, MO_16, do_vmaddwod_u_s)
3219*5c23704eSSong GaoTRANS(xvmaddwod_d_wu_w, LASX, gvec_xxx, MO_32, do_vmaddwod_u_s)
3220*5c23704eSSong Gao
3221*5c23704eSSong GaoTRANS(vdiv_b, LSX, gen_vvv, gen_helper_vdiv_b)
3222*5c23704eSSong GaoTRANS(vdiv_h, LSX, gen_vvv, gen_helper_vdiv_h)
3223*5c23704eSSong GaoTRANS(vdiv_w, LSX, gen_vvv, gen_helper_vdiv_w)
3224*5c23704eSSong GaoTRANS(vdiv_d, LSX, gen_vvv, gen_helper_vdiv_d)
3225*5c23704eSSong GaoTRANS(vdiv_bu, LSX, gen_vvv, gen_helper_vdiv_bu)
3226*5c23704eSSong GaoTRANS(vdiv_hu, LSX, gen_vvv, gen_helper_vdiv_hu)
3227*5c23704eSSong GaoTRANS(vdiv_wu, LSX, gen_vvv, gen_helper_vdiv_wu)
3228*5c23704eSSong GaoTRANS(vdiv_du, LSX, gen_vvv, gen_helper_vdiv_du)
3229*5c23704eSSong GaoTRANS(vmod_b, LSX, gen_vvv, gen_helper_vmod_b)
3230*5c23704eSSong GaoTRANS(vmod_h, LSX, gen_vvv, gen_helper_vmod_h)
3231*5c23704eSSong GaoTRANS(vmod_w, LSX, gen_vvv, gen_helper_vmod_w)
3232*5c23704eSSong GaoTRANS(vmod_d, LSX, gen_vvv, gen_helper_vmod_d)
3233*5c23704eSSong GaoTRANS(vmod_bu, LSX, gen_vvv, gen_helper_vmod_bu)
3234*5c23704eSSong GaoTRANS(vmod_hu, LSX, gen_vvv, gen_helper_vmod_hu)
3235*5c23704eSSong GaoTRANS(vmod_wu, LSX, gen_vvv, gen_helper_vmod_wu)
3236*5c23704eSSong GaoTRANS(vmod_du, LSX, gen_vvv, gen_helper_vmod_du)
3237*5c23704eSSong GaoTRANS(xvdiv_b, LASX, gen_xxx, gen_helper_vdiv_b)
3238*5c23704eSSong GaoTRANS(xvdiv_h, LASX, gen_xxx, gen_helper_vdiv_h)
3239*5c23704eSSong GaoTRANS(xvdiv_w, LASX, gen_xxx, gen_helper_vdiv_w)
3240*5c23704eSSong GaoTRANS(xvdiv_d, LASX, gen_xxx, gen_helper_vdiv_d)
3241*5c23704eSSong GaoTRANS(xvdiv_bu, LASX, gen_xxx, gen_helper_vdiv_bu)
3242*5c23704eSSong GaoTRANS(xvdiv_hu, LASX, gen_xxx, gen_helper_vdiv_hu)
3243*5c23704eSSong GaoTRANS(xvdiv_wu, LASX, gen_xxx, gen_helper_vdiv_wu)
3244*5c23704eSSong GaoTRANS(xvdiv_du, LASX, gen_xxx, gen_helper_vdiv_du)
3245*5c23704eSSong GaoTRANS(xvmod_b, LASX, gen_xxx, gen_helper_vmod_b)
3246*5c23704eSSong GaoTRANS(xvmod_h, LASX, gen_xxx, gen_helper_vmod_h)
3247*5c23704eSSong GaoTRANS(xvmod_w, LASX, gen_xxx, gen_helper_vmod_w)
3248*5c23704eSSong GaoTRANS(xvmod_d, LASX, gen_xxx, gen_helper_vmod_d)
3249*5c23704eSSong GaoTRANS(xvmod_bu, LASX, gen_xxx, gen_helper_vmod_bu)
3250*5c23704eSSong GaoTRANS(xvmod_hu, LASX, gen_xxx, gen_helper_vmod_hu)
3251*5c23704eSSong GaoTRANS(xvmod_wu, LASX, gen_xxx, gen_helper_vmod_wu)
3252*5c23704eSSong GaoTRANS(xvmod_du, LASX, gen_xxx, gen_helper_vmod_du)
3253*5c23704eSSong Gao
3254*5c23704eSSong Gaostatic void gen_vsat_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec max)
3255*5c23704eSSong Gao{
3256*5c23704eSSong Gao    TCGv_vec min;
3257*5c23704eSSong Gao
3258*5c23704eSSong Gao    min = tcg_temp_new_vec_matching(t);
3259*5c23704eSSong Gao    tcg_gen_not_vec(vece, min, max);
3260*5c23704eSSong Gao    tcg_gen_smax_vec(vece, t, a, min);
3261*5c23704eSSong Gao    tcg_gen_smin_vec(vece, t, t, max);
3262*5c23704eSSong Gao}
3263*5c23704eSSong Gao
3264*5c23704eSSong Gaostatic void do_vsat_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3265*5c23704eSSong Gao                      int64_t imm, uint32_t oprsz, uint32_t maxsz)
3266*5c23704eSSong Gao{
3267*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
3268*5c23704eSSong Gao        INDEX_op_smax_vec, INDEX_op_smin_vec, 0
3269*5c23704eSSong Gao        };
3270*5c23704eSSong Gao    static const GVecGen2s op[4] = {
3271*5c23704eSSong Gao        {
3272*5c23704eSSong Gao            .fniv = gen_vsat_s,
3273*5c23704eSSong Gao            .fno = gen_helper_vsat_b,
3274*5c23704eSSong Gao            .opt_opc = vecop_list,
3275*5c23704eSSong Gao            .vece = MO_8
3276*5c23704eSSong Gao        },
3277*5c23704eSSong Gao        {
3278*5c23704eSSong Gao            .fniv = gen_vsat_s,
3279*5c23704eSSong Gao            .fno = gen_helper_vsat_h,
3280*5c23704eSSong Gao            .opt_opc = vecop_list,
3281*5c23704eSSong Gao            .vece = MO_16
3282*5c23704eSSong Gao        },
3283*5c23704eSSong Gao        {
3284*5c23704eSSong Gao            .fniv = gen_vsat_s,
3285*5c23704eSSong Gao            .fno = gen_helper_vsat_w,
3286*5c23704eSSong Gao            .opt_opc = vecop_list,
3287*5c23704eSSong Gao            .vece = MO_32
3288*5c23704eSSong Gao        },
3289*5c23704eSSong Gao        {
3290*5c23704eSSong Gao            .fniv = gen_vsat_s,
3291*5c23704eSSong Gao            .fno = gen_helper_vsat_d,
3292*5c23704eSSong Gao            .opt_opc = vecop_list,
3293*5c23704eSSong Gao            .vece = MO_64
3294*5c23704eSSong Gao        },
3295*5c23704eSSong Gao    };
3296*5c23704eSSong Gao
3297*5c23704eSSong Gao    tcg_gen_gvec_2s(vd_ofs, vj_ofs, oprsz, maxsz,
3298*5c23704eSSong Gao                    tcg_constant_i64((1ll<< imm) -1), &op[vece]);
3299*5c23704eSSong Gao}
3300*5c23704eSSong Gao
3301*5c23704eSSong GaoTRANS(vsat_b, LSX, gvec_vv_i, MO_8, do_vsat_s)
3302*5c23704eSSong GaoTRANS(vsat_h, LSX, gvec_vv_i, MO_16, do_vsat_s)
3303*5c23704eSSong GaoTRANS(vsat_w, LSX, gvec_vv_i, MO_32, do_vsat_s)
3304*5c23704eSSong GaoTRANS(vsat_d, LSX, gvec_vv_i, MO_64, do_vsat_s)
3305*5c23704eSSong GaoTRANS(xvsat_b, LASX, gvec_xx_i, MO_8, do_vsat_s)
3306*5c23704eSSong GaoTRANS(xvsat_h, LASX, gvec_xx_i, MO_16, do_vsat_s)
3307*5c23704eSSong GaoTRANS(xvsat_w, LASX, gvec_xx_i, MO_32, do_vsat_s)
3308*5c23704eSSong GaoTRANS(xvsat_d, LASX, gvec_xx_i, MO_64, do_vsat_s)
3309*5c23704eSSong Gao
3310*5c23704eSSong Gaostatic void gen_vsat_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec max)
3311*5c23704eSSong Gao{
3312*5c23704eSSong Gao    tcg_gen_umin_vec(vece, t, a, max);
3313*5c23704eSSong Gao}
3314*5c23704eSSong Gao
3315*5c23704eSSong Gaostatic void do_vsat_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3316*5c23704eSSong Gao                       int64_t imm, uint32_t oprsz, uint32_t maxsz)
3317*5c23704eSSong Gao{
3318*5c23704eSSong Gao    uint64_t max;
3319*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
3320*5c23704eSSong Gao        INDEX_op_umin_vec, 0
3321*5c23704eSSong Gao        };
3322*5c23704eSSong Gao    static const GVecGen2s op[4] = {
3323*5c23704eSSong Gao        {
3324*5c23704eSSong Gao            .fniv = gen_vsat_u,
3325*5c23704eSSong Gao            .fno = gen_helper_vsat_bu,
3326*5c23704eSSong Gao            .opt_opc = vecop_list,
3327*5c23704eSSong Gao            .vece = MO_8
3328*5c23704eSSong Gao        },
3329*5c23704eSSong Gao        {
3330*5c23704eSSong Gao            .fniv = gen_vsat_u,
3331*5c23704eSSong Gao            .fno = gen_helper_vsat_hu,
3332*5c23704eSSong Gao            .opt_opc = vecop_list,
3333*5c23704eSSong Gao            .vece = MO_16
3334*5c23704eSSong Gao        },
3335*5c23704eSSong Gao        {
3336*5c23704eSSong Gao            .fniv = gen_vsat_u,
3337*5c23704eSSong Gao            .fno = gen_helper_vsat_wu,
3338*5c23704eSSong Gao            .opt_opc = vecop_list,
3339*5c23704eSSong Gao            .vece = MO_32
3340*5c23704eSSong Gao        },
3341*5c23704eSSong Gao        {
3342*5c23704eSSong Gao            .fniv = gen_vsat_u,
3343*5c23704eSSong Gao            .fno = gen_helper_vsat_du,
3344*5c23704eSSong Gao            .opt_opc = vecop_list,
3345*5c23704eSSong Gao            .vece = MO_64
3346*5c23704eSSong Gao        },
3347*5c23704eSSong Gao    };
3348*5c23704eSSong Gao
3349*5c23704eSSong Gao    max = (imm == 0x3f) ? UINT64_MAX : (1ull << (imm + 1)) - 1;
3350*5c23704eSSong Gao    tcg_gen_gvec_2s(vd_ofs, vj_ofs, oprsz, maxsz,
3351*5c23704eSSong Gao                    tcg_constant_i64(max), &op[vece]);
3352*5c23704eSSong Gao}
3353*5c23704eSSong Gao
3354*5c23704eSSong GaoTRANS(vsat_bu, LSX, gvec_vv_i, MO_8, do_vsat_u)
3355*5c23704eSSong GaoTRANS(vsat_hu, LSX, gvec_vv_i, MO_16, do_vsat_u)
3356*5c23704eSSong GaoTRANS(vsat_wu, LSX, gvec_vv_i, MO_32, do_vsat_u)
3357*5c23704eSSong GaoTRANS(vsat_du, LSX, gvec_vv_i, MO_64, do_vsat_u)
3358*5c23704eSSong GaoTRANS(xvsat_bu, LASX, gvec_xx_i, MO_8, do_vsat_u)
3359*5c23704eSSong GaoTRANS(xvsat_hu, LASX, gvec_xx_i, MO_16, do_vsat_u)
3360*5c23704eSSong GaoTRANS(xvsat_wu, LASX, gvec_xx_i, MO_32, do_vsat_u)
3361*5c23704eSSong GaoTRANS(xvsat_du, LASX, gvec_xx_i, MO_64, do_vsat_u)
3362*5c23704eSSong Gao
3363*5c23704eSSong GaoTRANS(vexth_h_b, LSX, gen_vv, gen_helper_vexth_h_b)
3364*5c23704eSSong GaoTRANS(vexth_w_h, LSX, gen_vv, gen_helper_vexth_w_h)
3365*5c23704eSSong GaoTRANS(vexth_d_w, LSX, gen_vv, gen_helper_vexth_d_w)
3366*5c23704eSSong GaoTRANS(vexth_q_d, LSX, gen_vv, gen_helper_vexth_q_d)
3367*5c23704eSSong GaoTRANS(vexth_hu_bu, LSX, gen_vv, gen_helper_vexth_hu_bu)
3368*5c23704eSSong GaoTRANS(vexth_wu_hu, LSX, gen_vv, gen_helper_vexth_wu_hu)
3369*5c23704eSSong GaoTRANS(vexth_du_wu, LSX, gen_vv, gen_helper_vexth_du_wu)
3370*5c23704eSSong GaoTRANS(vexth_qu_du, LSX, gen_vv, gen_helper_vexth_qu_du)
3371*5c23704eSSong GaoTRANS(xvexth_h_b, LASX, gen_xx, gen_helper_vexth_h_b)
3372*5c23704eSSong GaoTRANS(xvexth_w_h, LASX, gen_xx, gen_helper_vexth_w_h)
3373*5c23704eSSong GaoTRANS(xvexth_d_w, LASX, gen_xx, gen_helper_vexth_d_w)
3374*5c23704eSSong GaoTRANS(xvexth_q_d, LASX, gen_xx, gen_helper_vexth_q_d)
3375*5c23704eSSong GaoTRANS(xvexth_hu_bu, LASX, gen_xx, gen_helper_vexth_hu_bu)
3376*5c23704eSSong GaoTRANS(xvexth_wu_hu, LASX, gen_xx, gen_helper_vexth_wu_hu)
3377*5c23704eSSong GaoTRANS(xvexth_du_wu, LASX, gen_xx, gen_helper_vexth_du_wu)
3378*5c23704eSSong GaoTRANS(xvexth_qu_du, LASX, gen_xx, gen_helper_vexth_qu_du)
3379*5c23704eSSong Gao
3380*5c23704eSSong GaoTRANS(vext2xv_h_b, LASX, gen_xx, gen_helper_vext2xv_h_b)
3381*5c23704eSSong GaoTRANS(vext2xv_w_b, LASX, gen_xx, gen_helper_vext2xv_w_b)
3382*5c23704eSSong GaoTRANS(vext2xv_d_b, LASX, gen_xx, gen_helper_vext2xv_d_b)
3383*5c23704eSSong GaoTRANS(vext2xv_w_h, LASX, gen_xx, gen_helper_vext2xv_w_h)
3384*5c23704eSSong GaoTRANS(vext2xv_d_h, LASX, gen_xx, gen_helper_vext2xv_d_h)
3385*5c23704eSSong GaoTRANS(vext2xv_d_w, LASX, gen_xx, gen_helper_vext2xv_d_w)
3386*5c23704eSSong GaoTRANS(vext2xv_hu_bu, LASX, gen_xx, gen_helper_vext2xv_hu_bu)
3387*5c23704eSSong GaoTRANS(vext2xv_wu_bu, LASX, gen_xx, gen_helper_vext2xv_wu_bu)
3388*5c23704eSSong GaoTRANS(vext2xv_du_bu, LASX, gen_xx, gen_helper_vext2xv_du_bu)
3389*5c23704eSSong GaoTRANS(vext2xv_wu_hu, LASX, gen_xx, gen_helper_vext2xv_wu_hu)
3390*5c23704eSSong GaoTRANS(vext2xv_du_hu, LASX, gen_xx, gen_helper_vext2xv_du_hu)
3391*5c23704eSSong GaoTRANS(vext2xv_du_wu, LASX, gen_xx, gen_helper_vext2xv_du_wu)
3392*5c23704eSSong Gao
3393*5c23704eSSong Gaostatic void gen_vsigncov(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
3394*5c23704eSSong Gao{
3395*5c23704eSSong Gao    TCGv_vec t1, zero;
3396*5c23704eSSong Gao
3397*5c23704eSSong Gao    t1 = tcg_temp_new_vec_matching(t);
3398*5c23704eSSong Gao    zero = tcg_constant_vec_matching(t, vece, 0);
3399*5c23704eSSong Gao
3400*5c23704eSSong Gao    tcg_gen_neg_vec(vece, t1, b);
3401*5c23704eSSong Gao    tcg_gen_cmpsel_vec(TCG_COND_LT, vece, t, a, zero, t1, b);
3402*5c23704eSSong Gao    tcg_gen_cmpsel_vec(TCG_COND_EQ, vece, t, a, zero, zero, t);
3403*5c23704eSSong Gao}
3404*5c23704eSSong Gao
3405*5c23704eSSong Gaostatic void do_vsigncov(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3406*5c23704eSSong Gao                        uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
3407*5c23704eSSong Gao{
3408*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
3409*5c23704eSSong Gao        INDEX_op_neg_vec, INDEX_op_cmpsel_vec, 0
3410*5c23704eSSong Gao        };
3411*5c23704eSSong Gao    static const GVecGen3 op[4] = {
3412*5c23704eSSong Gao        {
3413*5c23704eSSong Gao            .fniv = gen_vsigncov,
3414*5c23704eSSong Gao            .fno = gen_helper_vsigncov_b,
3415*5c23704eSSong Gao            .opt_opc = vecop_list,
3416*5c23704eSSong Gao            .vece = MO_8
3417*5c23704eSSong Gao        },
3418*5c23704eSSong Gao        {
3419*5c23704eSSong Gao            .fniv = gen_vsigncov,
3420*5c23704eSSong Gao            .fno = gen_helper_vsigncov_h,
3421*5c23704eSSong Gao            .opt_opc = vecop_list,
3422*5c23704eSSong Gao            .vece = MO_16
3423*5c23704eSSong Gao        },
3424*5c23704eSSong Gao        {
3425*5c23704eSSong Gao            .fniv = gen_vsigncov,
3426*5c23704eSSong Gao            .fno = gen_helper_vsigncov_w,
3427*5c23704eSSong Gao            .opt_opc = vecop_list,
3428*5c23704eSSong Gao            .vece = MO_32
3429*5c23704eSSong Gao        },
3430*5c23704eSSong Gao        {
3431*5c23704eSSong Gao            .fniv = gen_vsigncov,
3432*5c23704eSSong Gao            .fno = gen_helper_vsigncov_d,
3433*5c23704eSSong Gao            .opt_opc = vecop_list,
3434*5c23704eSSong Gao            .vece = MO_64
3435*5c23704eSSong Gao        },
3436*5c23704eSSong Gao    };
3437*5c23704eSSong Gao
3438*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
3439*5c23704eSSong Gao}
3440*5c23704eSSong Gao
3441*5c23704eSSong GaoTRANS(vsigncov_b, LSX, gvec_vvv, MO_8, do_vsigncov)
3442*5c23704eSSong GaoTRANS(vsigncov_h, LSX, gvec_vvv, MO_16, do_vsigncov)
3443*5c23704eSSong GaoTRANS(vsigncov_w, LSX, gvec_vvv, MO_32, do_vsigncov)
3444*5c23704eSSong GaoTRANS(vsigncov_d, LSX, gvec_vvv, MO_64, do_vsigncov)
3445*5c23704eSSong GaoTRANS(xvsigncov_b, LASX, gvec_xxx, MO_8, do_vsigncov)
3446*5c23704eSSong GaoTRANS(xvsigncov_h, LASX, gvec_xxx, MO_16, do_vsigncov)
3447*5c23704eSSong GaoTRANS(xvsigncov_w, LASX, gvec_xxx, MO_32, do_vsigncov)
3448*5c23704eSSong GaoTRANS(xvsigncov_d, LASX, gvec_xxx, MO_64, do_vsigncov)
3449*5c23704eSSong Gao
3450*5c23704eSSong GaoTRANS(vmskltz_b, LSX, gen_vv, gen_helper_vmskltz_b)
3451*5c23704eSSong GaoTRANS(vmskltz_h, LSX, gen_vv, gen_helper_vmskltz_h)
3452*5c23704eSSong GaoTRANS(vmskltz_w, LSX, gen_vv, gen_helper_vmskltz_w)
3453*5c23704eSSong GaoTRANS(vmskltz_d, LSX, gen_vv, gen_helper_vmskltz_d)
3454*5c23704eSSong GaoTRANS(vmskgez_b, LSX, gen_vv, gen_helper_vmskgez_b)
3455*5c23704eSSong GaoTRANS(vmsknz_b, LSX, gen_vv, gen_helper_vmsknz_b)
3456*5c23704eSSong GaoTRANS(xvmskltz_b, LASX, gen_xx, gen_helper_vmskltz_b)
3457*5c23704eSSong GaoTRANS(xvmskltz_h, LASX, gen_xx, gen_helper_vmskltz_h)
3458*5c23704eSSong GaoTRANS(xvmskltz_w, LASX, gen_xx, gen_helper_vmskltz_w)
3459*5c23704eSSong GaoTRANS(xvmskltz_d, LASX, gen_xx, gen_helper_vmskltz_d)
3460*5c23704eSSong GaoTRANS(xvmskgez_b, LASX, gen_xx, gen_helper_vmskgez_b)
3461*5c23704eSSong GaoTRANS(xvmsknz_b, LASX, gen_xx, gen_helper_vmsknz_b)
3462*5c23704eSSong Gao
3463*5c23704eSSong Gao#define EXPAND_BYTE(bit)  ((uint64_t)(bit ? 0xff : 0))
3464*5c23704eSSong Gao
3465*5c23704eSSong Gaostatic uint64_t vldi_get_value(DisasContext *ctx, uint32_t imm)
3466*5c23704eSSong Gao{
3467*5c23704eSSong Gao    int mode;
3468*5c23704eSSong Gao    uint64_t data, t;
3469*5c23704eSSong Gao
3470*5c23704eSSong Gao    /*
3471*5c23704eSSong Gao     * imm bit [11:8] is mode, mode value is 0-12.
3472*5c23704eSSong Gao     * other values are invalid.
3473*5c23704eSSong Gao     */
3474*5c23704eSSong Gao    mode = (imm >> 8) & 0xf;
3475*5c23704eSSong Gao    t =  imm & 0xff;
3476*5c23704eSSong Gao    switch (mode) {
3477*5c23704eSSong Gao    case 0:
3478*5c23704eSSong Gao        /* data: {2{24'0, imm[7:0]}} */
3479*5c23704eSSong Gao        data =  (t << 32) | t ;
3480*5c23704eSSong Gao        break;
3481*5c23704eSSong Gao    case 1:
3482*5c23704eSSong Gao        /* data: {2{16'0, imm[7:0], 8'0}} */
3483*5c23704eSSong Gao        data = (t << 24) | (t << 8);
3484*5c23704eSSong Gao        break;
3485*5c23704eSSong Gao    case 2:
3486*5c23704eSSong Gao        /* data: {2{8'0, imm[7:0], 16'0}} */
3487*5c23704eSSong Gao        data = (t << 48) | (t << 16);
3488*5c23704eSSong Gao        break;
3489*5c23704eSSong Gao    case 3:
3490*5c23704eSSong Gao        /* data: {2{imm[7:0], 24'0}} */
3491*5c23704eSSong Gao        data = (t << 56) | (t << 24);
3492*5c23704eSSong Gao        break;
3493*5c23704eSSong Gao    case 4:
3494*5c23704eSSong Gao        /* data: {4{8'0, imm[7:0]}} */
3495*5c23704eSSong Gao        data = (t << 48) | (t << 32) | (t << 16) | t;
3496*5c23704eSSong Gao        break;
3497*5c23704eSSong Gao    case 5:
3498*5c23704eSSong Gao        /* data: {4{imm[7:0], 8'0}} */
3499*5c23704eSSong Gao        data = (t << 56) |(t << 40) | (t << 24) | (t << 8);
3500*5c23704eSSong Gao        break;
3501*5c23704eSSong Gao    case 6:
3502*5c23704eSSong Gao        /* data: {2{16'0, imm[7:0], 8'1}} */
3503*5c23704eSSong Gao        data = (t << 40) | ((uint64_t)0xff << 32) | (t << 8) | 0xff;
3504*5c23704eSSong Gao        break;
3505*5c23704eSSong Gao    case 7:
3506*5c23704eSSong Gao        /* data: {2{8'0, imm[7:0], 16'1}} */
3507*5c23704eSSong Gao        data = (t << 48) | ((uint64_t)0xffff << 32) | (t << 16) | 0xffff;
3508*5c23704eSSong Gao        break;
3509*5c23704eSSong Gao    case 8:
3510*5c23704eSSong Gao        /* data: {8{imm[7:0]}} */
3511*5c23704eSSong Gao        data =(t << 56) | (t << 48) | (t << 40) | (t << 32) |
3512*5c23704eSSong Gao              (t << 24) | (t << 16) | (t << 8) | t;
3513*5c23704eSSong Gao        break;
3514*5c23704eSSong Gao    case 9:
3515*5c23704eSSong Gao        /* data: {{8{imm[7]}, ..., 8{imm[0]}}} */
3516*5c23704eSSong Gao        {
3517*5c23704eSSong Gao            uint64_t b0,b1,b2,b3,b4,b5,b6,b7;
3518*5c23704eSSong Gao            b0 = t& 0x1;
3519*5c23704eSSong Gao            b1 = (t & 0x2) >> 1;
3520*5c23704eSSong Gao            b2 = (t & 0x4) >> 2;
3521*5c23704eSSong Gao            b3 = (t & 0x8) >> 3;
3522*5c23704eSSong Gao            b4 = (t & 0x10) >> 4;
3523*5c23704eSSong Gao            b5 = (t & 0x20) >> 5;
3524*5c23704eSSong Gao            b6 = (t & 0x40) >> 6;
3525*5c23704eSSong Gao            b7 = (t & 0x80) >> 7;
3526*5c23704eSSong Gao            data = (EXPAND_BYTE(b7) << 56) |
3527*5c23704eSSong Gao                   (EXPAND_BYTE(b6) << 48) |
3528*5c23704eSSong Gao                   (EXPAND_BYTE(b5) << 40) |
3529*5c23704eSSong Gao                   (EXPAND_BYTE(b4) << 32) |
3530*5c23704eSSong Gao                   (EXPAND_BYTE(b3) << 24) |
3531*5c23704eSSong Gao                   (EXPAND_BYTE(b2) << 16) |
3532*5c23704eSSong Gao                   (EXPAND_BYTE(b1) <<  8) |
3533*5c23704eSSong Gao                   EXPAND_BYTE(b0);
3534*5c23704eSSong Gao        }
3535*5c23704eSSong Gao        break;
3536*5c23704eSSong Gao    case 10:
3537*5c23704eSSong Gao        /* data: {2{imm[7], ~imm[6], {5{imm[6]}}, imm[5:0], 19'0}} */
3538*5c23704eSSong Gao        {
3539*5c23704eSSong Gao            uint64_t b6, b7;
3540*5c23704eSSong Gao            uint64_t t0, t1;
3541*5c23704eSSong Gao            b6 = (imm & 0x40) >> 6;
3542*5c23704eSSong Gao            b7 = (imm & 0x80) >> 7;
3543*5c23704eSSong Gao            t0 = (imm & 0x3f);
3544*5c23704eSSong Gao            t1 = (b7 << 6) | ((1-b6) << 5) | (uint64_t)(b6 ? 0x1f : 0);
3545*5c23704eSSong Gao            data  = (t1 << 57) | (t0 << 51) | (t1 << 25) | (t0 << 19);
3546*5c23704eSSong Gao        }
3547*5c23704eSSong Gao        break;
3548*5c23704eSSong Gao    case 11:
3549*5c23704eSSong Gao        /* data: {32'0, imm[7], ~{imm[6]}, 5{imm[6]}, imm[5:0], 19'0} */
3550*5c23704eSSong Gao        {
3551*5c23704eSSong Gao            uint64_t b6,b7;
3552*5c23704eSSong Gao            uint64_t t0, t1;
3553*5c23704eSSong Gao            b6 = (imm & 0x40) >> 6;
3554*5c23704eSSong Gao            b7 = (imm & 0x80) >> 7;
3555*5c23704eSSong Gao            t0 = (imm & 0x3f);
3556*5c23704eSSong Gao            t1 = (b7 << 6) | ((1-b6) << 5) | (b6 ? 0x1f : 0);
3557*5c23704eSSong Gao            data = (t1 << 25) | (t0 << 19);
3558*5c23704eSSong Gao        }
3559*5c23704eSSong Gao        break;
3560*5c23704eSSong Gao    case 12:
3561*5c23704eSSong Gao        /* data: {imm[7], ~imm[6], 8{imm[6]}, imm[5:0], 48'0} */
3562*5c23704eSSong Gao        {
3563*5c23704eSSong Gao            uint64_t b6,b7;
3564*5c23704eSSong Gao            uint64_t t0, t1;
3565*5c23704eSSong Gao            b6 = (imm & 0x40) >> 6;
3566*5c23704eSSong Gao            b7 = (imm & 0x80) >> 7;
3567*5c23704eSSong Gao            t0 = (imm & 0x3f);
3568*5c23704eSSong Gao            t1 = (b7 << 9) | ((1-b6) << 8) | (b6 ? 0xff : 0);
3569*5c23704eSSong Gao            data = (t1 << 54) | (t0 << 48);
3570*5c23704eSSong Gao        }
3571*5c23704eSSong Gao        break;
3572*5c23704eSSong Gao    default:
3573*5c23704eSSong Gao        generate_exception(ctx, EXCCODE_INE);
3574*5c23704eSSong Gao        g_assert_not_reached();
3575*5c23704eSSong Gao    }
3576*5c23704eSSong Gao    return data;
3577*5c23704eSSong Gao}
3578*5c23704eSSong Gao
3579*5c23704eSSong Gaostatic bool gen_vldi(DisasContext *ctx, arg_vldi *a, uint32_t oprsz)
3580*5c23704eSSong Gao{
3581*5c23704eSSong Gao    int sel, vece;
3582*5c23704eSSong Gao    uint64_t value;
3583*5c23704eSSong Gao
3584*5c23704eSSong Gao    if (!check_vec(ctx, oprsz)) {
3585*5c23704eSSong Gao        return true;
3586*5c23704eSSong Gao    }
3587*5c23704eSSong Gao
3588*5c23704eSSong Gao    sel = (a->imm >> 12) & 0x1;
3589*5c23704eSSong Gao
3590*5c23704eSSong Gao    if (sel) {
3591*5c23704eSSong Gao        value = vldi_get_value(ctx, a->imm);
3592*5c23704eSSong Gao        vece = MO_64;
3593*5c23704eSSong Gao    } else {
3594*5c23704eSSong Gao        value = ((int32_t)(a->imm << 22)) >> 22;
3595*5c23704eSSong Gao        vece = (a->imm >> 10) & 0x3;
3596*5c23704eSSong Gao    }
3597*5c23704eSSong Gao
3598*5c23704eSSong Gao    tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd), oprsz, ctx->vl/8,
3599*5c23704eSSong Gao                         tcg_constant_i64(value));
3600*5c23704eSSong Gao    return true;
3601*5c23704eSSong Gao}
3602*5c23704eSSong Gao
3603*5c23704eSSong GaoTRANS(vldi, LSX, gen_vldi, 16)
3604*5c23704eSSong GaoTRANS(xvldi, LASX, gen_vldi, 32)
3605*5c23704eSSong Gao
3606*5c23704eSSong Gaostatic bool gen_vandn_v(DisasContext *ctx, arg_vvv *a, uint32_t oprsz)
3607*5c23704eSSong Gao{
3608*5c23704eSSong Gao    uint32_t vd_ofs, vj_ofs, vk_ofs;
3609*5c23704eSSong Gao
3610*5c23704eSSong Gao    if (!check_vec(ctx, oprsz)) {
3611*5c23704eSSong Gao        return true;
3612*5c23704eSSong Gao    }
3613*5c23704eSSong Gao
3614*5c23704eSSong Gao    vd_ofs = vec_full_offset(a->vd);
3615*5c23704eSSong Gao    vj_ofs = vec_full_offset(a->vj);
3616*5c23704eSSong Gao    vk_ofs = vec_full_offset(a->vk);
3617*5c23704eSSong Gao
3618*5c23704eSSong Gao    tcg_gen_gvec_andc(MO_64, vd_ofs, vk_ofs, vj_ofs, oprsz, ctx->vl / 8);
3619*5c23704eSSong Gao    return true;
3620*5c23704eSSong Gao}
3621*5c23704eSSong Gao
3622*5c23704eSSong Gaostatic void gen_vnori(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
3623*5c23704eSSong Gao{
3624*5c23704eSSong Gao    TCGv_vec t1;
3625*5c23704eSSong Gao
3626*5c23704eSSong Gao    t1 = tcg_constant_vec_matching(t, vece, imm);
3627*5c23704eSSong Gao    tcg_gen_nor_vec(vece, t, a, t1);
3628*5c23704eSSong Gao}
3629*5c23704eSSong Gao
3630*5c23704eSSong Gaostatic void gen_vnori_b(TCGv_i64 t, TCGv_i64 a, int64_t imm)
3631*5c23704eSSong Gao{
3632*5c23704eSSong Gao    tcg_gen_movi_i64(t, dup_const(MO_8, imm));
3633*5c23704eSSong Gao    tcg_gen_nor_i64(t, a, t);
3634*5c23704eSSong Gao}
3635*5c23704eSSong Gao
3636*5c23704eSSong Gaostatic void do_vnori_b(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3637*5c23704eSSong Gao                       int64_t imm, uint32_t oprsz, uint32_t maxsz)
3638*5c23704eSSong Gao{
3639*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
3640*5c23704eSSong Gao        INDEX_op_nor_vec, 0
3641*5c23704eSSong Gao        };
3642*5c23704eSSong Gao    static const GVecGen2i op = {
3643*5c23704eSSong Gao       .fni8 = gen_vnori_b,
3644*5c23704eSSong Gao       .fniv = gen_vnori,
3645*5c23704eSSong Gao       .fnoi = gen_helper_vnori_b,
3646*5c23704eSSong Gao       .opt_opc = vecop_list,
3647*5c23704eSSong Gao       .vece = MO_8
3648*5c23704eSSong Gao    };
3649*5c23704eSSong Gao
3650*5c23704eSSong Gao    tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op);
3651*5c23704eSSong Gao}
3652*5c23704eSSong Gao
/*
 * Bitwise logic entries (and/or/xor/nor/andn/orn plus byte-immediate forms).
 * Entries tagged LSX go through gvec_vvv / gvec_vv_i, entries tagged LASX
 * through gvec_xxx / gvec_xx_i.  vandn_v / xvandn_v use gen_vandn_v with 16
 * or 32 as the extra argument; the nori_b entries use the local do_vnori_b
 * expander.  (The TRANS macro itself is defined outside this section.)
 */
3653*5c23704eSSong GaoTRANS(vand_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_and)
3654*5c23704eSSong GaoTRANS(vor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_or)
3655*5c23704eSSong GaoTRANS(vxor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_xor)
3656*5c23704eSSong GaoTRANS(vnor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_nor)
3657*5c23704eSSong GaoTRANS(vandn_v, LSX, gen_vandn_v, 16)
3658*5c23704eSSong GaoTRANS(vorn_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_orc)
3659*5c23704eSSong GaoTRANS(vandi_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_andi)
3660*5c23704eSSong GaoTRANS(vori_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_ori)
3661*5c23704eSSong GaoTRANS(vxori_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_xori)
3662*5c23704eSSong GaoTRANS(vnori_b, LSX, gvec_vv_i, MO_8, do_vnori_b)
3663*5c23704eSSong GaoTRANS(xvand_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_and)
3664*5c23704eSSong GaoTRANS(xvor_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_or)
3665*5c23704eSSong GaoTRANS(xvxor_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_xor)
3666*5c23704eSSong GaoTRANS(xvnor_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_nor)
3667*5c23704eSSong GaoTRANS(xvandn_v, LASX, gen_vandn_v, 32)
3668*5c23704eSSong GaoTRANS(xvorn_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_orc)
3669*5c23704eSSong GaoTRANS(xvandi_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_andi)
3670*5c23704eSSong GaoTRANS(xvori_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_ori)
3671*5c23704eSSong GaoTRANS(xvxori_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_xori)
3672*5c23704eSSong GaoTRANS(xvnori_b, LASX, gvec_xx_i, MO_8, do_vnori_b)
3673*5c23704eSSong Gao
/*
 * vsll / vslli (and xv* forms): register-count entries expand with
 * tcg_gen_gvec_shlv, immediate entries with tcg_gen_gvec_shli, one entry
 * per element size MO_8..MO_64.
 */
3674*5c23704eSSong GaoTRANS(vsll_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_shlv)
3675*5c23704eSSong GaoTRANS(vsll_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_shlv)
3676*5c23704eSSong GaoTRANS(vsll_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_shlv)
3677*5c23704eSSong GaoTRANS(vsll_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_shlv)
3678*5c23704eSSong GaoTRANS(vslli_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_shli)
3679*5c23704eSSong GaoTRANS(vslli_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_shli)
3680*5c23704eSSong GaoTRANS(vslli_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_shli)
3681*5c23704eSSong GaoTRANS(vslli_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_shli)
3682*5c23704eSSong GaoTRANS(xvsll_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_shlv)
3683*5c23704eSSong GaoTRANS(xvsll_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_shlv)
3684*5c23704eSSong GaoTRANS(xvsll_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_shlv)
3685*5c23704eSSong GaoTRANS(xvsll_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_shlv)
3686*5c23704eSSong GaoTRANS(xvslli_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_shli)
3687*5c23704eSSong GaoTRANS(xvslli_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_shli)
3688*5c23704eSSong GaoTRANS(xvslli_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_shli)
3689*5c23704eSSong GaoTRANS(xvslli_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_shli)
3690*5c23704eSSong Gao
/*
 * vsrl / vsrli (and xv* forms): register-count entries expand with
 * tcg_gen_gvec_shrv, immediate entries with tcg_gen_gvec_shri, one entry
 * per element size MO_8..MO_64.
 */
3691*5c23704eSSong GaoTRANS(vsrl_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_shrv)
3692*5c23704eSSong GaoTRANS(vsrl_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_shrv)
3693*5c23704eSSong GaoTRANS(vsrl_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_shrv)
3694*5c23704eSSong GaoTRANS(vsrl_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_shrv)
3695*5c23704eSSong GaoTRANS(vsrli_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_shri)
3696*5c23704eSSong GaoTRANS(vsrli_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_shri)
3697*5c23704eSSong GaoTRANS(vsrli_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_shri)
3698*5c23704eSSong GaoTRANS(vsrli_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_shri)
3699*5c23704eSSong GaoTRANS(xvsrl_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_shrv)
3700*5c23704eSSong GaoTRANS(xvsrl_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_shrv)
3701*5c23704eSSong GaoTRANS(xvsrl_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_shrv)
3702*5c23704eSSong GaoTRANS(xvsrl_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_shrv)
3703*5c23704eSSong GaoTRANS(xvsrli_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_shri)
3704*5c23704eSSong GaoTRANS(xvsrli_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_shri)
3705*5c23704eSSong GaoTRANS(xvsrli_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_shri)
3706*5c23704eSSong GaoTRANS(xvsrli_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_shri)
3707*5c23704eSSong Gao
/*
 * vsra / vsrai (and xv* forms): register-count entries expand with
 * tcg_gen_gvec_sarv, immediate entries with tcg_gen_gvec_sari, one entry
 * per element size MO_8..MO_64.
 */
3708*5c23704eSSong GaoTRANS(vsra_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sarv)
3709*5c23704eSSong GaoTRANS(vsra_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sarv)
3710*5c23704eSSong GaoTRANS(vsra_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_sarv)
3711*5c23704eSSong GaoTRANS(vsra_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_sarv)
3712*5c23704eSSong GaoTRANS(vsrai_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_sari)
3713*5c23704eSSong GaoTRANS(vsrai_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_sari)
3714*5c23704eSSong GaoTRANS(vsrai_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_sari)
3715*5c23704eSSong GaoTRANS(vsrai_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_sari)
3716*5c23704eSSong GaoTRANS(xvsra_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sarv)
3717*5c23704eSSong GaoTRANS(xvsra_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sarv)
3718*5c23704eSSong GaoTRANS(xvsra_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sarv)
3719*5c23704eSSong GaoTRANS(xvsra_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sarv)
3720*5c23704eSSong GaoTRANS(xvsrai_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_sari)
3721*5c23704eSSong GaoTRANS(xvsrai_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_sari)
3722*5c23704eSSong GaoTRANS(xvsrai_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_sari)
3723*5c23704eSSong GaoTRANS(xvsrai_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_sari)
3724*5c23704eSSong Gao
/*
 * vrotr / vrotri (and xv* forms): register-count entries expand with
 * tcg_gen_gvec_rotrv, immediate entries with tcg_gen_gvec_rotri, one entry
 * per element size MO_8..MO_64.
 */
3725*5c23704eSSong GaoTRANS(vrotr_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_rotrv)
3726*5c23704eSSong GaoTRANS(vrotr_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_rotrv)
3727*5c23704eSSong GaoTRANS(vrotr_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_rotrv)
3728*5c23704eSSong GaoTRANS(vrotr_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_rotrv)
3729*5c23704eSSong GaoTRANS(vrotri_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_rotri)
3730*5c23704eSSong GaoTRANS(vrotri_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_rotri)
3731*5c23704eSSong GaoTRANS(vrotri_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_rotri)
3732*5c23704eSSong GaoTRANS(vrotri_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_rotri)
3733*5c23704eSSong GaoTRANS(xvrotr_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_rotrv)
3734*5c23704eSSong GaoTRANS(xvrotr_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_rotrv)
3735*5c23704eSSong GaoTRANS(xvrotr_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_rotrv)
3736*5c23704eSSong GaoTRANS(xvrotr_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_rotrv)
3737*5c23704eSSong GaoTRANS(xvrotri_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_rotri)
3738*5c23704eSSong GaoTRANS(xvrotri_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_rotri)
3739*5c23704eSSong GaoTRANS(xvrotri_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_rotri)
3740*5c23704eSSong GaoTRANS(xvrotri_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_rotri)
3741*5c23704eSSong Gao
/*
 * vsllwil_* and vextl_* entries, expanded through out-of-line helpers:
 * gen_vv_i / gen_vv for the LSX entries, gen_xx_i / gen_xx for the LASX
 * entries.  The xv* entries reuse the same gen_helper_* functions as their
 * v* counterparts.
 */
3742*5c23704eSSong GaoTRANS(vsllwil_h_b, LSX, gen_vv_i, gen_helper_vsllwil_h_b)
3743*5c23704eSSong GaoTRANS(vsllwil_w_h, LSX, gen_vv_i, gen_helper_vsllwil_w_h)
3744*5c23704eSSong GaoTRANS(vsllwil_d_w, LSX, gen_vv_i, gen_helper_vsllwil_d_w)
3745*5c23704eSSong GaoTRANS(vextl_q_d, LSX, gen_vv, gen_helper_vextl_q_d)
3746*5c23704eSSong GaoTRANS(vsllwil_hu_bu, LSX, gen_vv_i, gen_helper_vsllwil_hu_bu)
3747*5c23704eSSong GaoTRANS(vsllwil_wu_hu, LSX, gen_vv_i, gen_helper_vsllwil_wu_hu)
3748*5c23704eSSong GaoTRANS(vsllwil_du_wu, LSX, gen_vv_i, gen_helper_vsllwil_du_wu)
3749*5c23704eSSong GaoTRANS(vextl_qu_du, LSX, gen_vv, gen_helper_vextl_qu_du)
3750*5c23704eSSong GaoTRANS(xvsllwil_h_b, LASX, gen_xx_i, gen_helper_vsllwil_h_b)
3751*5c23704eSSong GaoTRANS(xvsllwil_w_h, LASX, gen_xx_i, gen_helper_vsllwil_w_h)
3752*5c23704eSSong GaoTRANS(xvsllwil_d_w, LASX, gen_xx_i, gen_helper_vsllwil_d_w)
3753*5c23704eSSong GaoTRANS(xvextl_q_d, LASX, gen_xx, gen_helper_vextl_q_d)
3754*5c23704eSSong GaoTRANS(xvsllwil_hu_bu, LASX, gen_xx_i, gen_helper_vsllwil_hu_bu)
3755*5c23704eSSong GaoTRANS(xvsllwil_wu_hu, LASX, gen_xx_i, gen_helper_vsllwil_wu_hu)
3756*5c23704eSSong GaoTRANS(xvsllwil_du_wu, LASX, gen_xx_i, gen_helper_vsllwil_du_wu)
3757*5c23704eSSong GaoTRANS(xvextl_qu_du, LASX, gen_xx, gen_helper_vextl_qu_du)
3758*5c23704eSSong Gao
/*
 * vsrlr / vsrlri entries, expanded through out-of-line helpers
 * (gen_vvv / gen_vv_i for LSX, gen_xxx / gen_xx_i for LASX).  The xv*
 * entries share the gen_helper_vsrlr* functions with the v* entries.
 */
3759*5c23704eSSong GaoTRANS(vsrlr_b, LSX, gen_vvv, gen_helper_vsrlr_b)
3760*5c23704eSSong GaoTRANS(vsrlr_h, LSX, gen_vvv, gen_helper_vsrlr_h)
3761*5c23704eSSong GaoTRANS(vsrlr_w, LSX, gen_vvv, gen_helper_vsrlr_w)
3762*5c23704eSSong GaoTRANS(vsrlr_d, LSX, gen_vvv, gen_helper_vsrlr_d)
3763*5c23704eSSong GaoTRANS(vsrlri_b, LSX, gen_vv_i, gen_helper_vsrlri_b)
3764*5c23704eSSong GaoTRANS(vsrlri_h, LSX, gen_vv_i, gen_helper_vsrlri_h)
3765*5c23704eSSong GaoTRANS(vsrlri_w, LSX, gen_vv_i, gen_helper_vsrlri_w)
3766*5c23704eSSong GaoTRANS(vsrlri_d, LSX, gen_vv_i, gen_helper_vsrlri_d)
3767*5c23704eSSong GaoTRANS(xvsrlr_b, LASX, gen_xxx, gen_helper_vsrlr_b)
3768*5c23704eSSong GaoTRANS(xvsrlr_h, LASX, gen_xxx, gen_helper_vsrlr_h)
3769*5c23704eSSong GaoTRANS(xvsrlr_w, LASX, gen_xxx, gen_helper_vsrlr_w)
3770*5c23704eSSong GaoTRANS(xvsrlr_d, LASX, gen_xxx, gen_helper_vsrlr_d)
3771*5c23704eSSong GaoTRANS(xvsrlri_b, LASX, gen_xx_i, gen_helper_vsrlri_b)
3772*5c23704eSSong GaoTRANS(xvsrlri_h, LASX, gen_xx_i, gen_helper_vsrlri_h)
3773*5c23704eSSong GaoTRANS(xvsrlri_w, LASX, gen_xx_i, gen_helper_vsrlri_w)
3774*5c23704eSSong GaoTRANS(xvsrlri_d, LASX, gen_xx_i, gen_helper_vsrlri_d)
3775*5c23704eSSong Gao
/*
 * vsrar / vsrari entries, expanded through out-of-line helpers
 * (gen_vvv / gen_vv_i for LSX, gen_xxx / gen_xx_i for LASX).  The xv*
 * entries share the gen_helper_vsrar* functions with the v* entries.
 */
3776*5c23704eSSong GaoTRANS(vsrar_b, LSX, gen_vvv, gen_helper_vsrar_b)
3777*5c23704eSSong GaoTRANS(vsrar_h, LSX, gen_vvv, gen_helper_vsrar_h)
3778*5c23704eSSong GaoTRANS(vsrar_w, LSX, gen_vvv, gen_helper_vsrar_w)
3779*5c23704eSSong GaoTRANS(vsrar_d, LSX, gen_vvv, gen_helper_vsrar_d)
3780*5c23704eSSong GaoTRANS(vsrari_b, LSX, gen_vv_i, gen_helper_vsrari_b)
3781*5c23704eSSong GaoTRANS(vsrari_h, LSX, gen_vv_i, gen_helper_vsrari_h)
3782*5c23704eSSong GaoTRANS(vsrari_w, LSX, gen_vv_i, gen_helper_vsrari_w)
3783*5c23704eSSong GaoTRANS(vsrari_d, LSX, gen_vv_i, gen_helper_vsrari_d)
3784*5c23704eSSong GaoTRANS(xvsrar_b, LASX, gen_xxx, gen_helper_vsrar_b)
3785*5c23704eSSong GaoTRANS(xvsrar_h, LASX, gen_xxx, gen_helper_vsrar_h)
3786*5c23704eSSong GaoTRANS(xvsrar_w, LASX, gen_xxx, gen_helper_vsrar_w)
3787*5c23704eSSong GaoTRANS(xvsrar_d, LASX, gen_xxx, gen_helper_vsrar_d)
3788*5c23704eSSong GaoTRANS(xvsrari_b, LASX, gen_xx_i, gen_helper_vsrari_b)
3789*5c23704eSSong GaoTRANS(xvsrari_h, LASX, gen_xx_i, gen_helper_vsrari_h)
3790*5c23704eSSong GaoTRANS(xvsrari_w, LASX, gen_xx_i, gen_helper_vsrari_w)
3791*5c23704eSSong GaoTRANS(xvsrari_d, LASX, gen_xx_i, gen_helper_vsrari_d)
3792*5c23704eSSong Gao
/*
 * vsrln_* / vsran_* entries (three-register forms), expanded through
 * out-of-line helpers via gen_vvv (LSX) and gen_xxx (LASX).  The xv*
 * entries reuse the v* helpers.
 */
3793*5c23704eSSong GaoTRANS(vsrln_b_h, LSX, gen_vvv, gen_helper_vsrln_b_h)
3794*5c23704eSSong GaoTRANS(vsrln_h_w, LSX, gen_vvv, gen_helper_vsrln_h_w)
3795*5c23704eSSong GaoTRANS(vsrln_w_d, LSX, gen_vvv, gen_helper_vsrln_w_d)
3796*5c23704eSSong GaoTRANS(vsran_b_h, LSX, gen_vvv, gen_helper_vsran_b_h)
3797*5c23704eSSong GaoTRANS(vsran_h_w, LSX, gen_vvv, gen_helper_vsran_h_w)
3798*5c23704eSSong GaoTRANS(vsran_w_d, LSX, gen_vvv, gen_helper_vsran_w_d)
3799*5c23704eSSong GaoTRANS(xvsrln_b_h, LASX, gen_xxx, gen_helper_vsrln_b_h)
3800*5c23704eSSong GaoTRANS(xvsrln_h_w, LASX, gen_xxx, gen_helper_vsrln_h_w)
3801*5c23704eSSong GaoTRANS(xvsrln_w_d, LASX, gen_xxx, gen_helper_vsrln_w_d)
3802*5c23704eSSong GaoTRANS(xvsran_b_h, LASX, gen_xxx, gen_helper_vsran_b_h)
3803*5c23704eSSong GaoTRANS(xvsran_h_w, LASX, gen_xxx, gen_helper_vsran_h_w)
3804*5c23704eSSong GaoTRANS(xvsran_w_d, LASX, gen_xxx, gen_helper_vsran_w_d)
3805*5c23704eSSong Gao
/*
 * vsrlni_* / vsrani_* entries (immediate forms), expanded through
 * out-of-line helpers via gen_vv_i (LSX) and gen_xx_i (LASX).  The xv*
 * entries reuse the v* helpers.
 */
3806*5c23704eSSong GaoTRANS(vsrlni_b_h, LSX, gen_vv_i, gen_helper_vsrlni_b_h)
3807*5c23704eSSong GaoTRANS(vsrlni_h_w, LSX, gen_vv_i, gen_helper_vsrlni_h_w)
3808*5c23704eSSong GaoTRANS(vsrlni_w_d, LSX, gen_vv_i, gen_helper_vsrlni_w_d)
3809*5c23704eSSong GaoTRANS(vsrlni_d_q, LSX, gen_vv_i, gen_helper_vsrlni_d_q)
3810*5c23704eSSong GaoTRANS(vsrani_b_h, LSX, gen_vv_i, gen_helper_vsrani_b_h)
3811*5c23704eSSong GaoTRANS(vsrani_h_w, LSX, gen_vv_i, gen_helper_vsrani_h_w)
3812*5c23704eSSong GaoTRANS(vsrani_w_d, LSX, gen_vv_i, gen_helper_vsrani_w_d)
3813*5c23704eSSong GaoTRANS(vsrani_d_q, LSX, gen_vv_i, gen_helper_vsrani_d_q)
3814*5c23704eSSong GaoTRANS(xvsrlni_b_h, LASX, gen_xx_i, gen_helper_vsrlni_b_h)
3815*5c23704eSSong GaoTRANS(xvsrlni_h_w, LASX, gen_xx_i, gen_helper_vsrlni_h_w)
3816*5c23704eSSong GaoTRANS(xvsrlni_w_d, LASX, gen_xx_i, gen_helper_vsrlni_w_d)
3817*5c23704eSSong GaoTRANS(xvsrlni_d_q, LASX, gen_xx_i, gen_helper_vsrlni_d_q)
3818*5c23704eSSong GaoTRANS(xvsrani_b_h, LASX, gen_xx_i, gen_helper_vsrani_b_h)
3819*5c23704eSSong GaoTRANS(xvsrani_h_w, LASX, gen_xx_i, gen_helper_vsrani_h_w)
3820*5c23704eSSong GaoTRANS(xvsrani_w_d, LASX, gen_xx_i, gen_helper_vsrani_w_d)
3821*5c23704eSSong GaoTRANS(xvsrani_d_q, LASX, gen_xx_i, gen_helper_vsrani_d_q)
3822*5c23704eSSong Gao
/*
 * vsrlrn_* / vsrarn_* entries (three-register forms), expanded through
 * out-of-line helpers via gen_vvv (LSX) and gen_xxx (LASX).  The xv*
 * entries reuse the v* helpers.
 */
3823*5c23704eSSong GaoTRANS(vsrlrn_b_h, LSX, gen_vvv, gen_helper_vsrlrn_b_h)
3824*5c23704eSSong GaoTRANS(vsrlrn_h_w, LSX, gen_vvv, gen_helper_vsrlrn_h_w)
3825*5c23704eSSong GaoTRANS(vsrlrn_w_d, LSX, gen_vvv, gen_helper_vsrlrn_w_d)
3826*5c23704eSSong GaoTRANS(vsrarn_b_h, LSX, gen_vvv, gen_helper_vsrarn_b_h)
3827*5c23704eSSong GaoTRANS(vsrarn_h_w, LSX, gen_vvv, gen_helper_vsrarn_h_w)
3828*5c23704eSSong GaoTRANS(vsrarn_w_d, LSX, gen_vvv, gen_helper_vsrarn_w_d)
3829*5c23704eSSong GaoTRANS(xvsrlrn_b_h, LASX, gen_xxx, gen_helper_vsrlrn_b_h)
3830*5c23704eSSong GaoTRANS(xvsrlrn_h_w, LASX, gen_xxx, gen_helper_vsrlrn_h_w)
3831*5c23704eSSong GaoTRANS(xvsrlrn_w_d, LASX, gen_xxx, gen_helper_vsrlrn_w_d)
3832*5c23704eSSong GaoTRANS(xvsrarn_b_h, LASX, gen_xxx, gen_helper_vsrarn_b_h)
3833*5c23704eSSong GaoTRANS(xvsrarn_h_w, LASX, gen_xxx, gen_helper_vsrarn_h_w)
3834*5c23704eSSong GaoTRANS(xvsrarn_w_d, LASX, gen_xxx, gen_helper_vsrarn_w_d)
3835*5c23704eSSong Gao
/*
 * vsrlrni_* / vsrarni_* entries (immediate forms), expanded through
 * out-of-line helpers via gen_vv_i (LSX) and gen_xx_i (LASX).  The xv*
 * entries reuse the v* helpers.
 */
3836*5c23704eSSong GaoTRANS(vsrlrni_b_h, LSX, gen_vv_i, gen_helper_vsrlrni_b_h)
3837*5c23704eSSong GaoTRANS(vsrlrni_h_w, LSX, gen_vv_i, gen_helper_vsrlrni_h_w)
3838*5c23704eSSong GaoTRANS(vsrlrni_w_d, LSX, gen_vv_i, gen_helper_vsrlrni_w_d)
3839*5c23704eSSong GaoTRANS(vsrlrni_d_q, LSX, gen_vv_i, gen_helper_vsrlrni_d_q)
3840*5c23704eSSong GaoTRANS(vsrarni_b_h, LSX, gen_vv_i, gen_helper_vsrarni_b_h)
3841*5c23704eSSong GaoTRANS(vsrarni_h_w, LSX, gen_vv_i, gen_helper_vsrarni_h_w)
3842*5c23704eSSong GaoTRANS(vsrarni_w_d, LSX, gen_vv_i, gen_helper_vsrarni_w_d)
3843*5c23704eSSong GaoTRANS(vsrarni_d_q, LSX, gen_vv_i, gen_helper_vsrarni_d_q)
3844*5c23704eSSong GaoTRANS(xvsrlrni_b_h, LASX, gen_xx_i, gen_helper_vsrlrni_b_h)
3845*5c23704eSSong GaoTRANS(xvsrlrni_h_w, LASX, gen_xx_i, gen_helper_vsrlrni_h_w)
3846*5c23704eSSong GaoTRANS(xvsrlrni_w_d, LASX, gen_xx_i, gen_helper_vsrlrni_w_d)
3847*5c23704eSSong GaoTRANS(xvsrlrni_d_q, LASX, gen_xx_i, gen_helper_vsrlrni_d_q)
3848*5c23704eSSong GaoTRANS(xvsrarni_b_h, LASX, gen_xx_i, gen_helper_vsrarni_b_h)
3849*5c23704eSSong GaoTRANS(xvsrarni_h_w, LASX, gen_xx_i, gen_helper_vsrarni_h_w)
3850*5c23704eSSong GaoTRANS(xvsrarni_w_d, LASX, gen_xx_i, gen_helper_vsrarni_w_d)
3851*5c23704eSSong GaoTRANS(xvsrarni_d_q, LASX, gen_xx_i, gen_helper_vsrarni_d_q)
3852*5c23704eSSong Gao
/*
 * vssrln_* / vssran_* entries, including the *u_* variants (three-register
 * forms), expanded through out-of-line helpers via gen_vvv (LSX) and
 * gen_xxx (LASX).  The xv* entries reuse the v* helpers.
 */
3853*5c23704eSSong GaoTRANS(vssrln_b_h, LSX, gen_vvv, gen_helper_vssrln_b_h)
3854*5c23704eSSong GaoTRANS(vssrln_h_w, LSX, gen_vvv, gen_helper_vssrln_h_w)
3855*5c23704eSSong GaoTRANS(vssrln_w_d, LSX, gen_vvv, gen_helper_vssrln_w_d)
3856*5c23704eSSong GaoTRANS(vssran_b_h, LSX, gen_vvv, gen_helper_vssran_b_h)
3857*5c23704eSSong GaoTRANS(vssran_h_w, LSX, gen_vvv, gen_helper_vssran_h_w)
3858*5c23704eSSong GaoTRANS(vssran_w_d, LSX, gen_vvv, gen_helper_vssran_w_d)
3859*5c23704eSSong GaoTRANS(vssrln_bu_h, LSX, gen_vvv, gen_helper_vssrln_bu_h)
3860*5c23704eSSong GaoTRANS(vssrln_hu_w, LSX, gen_vvv, gen_helper_vssrln_hu_w)
3861*5c23704eSSong GaoTRANS(vssrln_wu_d, LSX, gen_vvv, gen_helper_vssrln_wu_d)
3862*5c23704eSSong GaoTRANS(vssran_bu_h, LSX, gen_vvv, gen_helper_vssran_bu_h)
3863*5c23704eSSong GaoTRANS(vssran_hu_w, LSX, gen_vvv, gen_helper_vssran_hu_w)
3864*5c23704eSSong GaoTRANS(vssran_wu_d, LSX, gen_vvv, gen_helper_vssran_wu_d)
3865*5c23704eSSong GaoTRANS(xvssrln_b_h, LASX, gen_xxx, gen_helper_vssrln_b_h)
3866*5c23704eSSong GaoTRANS(xvssrln_h_w, LASX, gen_xxx, gen_helper_vssrln_h_w)
3867*5c23704eSSong GaoTRANS(xvssrln_w_d, LASX, gen_xxx, gen_helper_vssrln_w_d)
3868*5c23704eSSong GaoTRANS(xvssran_b_h, LASX, gen_xxx, gen_helper_vssran_b_h)
3869*5c23704eSSong GaoTRANS(xvssran_h_w, LASX, gen_xxx, gen_helper_vssran_h_w)
3870*5c23704eSSong GaoTRANS(xvssran_w_d, LASX, gen_xxx, gen_helper_vssran_w_d)
3871*5c23704eSSong GaoTRANS(xvssrln_bu_h, LASX, gen_xxx, gen_helper_vssrln_bu_h)
3872*5c23704eSSong GaoTRANS(xvssrln_hu_w, LASX, gen_xxx, gen_helper_vssrln_hu_w)
3873*5c23704eSSong GaoTRANS(xvssrln_wu_d, LASX, gen_xxx, gen_helper_vssrln_wu_d)
3874*5c23704eSSong GaoTRANS(xvssran_bu_h, LASX, gen_xxx, gen_helper_vssran_bu_h)
3875*5c23704eSSong GaoTRANS(xvssran_hu_w, LASX, gen_xxx, gen_helper_vssran_hu_w)
3876*5c23704eSSong GaoTRANS(xvssran_wu_d, LASX, gen_xxx, gen_helper_vssran_wu_d)
3877*5c23704eSSong Gao
/*
 * vssrlni_* / vssrani_* entries, including the *u_* variants (immediate
 * forms), expanded through out-of-line helpers via gen_vv_i (LSX) and
 * gen_xx_i (LASX).  The xv* entries reuse the v* helpers.
 */
3878*5c23704eSSong GaoTRANS(vssrlni_b_h, LSX, gen_vv_i, gen_helper_vssrlni_b_h)
3879*5c23704eSSong GaoTRANS(vssrlni_h_w, LSX, gen_vv_i, gen_helper_vssrlni_h_w)
3880*5c23704eSSong GaoTRANS(vssrlni_w_d, LSX, gen_vv_i, gen_helper_vssrlni_w_d)
3881*5c23704eSSong GaoTRANS(vssrlni_d_q, LSX, gen_vv_i, gen_helper_vssrlni_d_q)
3882*5c23704eSSong GaoTRANS(vssrani_b_h, LSX, gen_vv_i, gen_helper_vssrani_b_h)
3883*5c23704eSSong GaoTRANS(vssrani_h_w, LSX, gen_vv_i, gen_helper_vssrani_h_w)
3884*5c23704eSSong GaoTRANS(vssrani_w_d, LSX, gen_vv_i, gen_helper_vssrani_w_d)
3885*5c23704eSSong GaoTRANS(vssrani_d_q, LSX, gen_vv_i, gen_helper_vssrani_d_q)
3886*5c23704eSSong GaoTRANS(vssrlni_bu_h, LSX, gen_vv_i, gen_helper_vssrlni_bu_h)
3887*5c23704eSSong GaoTRANS(vssrlni_hu_w, LSX, gen_vv_i, gen_helper_vssrlni_hu_w)
3888*5c23704eSSong GaoTRANS(vssrlni_wu_d, LSX, gen_vv_i, gen_helper_vssrlni_wu_d)
3889*5c23704eSSong GaoTRANS(vssrlni_du_q, LSX, gen_vv_i, gen_helper_vssrlni_du_q)
3890*5c23704eSSong GaoTRANS(vssrani_bu_h, LSX, gen_vv_i, gen_helper_vssrani_bu_h)
3891*5c23704eSSong GaoTRANS(vssrani_hu_w, LSX, gen_vv_i, gen_helper_vssrani_hu_w)
3892*5c23704eSSong GaoTRANS(vssrani_wu_d, LSX, gen_vv_i, gen_helper_vssrani_wu_d)
3893*5c23704eSSong GaoTRANS(vssrani_du_q, LSX, gen_vv_i, gen_helper_vssrani_du_q)
3894*5c23704eSSong GaoTRANS(xvssrlni_b_h, LASX, gen_xx_i, gen_helper_vssrlni_b_h)
3895*5c23704eSSong GaoTRANS(xvssrlni_h_w, LASX, gen_xx_i, gen_helper_vssrlni_h_w)
3896*5c23704eSSong GaoTRANS(xvssrlni_w_d, LASX, gen_xx_i, gen_helper_vssrlni_w_d)
3897*5c23704eSSong GaoTRANS(xvssrlni_d_q, LASX, gen_xx_i, gen_helper_vssrlni_d_q)
3898*5c23704eSSong GaoTRANS(xvssrani_b_h, LASX, gen_xx_i, gen_helper_vssrani_b_h)
3899*5c23704eSSong GaoTRANS(xvssrani_h_w, LASX, gen_xx_i, gen_helper_vssrani_h_w)
3900*5c23704eSSong GaoTRANS(xvssrani_w_d, LASX, gen_xx_i, gen_helper_vssrani_w_d)
3901*5c23704eSSong GaoTRANS(xvssrani_d_q, LASX, gen_xx_i, gen_helper_vssrani_d_q)
3902*5c23704eSSong GaoTRANS(xvssrlni_bu_h, LASX, gen_xx_i, gen_helper_vssrlni_bu_h)
3903*5c23704eSSong GaoTRANS(xvssrlni_hu_w, LASX, gen_xx_i, gen_helper_vssrlni_hu_w)
3904*5c23704eSSong GaoTRANS(xvssrlni_wu_d, LASX, gen_xx_i, gen_helper_vssrlni_wu_d)
3905*5c23704eSSong GaoTRANS(xvssrlni_du_q, LASX, gen_xx_i, gen_helper_vssrlni_du_q)
3906*5c23704eSSong GaoTRANS(xvssrani_bu_h, LASX, gen_xx_i, gen_helper_vssrani_bu_h)
3907*5c23704eSSong GaoTRANS(xvssrani_hu_w, LASX, gen_xx_i, gen_helper_vssrani_hu_w)
3908*5c23704eSSong GaoTRANS(xvssrani_wu_d, LASX, gen_xx_i, gen_helper_vssrani_wu_d)
3909*5c23704eSSong GaoTRANS(xvssrani_du_q, LASX, gen_xx_i, gen_helper_vssrani_du_q)
3910*5c23704eSSong Gao
/*
 * vssrlrn_* / vssrarn_* entries, including the *u_* variants
 * (three-register forms), expanded through out-of-line helpers via gen_vvv
 * (LSX) and gen_xxx (LASX).  The xv* entries reuse the v* helpers.
 */
3911*5c23704eSSong GaoTRANS(vssrlrn_b_h, LSX, gen_vvv, gen_helper_vssrlrn_b_h)
3912*5c23704eSSong GaoTRANS(vssrlrn_h_w, LSX, gen_vvv, gen_helper_vssrlrn_h_w)
3913*5c23704eSSong GaoTRANS(vssrlrn_w_d, LSX, gen_vvv, gen_helper_vssrlrn_w_d)
3914*5c23704eSSong GaoTRANS(vssrarn_b_h, LSX, gen_vvv, gen_helper_vssrarn_b_h)
3915*5c23704eSSong GaoTRANS(vssrarn_h_w, LSX, gen_vvv, gen_helper_vssrarn_h_w)
3916*5c23704eSSong GaoTRANS(vssrarn_w_d, LSX, gen_vvv, gen_helper_vssrarn_w_d)
3917*5c23704eSSong GaoTRANS(vssrlrn_bu_h, LSX, gen_vvv, gen_helper_vssrlrn_bu_h)
3918*5c23704eSSong GaoTRANS(vssrlrn_hu_w, LSX, gen_vvv, gen_helper_vssrlrn_hu_w)
3919*5c23704eSSong GaoTRANS(vssrlrn_wu_d, LSX, gen_vvv, gen_helper_vssrlrn_wu_d)
3920*5c23704eSSong GaoTRANS(vssrarn_bu_h, LSX, gen_vvv, gen_helper_vssrarn_bu_h)
3921*5c23704eSSong GaoTRANS(vssrarn_hu_w, LSX, gen_vvv, gen_helper_vssrarn_hu_w)
3922*5c23704eSSong GaoTRANS(vssrarn_wu_d, LSX, gen_vvv, gen_helper_vssrarn_wu_d)
3923*5c23704eSSong GaoTRANS(xvssrlrn_b_h, LASX, gen_xxx, gen_helper_vssrlrn_b_h)
3924*5c23704eSSong GaoTRANS(xvssrlrn_h_w, LASX, gen_xxx, gen_helper_vssrlrn_h_w)
3925*5c23704eSSong GaoTRANS(xvssrlrn_w_d, LASX, gen_xxx, gen_helper_vssrlrn_w_d)
3926*5c23704eSSong GaoTRANS(xvssrarn_b_h, LASX, gen_xxx, gen_helper_vssrarn_b_h)
3927*5c23704eSSong GaoTRANS(xvssrarn_h_w, LASX, gen_xxx, gen_helper_vssrarn_h_w)
3928*5c23704eSSong GaoTRANS(xvssrarn_w_d, LASX, gen_xxx, gen_helper_vssrarn_w_d)
3929*5c23704eSSong GaoTRANS(xvssrlrn_bu_h, LASX, gen_xxx, gen_helper_vssrlrn_bu_h)
3930*5c23704eSSong GaoTRANS(xvssrlrn_hu_w, LASX, gen_xxx, gen_helper_vssrlrn_hu_w)
3931*5c23704eSSong GaoTRANS(xvssrlrn_wu_d, LASX, gen_xxx, gen_helper_vssrlrn_wu_d)
3932*5c23704eSSong GaoTRANS(xvssrarn_bu_h, LASX, gen_xxx, gen_helper_vssrarn_bu_h)
3933*5c23704eSSong GaoTRANS(xvssrarn_hu_w, LASX, gen_xxx, gen_helper_vssrarn_hu_w)
3934*5c23704eSSong GaoTRANS(xvssrarn_wu_d, LASX, gen_xxx, gen_helper_vssrarn_wu_d)
3935*5c23704eSSong Gao
/*
 * vssrlrni_* / vssrarni_* entries, including the *u_* variants (immediate
 * forms), expanded through out-of-line helpers via gen_vv_i (LSX) and
 * gen_xx_i (LASX).  The xv* entries reuse the v* helpers.
 */
3936*5c23704eSSong GaoTRANS(vssrlrni_b_h, LSX, gen_vv_i, gen_helper_vssrlrni_b_h)
3937*5c23704eSSong GaoTRANS(vssrlrni_h_w, LSX, gen_vv_i, gen_helper_vssrlrni_h_w)
3938*5c23704eSSong GaoTRANS(vssrlrni_w_d, LSX, gen_vv_i, gen_helper_vssrlrni_w_d)
3939*5c23704eSSong GaoTRANS(vssrlrni_d_q, LSX, gen_vv_i, gen_helper_vssrlrni_d_q)
3940*5c23704eSSong GaoTRANS(vssrarni_b_h, LSX, gen_vv_i, gen_helper_vssrarni_b_h)
3941*5c23704eSSong GaoTRANS(vssrarni_h_w, LSX, gen_vv_i, gen_helper_vssrarni_h_w)
3942*5c23704eSSong GaoTRANS(vssrarni_w_d, LSX, gen_vv_i, gen_helper_vssrarni_w_d)
3943*5c23704eSSong GaoTRANS(vssrarni_d_q, LSX, gen_vv_i, gen_helper_vssrarni_d_q)
3944*5c23704eSSong GaoTRANS(vssrlrni_bu_h, LSX, gen_vv_i, gen_helper_vssrlrni_bu_h)
3945*5c23704eSSong GaoTRANS(vssrlrni_hu_w, LSX, gen_vv_i, gen_helper_vssrlrni_hu_w)
3946*5c23704eSSong GaoTRANS(vssrlrni_wu_d, LSX, gen_vv_i, gen_helper_vssrlrni_wu_d)
3947*5c23704eSSong GaoTRANS(vssrlrni_du_q, LSX, gen_vv_i, gen_helper_vssrlrni_du_q)
3948*5c23704eSSong GaoTRANS(vssrarni_bu_h, LSX, gen_vv_i, gen_helper_vssrarni_bu_h)
3949*5c23704eSSong GaoTRANS(vssrarni_hu_w, LSX, gen_vv_i, gen_helper_vssrarni_hu_w)
3950*5c23704eSSong GaoTRANS(vssrarni_wu_d, LSX, gen_vv_i, gen_helper_vssrarni_wu_d)
3951*5c23704eSSong GaoTRANS(vssrarni_du_q, LSX, gen_vv_i, gen_helper_vssrarni_du_q)
3952*5c23704eSSong GaoTRANS(xvssrlrni_b_h, LASX, gen_xx_i, gen_helper_vssrlrni_b_h)
3953*5c23704eSSong GaoTRANS(xvssrlrni_h_w, LASX, gen_xx_i, gen_helper_vssrlrni_h_w)
3954*5c23704eSSong GaoTRANS(xvssrlrni_w_d, LASX, gen_xx_i, gen_helper_vssrlrni_w_d)
3955*5c23704eSSong GaoTRANS(xvssrlrni_d_q, LASX, gen_xx_i, gen_helper_vssrlrni_d_q)
3956*5c23704eSSong GaoTRANS(xvssrarni_b_h, LASX, gen_xx_i, gen_helper_vssrarni_b_h)
3957*5c23704eSSong GaoTRANS(xvssrarni_h_w, LASX, gen_xx_i, gen_helper_vssrarni_h_w)
3958*5c23704eSSong GaoTRANS(xvssrarni_w_d, LASX, gen_xx_i, gen_helper_vssrarni_w_d)
3959*5c23704eSSong GaoTRANS(xvssrarni_d_q, LASX, gen_xx_i, gen_helper_vssrarni_d_q)
3960*5c23704eSSong GaoTRANS(xvssrlrni_bu_h, LASX, gen_xx_i, gen_helper_vssrlrni_bu_h)
3961*5c23704eSSong GaoTRANS(xvssrlrni_hu_w, LASX, gen_xx_i, gen_helper_vssrlrni_hu_w)
3962*5c23704eSSong GaoTRANS(xvssrlrni_wu_d, LASX, gen_xx_i, gen_helper_vssrlrni_wu_d)
3963*5c23704eSSong GaoTRANS(xvssrlrni_du_q, LASX, gen_xx_i, gen_helper_vssrlrni_du_q)
3964*5c23704eSSong GaoTRANS(xvssrarni_bu_h, LASX, gen_xx_i, gen_helper_vssrarni_bu_h)
3965*5c23704eSSong GaoTRANS(xvssrarni_hu_w, LASX, gen_xx_i, gen_helper_vssrarni_hu_w)
3966*5c23704eSSong GaoTRANS(xvssrarni_wu_d, LASX, gen_xx_i, gen_helper_vssrarni_wu_d)
3967*5c23704eSSong GaoTRANS(xvssrarni_du_q, LASX, gen_xx_i, gen_helper_vssrarni_du_q)
3968*5c23704eSSong Gao
/*
 * vclo_* / vclz_* entries (two-register forms), expanded through
 * out-of-line helpers via gen_vv (LSX) and gen_xx (LASX).  The xv* entries
 * reuse the v* helpers.
 */
3969*5c23704eSSong GaoTRANS(vclo_b, LSX, gen_vv, gen_helper_vclo_b)
3970*5c23704eSSong GaoTRANS(vclo_h, LSX, gen_vv, gen_helper_vclo_h)
3971*5c23704eSSong GaoTRANS(vclo_w, LSX, gen_vv, gen_helper_vclo_w)
3972*5c23704eSSong GaoTRANS(vclo_d, LSX, gen_vv, gen_helper_vclo_d)
3973*5c23704eSSong GaoTRANS(vclz_b, LSX, gen_vv, gen_helper_vclz_b)
3974*5c23704eSSong GaoTRANS(vclz_h, LSX, gen_vv, gen_helper_vclz_h)
3975*5c23704eSSong GaoTRANS(vclz_w, LSX, gen_vv, gen_helper_vclz_w)
3976*5c23704eSSong GaoTRANS(vclz_d, LSX, gen_vv, gen_helper_vclz_d)
3977*5c23704eSSong GaoTRANS(xvclo_b, LASX, gen_xx, gen_helper_vclo_b)
3978*5c23704eSSong GaoTRANS(xvclo_h, LASX, gen_xx, gen_helper_vclo_h)
3979*5c23704eSSong GaoTRANS(xvclo_w, LASX, gen_xx, gen_helper_vclo_w)
3980*5c23704eSSong GaoTRANS(xvclo_d, LASX, gen_xx, gen_helper_vclo_d)
3981*5c23704eSSong GaoTRANS(xvclz_b, LASX, gen_xx, gen_helper_vclz_b)
3982*5c23704eSSong GaoTRANS(xvclz_h, LASX, gen_xx, gen_helper_vclz_h)
3983*5c23704eSSong GaoTRANS(xvclz_w, LASX, gen_xx, gen_helper_vclz_w)
3984*5c23704eSSong GaoTRANS(xvclz_d, LASX, gen_xx, gen_helper_vclz_d)
3985*5c23704eSSong Gao
/*
 * vpcnt_* entries (two-register forms), expanded through out-of-line
 * helpers via gen_vv (LSX) and gen_xx (LASX).  The xv* entries reuse the
 * v* helpers.
 */
3986*5c23704eSSong GaoTRANS(vpcnt_b, LSX, gen_vv, gen_helper_vpcnt_b)
3987*5c23704eSSong GaoTRANS(vpcnt_h, LSX, gen_vv, gen_helper_vpcnt_h)
3988*5c23704eSSong GaoTRANS(vpcnt_w, LSX, gen_vv, gen_helper_vpcnt_w)
3989*5c23704eSSong GaoTRANS(vpcnt_d, LSX, gen_vv, gen_helper_vpcnt_d)
3990*5c23704eSSong GaoTRANS(xvpcnt_b, LASX, gen_xx, gen_helper_vpcnt_b)
3991*5c23704eSSong GaoTRANS(xvpcnt_h, LASX, gen_xx, gen_helper_vpcnt_h)
3992*5c23704eSSong GaoTRANS(xvpcnt_w, LASX, gen_xx, gen_helper_vpcnt_w)
3993*5c23704eSSong GaoTRANS(xvpcnt_d, LASX, gen_xx, gen_helper_vpcnt_d)
3994*5c23704eSSong Gao
3995*5c23704eSSong Gaostatic void do_vbit(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b,
3996*5c23704eSSong Gao                    void (*func)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
3997*5c23704eSSong Gao{
3998*5c23704eSSong Gao    TCGv_vec mask, lsh, t1, one;
3999*5c23704eSSong Gao
4000*5c23704eSSong Gao    lsh = tcg_temp_new_vec_matching(t);
4001*5c23704eSSong Gao    t1 = tcg_temp_new_vec_matching(t);
4002*5c23704eSSong Gao    mask = tcg_constant_vec_matching(t, vece, (8 << vece) - 1);
4003*5c23704eSSong Gao    one = tcg_constant_vec_matching(t, vece, 1);
4004*5c23704eSSong Gao
4005*5c23704eSSong Gao    tcg_gen_and_vec(vece, lsh, b, mask);
4006*5c23704eSSong Gao    tcg_gen_shlv_vec(vece, t1, one, lsh);
4007*5c23704eSSong Gao    func(vece, t, a, t1);
4008*5c23704eSSong Gao}
4009*5c23704eSSong Gao
4010*5c23704eSSong Gaostatic void gen_vbitclr(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
4011*5c23704eSSong Gao{
4012*5c23704eSSong Gao    do_vbit(vece, t, a, b, tcg_gen_andc_vec);
4013*5c23704eSSong Gao}
4014*5c23704eSSong Gao
4015*5c23704eSSong Gaostatic void gen_vbitset(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
4016*5c23704eSSong Gao{
4017*5c23704eSSong Gao    do_vbit(vece, t, a, b, tcg_gen_or_vec);
4018*5c23704eSSong Gao}
4019*5c23704eSSong Gao
4020*5c23704eSSong Gaostatic void gen_vbitrev(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
4021*5c23704eSSong Gao{
4022*5c23704eSSong Gao    do_vbit(vece, t, a, b, tcg_gen_xor_vec);
4023*5c23704eSSong Gao}
4024*5c23704eSSong Gao
4025*5c23704eSSong Gaostatic void do_vbitclr(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
4026*5c23704eSSong Gao                      uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
4027*5c23704eSSong Gao{
4028*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
4029*5c23704eSSong Gao        INDEX_op_shlv_vec, INDEX_op_andc_vec, 0
4030*5c23704eSSong Gao        };
4031*5c23704eSSong Gao    static const GVecGen3 op[4] = {
4032*5c23704eSSong Gao        {
4033*5c23704eSSong Gao            .fniv = gen_vbitclr,
4034*5c23704eSSong Gao            .fno = gen_helper_vbitclr_b,
4035*5c23704eSSong Gao            .opt_opc = vecop_list,
4036*5c23704eSSong Gao            .vece = MO_8
4037*5c23704eSSong Gao        },
4038*5c23704eSSong Gao        {
4039*5c23704eSSong Gao            .fniv = gen_vbitclr,
4040*5c23704eSSong Gao            .fno = gen_helper_vbitclr_h,
4041*5c23704eSSong Gao            .opt_opc = vecop_list,
4042*5c23704eSSong Gao            .vece = MO_16
4043*5c23704eSSong Gao        },
4044*5c23704eSSong Gao        {
4045*5c23704eSSong Gao            .fniv = gen_vbitclr,
4046*5c23704eSSong Gao            .fno = gen_helper_vbitclr_w,
4047*5c23704eSSong Gao            .opt_opc = vecop_list,
4048*5c23704eSSong Gao            .vece = MO_32
4049*5c23704eSSong Gao        },
4050*5c23704eSSong Gao        {
4051*5c23704eSSong Gao            .fniv = gen_vbitclr,
4052*5c23704eSSong Gao            .fno = gen_helper_vbitclr_d,
4053*5c23704eSSong Gao            .opt_opc = vecop_list,
4054*5c23704eSSong Gao            .vece = MO_64
4055*5c23704eSSong Gao        },
4056*5c23704eSSong Gao    };
4057*5c23704eSSong Gao
4058*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
4059*5c23704eSSong Gao}
4060*5c23704eSSong Gao
/*
 * vbitclr_* entries: all element sizes go through the local do_vbitclr
 * expander, via gvec_vvv for the LSX entries and gvec_xxx for the LASX
 * entries.
 */
4061*5c23704eSSong GaoTRANS(vbitclr_b, LSX, gvec_vvv, MO_8, do_vbitclr)
4062*5c23704eSSong GaoTRANS(vbitclr_h, LSX, gvec_vvv, MO_16, do_vbitclr)
4063*5c23704eSSong GaoTRANS(vbitclr_w, LSX, gvec_vvv, MO_32, do_vbitclr)
4064*5c23704eSSong GaoTRANS(vbitclr_d, LSX, gvec_vvv, MO_64, do_vbitclr)
4065*5c23704eSSong GaoTRANS(xvbitclr_b, LASX, gvec_xxx, MO_8, do_vbitclr)
4066*5c23704eSSong GaoTRANS(xvbitclr_h, LASX, gvec_xxx, MO_16, do_vbitclr)
4067*5c23704eSSong GaoTRANS(xvbitclr_w, LASX, gvec_xxx, MO_32, do_vbitclr)
4068*5c23704eSSong GaoTRANS(xvbitclr_d, LASX, gvec_xxx, MO_64, do_vbitclr)
4069*5c23704eSSong Gao
4070*5c23704eSSong Gaostatic void do_vbiti(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm,
4071*5c23704eSSong Gao                     void (*func)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
4072*5c23704eSSong Gao{
4073*5c23704eSSong Gao    int lsh;
4074*5c23704eSSong Gao    TCGv_vec t1, one;
4075*5c23704eSSong Gao
4076*5c23704eSSong Gao    lsh = imm & ((8 << vece) -1);
4077*5c23704eSSong Gao    t1 = tcg_temp_new_vec_matching(t);
4078*5c23704eSSong Gao    one = tcg_constant_vec_matching(t, vece, 1);
4079*5c23704eSSong Gao
4080*5c23704eSSong Gao    tcg_gen_shli_vec(vece, t1, one, lsh);
4081*5c23704eSSong Gao    func(vece, t, a, t1);
4082*5c23704eSSong Gao}
4083*5c23704eSSong Gao
4084*5c23704eSSong Gaostatic void gen_vbitclri(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
4085*5c23704eSSong Gao{
4086*5c23704eSSong Gao    do_vbiti(vece, t, a, imm, tcg_gen_andc_vec);
4087*5c23704eSSong Gao}
4088*5c23704eSSong Gao
4089*5c23704eSSong Gaostatic void gen_vbitseti(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
4090*5c23704eSSong Gao{
4091*5c23704eSSong Gao    do_vbiti(vece, t, a, imm, tcg_gen_or_vec);
4092*5c23704eSSong Gao}
4093*5c23704eSSong Gao
4094*5c23704eSSong Gaostatic void gen_vbitrevi(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
4095*5c23704eSSong Gao{
4096*5c23704eSSong Gao    do_vbiti(vece, t, a, imm, tcg_gen_xor_vec);
4097*5c23704eSSong Gao}
4098*5c23704eSSong Gao
4099*5c23704eSSong Gaostatic void do_vbitclri(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
4100*5c23704eSSong Gao                        int64_t imm, uint32_t oprsz, uint32_t maxsz)
4101*5c23704eSSong Gao{
4102*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
4103*5c23704eSSong Gao        INDEX_op_shli_vec, INDEX_op_andc_vec, 0
4104*5c23704eSSong Gao        };
4105*5c23704eSSong Gao    static const GVecGen2i op[4] = {
4106*5c23704eSSong Gao        {
4107*5c23704eSSong Gao            .fniv = gen_vbitclri,
4108*5c23704eSSong Gao            .fnoi = gen_helper_vbitclri_b,
4109*5c23704eSSong Gao            .opt_opc = vecop_list,
4110*5c23704eSSong Gao            .vece = MO_8
4111*5c23704eSSong Gao        },
4112*5c23704eSSong Gao        {
4113*5c23704eSSong Gao            .fniv = gen_vbitclri,
4114*5c23704eSSong Gao            .fnoi = gen_helper_vbitclri_h,
4115*5c23704eSSong Gao            .opt_opc = vecop_list,
4116*5c23704eSSong Gao            .vece = MO_16
4117*5c23704eSSong Gao        },
4118*5c23704eSSong Gao        {
4119*5c23704eSSong Gao            .fniv = gen_vbitclri,
4120*5c23704eSSong Gao            .fnoi = gen_helper_vbitclri_w,
4121*5c23704eSSong Gao            .opt_opc = vecop_list,
4122*5c23704eSSong Gao            .vece = MO_32
4123*5c23704eSSong Gao        },
4124*5c23704eSSong Gao        {
4125*5c23704eSSong Gao            .fniv = gen_vbitclri,
4126*5c23704eSSong Gao            .fnoi = gen_helper_vbitclri_d,
4127*5c23704eSSong Gao            .opt_opc = vecop_list,
4128*5c23704eSSong Gao            .vece = MO_64
4129*5c23704eSSong Gao        },
4130*5c23704eSSong Gao    };
4131*5c23704eSSong Gao
4132*5c23704eSSong Gao    tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
4133*5c23704eSSong Gao}
4134*5c23704eSSong Gao
4135*5c23704eSSong GaoTRANS(vbitclri_b, LSX, gvec_vv_i, MO_8, do_vbitclri)
4136*5c23704eSSong GaoTRANS(vbitclri_h, LSX, gvec_vv_i, MO_16, do_vbitclri)
4137*5c23704eSSong GaoTRANS(vbitclri_w, LSX, gvec_vv_i, MO_32, do_vbitclri)
4138*5c23704eSSong GaoTRANS(vbitclri_d, LSX, gvec_vv_i, MO_64, do_vbitclri)
4139*5c23704eSSong GaoTRANS(xvbitclri_b, LASX, gvec_xx_i, MO_8, do_vbitclri)
4140*5c23704eSSong GaoTRANS(xvbitclri_h, LASX, gvec_xx_i, MO_16, do_vbitclri)
4141*5c23704eSSong GaoTRANS(xvbitclri_w, LASX, gvec_xx_i, MO_32, do_vbitclri)
4142*5c23704eSSong GaoTRANS(xvbitclri_d, LASX, gvec_xx_i, MO_64, do_vbitclri)
4143*5c23704eSSong Gao
4144*5c23704eSSong Gaostatic void do_vbitset(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
4145*5c23704eSSong Gao                       uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
4146*5c23704eSSong Gao{
4147*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
4148*5c23704eSSong Gao        INDEX_op_shlv_vec, 0
4149*5c23704eSSong Gao        };
4150*5c23704eSSong Gao    static const GVecGen3 op[4] = {
4151*5c23704eSSong Gao        {
4152*5c23704eSSong Gao            .fniv = gen_vbitset,
4153*5c23704eSSong Gao            .fno = gen_helper_vbitset_b,
4154*5c23704eSSong Gao            .opt_opc = vecop_list,
4155*5c23704eSSong Gao            .vece = MO_8
4156*5c23704eSSong Gao        },
4157*5c23704eSSong Gao        {
4158*5c23704eSSong Gao            .fniv = gen_vbitset,
4159*5c23704eSSong Gao            .fno = gen_helper_vbitset_h,
4160*5c23704eSSong Gao            .opt_opc = vecop_list,
4161*5c23704eSSong Gao            .vece = MO_16
4162*5c23704eSSong Gao        },
4163*5c23704eSSong Gao        {
4164*5c23704eSSong Gao            .fniv = gen_vbitset,
4165*5c23704eSSong Gao            .fno = gen_helper_vbitset_w,
4166*5c23704eSSong Gao            .opt_opc = vecop_list,
4167*5c23704eSSong Gao            .vece = MO_32
4168*5c23704eSSong Gao        },
4169*5c23704eSSong Gao        {
4170*5c23704eSSong Gao            .fniv = gen_vbitset,
4171*5c23704eSSong Gao            .fno = gen_helper_vbitset_d,
4172*5c23704eSSong Gao            .opt_opc = vecop_list,
4173*5c23704eSSong Gao            .vece = MO_64
4174*5c23704eSSong Gao        },
4175*5c23704eSSong Gao    };
4176*5c23704eSSong Gao
4177*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
4178*5c23704eSSong Gao}
4179*5c23704eSSong Gao
4180*5c23704eSSong GaoTRANS(vbitset_b, LSX, gvec_vvv, MO_8, do_vbitset)
4181*5c23704eSSong GaoTRANS(vbitset_h, LSX, gvec_vvv, MO_16, do_vbitset)
4182*5c23704eSSong GaoTRANS(vbitset_w, LSX, gvec_vvv, MO_32, do_vbitset)
4183*5c23704eSSong GaoTRANS(vbitset_d, LSX, gvec_vvv, MO_64, do_vbitset)
4184*5c23704eSSong GaoTRANS(xvbitset_b, LASX, gvec_xxx, MO_8, do_vbitset)
4185*5c23704eSSong GaoTRANS(xvbitset_h, LASX, gvec_xxx, MO_16, do_vbitset)
4186*5c23704eSSong GaoTRANS(xvbitset_w, LASX, gvec_xxx, MO_32, do_vbitset)
4187*5c23704eSSong GaoTRANS(xvbitset_d, LASX, gvec_xxx, MO_64, do_vbitset)
4188*5c23704eSSong Gao
4189*5c23704eSSong Gaostatic void do_vbitseti(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
4190*5c23704eSSong Gao                        int64_t imm, uint32_t oprsz, uint32_t maxsz)
4191*5c23704eSSong Gao{
4192*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
4193*5c23704eSSong Gao        INDEX_op_shli_vec, 0
4194*5c23704eSSong Gao        };
4195*5c23704eSSong Gao    static const GVecGen2i op[4] = {
4196*5c23704eSSong Gao        {
4197*5c23704eSSong Gao            .fniv = gen_vbitseti,
4198*5c23704eSSong Gao            .fnoi = gen_helper_vbitseti_b,
4199*5c23704eSSong Gao            .opt_opc = vecop_list,
4200*5c23704eSSong Gao            .vece = MO_8
4201*5c23704eSSong Gao        },
4202*5c23704eSSong Gao        {
4203*5c23704eSSong Gao            .fniv = gen_vbitseti,
4204*5c23704eSSong Gao            .fnoi = gen_helper_vbitseti_h,
4205*5c23704eSSong Gao            .opt_opc = vecop_list,
4206*5c23704eSSong Gao            .vece = MO_16
4207*5c23704eSSong Gao        },
4208*5c23704eSSong Gao        {
4209*5c23704eSSong Gao            .fniv = gen_vbitseti,
4210*5c23704eSSong Gao            .fnoi = gen_helper_vbitseti_w,
4211*5c23704eSSong Gao            .opt_opc = vecop_list,
4212*5c23704eSSong Gao            .vece = MO_32
4213*5c23704eSSong Gao        },
4214*5c23704eSSong Gao        {
4215*5c23704eSSong Gao            .fniv = gen_vbitseti,
4216*5c23704eSSong Gao            .fnoi = gen_helper_vbitseti_d,
4217*5c23704eSSong Gao            .opt_opc = vecop_list,
4218*5c23704eSSong Gao            .vece = MO_64
4219*5c23704eSSong Gao        },
4220*5c23704eSSong Gao    };
4221*5c23704eSSong Gao
4222*5c23704eSSong Gao    tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
4223*5c23704eSSong Gao}
4224*5c23704eSSong Gao
4225*5c23704eSSong GaoTRANS(vbitseti_b, LSX, gvec_vv_i, MO_8, do_vbitseti)
4226*5c23704eSSong GaoTRANS(vbitseti_h, LSX, gvec_vv_i, MO_16, do_vbitseti)
4227*5c23704eSSong GaoTRANS(vbitseti_w, LSX, gvec_vv_i, MO_32, do_vbitseti)
4228*5c23704eSSong GaoTRANS(vbitseti_d, LSX, gvec_vv_i, MO_64, do_vbitseti)
4229*5c23704eSSong GaoTRANS(xvbitseti_b, LASX, gvec_xx_i, MO_8, do_vbitseti)
4230*5c23704eSSong GaoTRANS(xvbitseti_h, LASX, gvec_xx_i, MO_16, do_vbitseti)
4231*5c23704eSSong GaoTRANS(xvbitseti_w, LASX, gvec_xx_i, MO_32, do_vbitseti)
4232*5c23704eSSong GaoTRANS(xvbitseti_d, LASX, gvec_xx_i, MO_64, do_vbitseti)
4233*5c23704eSSong Gao
4234*5c23704eSSong Gaostatic void do_vbitrev(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
4235*5c23704eSSong Gao                       uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
4236*5c23704eSSong Gao{
4237*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
4238*5c23704eSSong Gao        INDEX_op_shlv_vec, 0
4239*5c23704eSSong Gao        };
4240*5c23704eSSong Gao    static const GVecGen3 op[4] = {
4241*5c23704eSSong Gao        {
4242*5c23704eSSong Gao            .fniv = gen_vbitrev,
4243*5c23704eSSong Gao            .fno = gen_helper_vbitrev_b,
4244*5c23704eSSong Gao            .opt_opc = vecop_list,
4245*5c23704eSSong Gao            .vece = MO_8
4246*5c23704eSSong Gao        },
4247*5c23704eSSong Gao        {
4248*5c23704eSSong Gao            .fniv = gen_vbitrev,
4249*5c23704eSSong Gao            .fno = gen_helper_vbitrev_h,
4250*5c23704eSSong Gao            .opt_opc = vecop_list,
4251*5c23704eSSong Gao            .vece = MO_16
4252*5c23704eSSong Gao        },
4253*5c23704eSSong Gao        {
4254*5c23704eSSong Gao            .fniv = gen_vbitrev,
4255*5c23704eSSong Gao            .fno = gen_helper_vbitrev_w,
4256*5c23704eSSong Gao            .opt_opc = vecop_list,
4257*5c23704eSSong Gao            .vece = MO_32
4258*5c23704eSSong Gao        },
4259*5c23704eSSong Gao        {
4260*5c23704eSSong Gao            .fniv = gen_vbitrev,
4261*5c23704eSSong Gao            .fno = gen_helper_vbitrev_d,
4262*5c23704eSSong Gao            .opt_opc = vecop_list,
4263*5c23704eSSong Gao            .vece = MO_64
4264*5c23704eSSong Gao        },
4265*5c23704eSSong Gao    };
4266*5c23704eSSong Gao
4267*5c23704eSSong Gao    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
4268*5c23704eSSong Gao}
4269*5c23704eSSong Gao
4270*5c23704eSSong GaoTRANS(vbitrev_b, LSX, gvec_vvv, MO_8, do_vbitrev)
4271*5c23704eSSong GaoTRANS(vbitrev_h, LSX, gvec_vvv, MO_16, do_vbitrev)
4272*5c23704eSSong GaoTRANS(vbitrev_w, LSX, gvec_vvv, MO_32, do_vbitrev)
4273*5c23704eSSong GaoTRANS(vbitrev_d, LSX, gvec_vvv, MO_64, do_vbitrev)
4274*5c23704eSSong GaoTRANS(xvbitrev_b, LASX, gvec_xxx, MO_8, do_vbitrev)
4275*5c23704eSSong GaoTRANS(xvbitrev_h, LASX, gvec_xxx, MO_16, do_vbitrev)
4276*5c23704eSSong GaoTRANS(xvbitrev_w, LASX, gvec_xxx, MO_32, do_vbitrev)
4277*5c23704eSSong GaoTRANS(xvbitrev_d, LASX, gvec_xxx, MO_64, do_vbitrev)
4278*5c23704eSSong Gao
4279*5c23704eSSong Gaostatic void do_vbitrevi(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
4280*5c23704eSSong Gao                        int64_t imm, uint32_t oprsz, uint32_t maxsz)
4281*5c23704eSSong Gao{
4282*5c23704eSSong Gao    static const TCGOpcode vecop_list[] = {
4283*5c23704eSSong Gao        INDEX_op_shli_vec, 0
4284*5c23704eSSong Gao        };
4285*5c23704eSSong Gao    static const GVecGen2i op[4] = {
4286*5c23704eSSong Gao        {
4287*5c23704eSSong Gao            .fniv = gen_vbitrevi,
4288*5c23704eSSong Gao            .fnoi = gen_helper_vbitrevi_b,
4289*5c23704eSSong Gao            .opt_opc = vecop_list,
4290*5c23704eSSong Gao            .vece = MO_8
4291*5c23704eSSong Gao        },
4292*5c23704eSSong Gao        {
4293*5c23704eSSong Gao            .fniv = gen_vbitrevi,
4294*5c23704eSSong Gao            .fnoi = gen_helper_vbitrevi_h,
4295*5c23704eSSong Gao            .opt_opc = vecop_list,
4296*5c23704eSSong Gao            .vece = MO_16
4297*5c23704eSSong Gao        },
4298*5c23704eSSong Gao        {
4299*5c23704eSSong Gao            .fniv = gen_vbitrevi,
4300*5c23704eSSong Gao            .fnoi = gen_helper_vbitrevi_w,
4301*5c23704eSSong Gao            .opt_opc = vecop_list,
4302*5c23704eSSong Gao            .vece = MO_32
4303*5c23704eSSong Gao        },
4304*5c23704eSSong Gao        {
4305*5c23704eSSong Gao            .fniv = gen_vbitrevi,
4306*5c23704eSSong Gao            .fnoi = gen_helper_vbitrevi_d,
4307*5c23704eSSong Gao            .opt_opc = vecop_list,
4308*5c23704eSSong Gao            .vece = MO_64
4309*5c23704eSSong Gao        },
4310*5c23704eSSong Gao    };
4311*5c23704eSSong Gao
4312*5c23704eSSong Gao    tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
4313*5c23704eSSong Gao}
4314*5c23704eSSong Gao
4315*5c23704eSSong GaoTRANS(vbitrevi_b, LSX, gvec_vv_i, MO_8, do_vbitrevi)
4316*5c23704eSSong GaoTRANS(vbitrevi_h, LSX, gvec_vv_i, MO_16, do_vbitrevi)
4317*5c23704eSSong GaoTRANS(vbitrevi_w, LSX, gvec_vv_i, MO_32, do_vbitrevi)
4318*5c23704eSSong GaoTRANS(vbitrevi_d, LSX, gvec_vv_i, MO_64, do_vbitrevi)
4319*5c23704eSSong GaoTRANS(xvbitrevi_b, LASX, gvec_xx_i, MO_8, do_vbitrevi)
4320*5c23704eSSong GaoTRANS(xvbitrevi_h, LASX, gvec_xx_i, MO_16, do_vbitrevi)
4321*5c23704eSSong GaoTRANS(xvbitrevi_w, LASX, gvec_xx_i, MO_32, do_vbitrevi)
4322*5c23704eSSong GaoTRANS(xvbitrevi_d, LASX, gvec_xx_i, MO_64, do_vbitrevi)
4323*5c23704eSSong Gao
4324*5c23704eSSong GaoTRANS(vfrstp_b, LSX, gen_vvv, gen_helper_vfrstp_b)
4325*5c23704eSSong GaoTRANS(vfrstp_h, LSX, gen_vvv, gen_helper_vfrstp_h)
4326*5c23704eSSong GaoTRANS(vfrstpi_b, LSX, gen_vv_i, gen_helper_vfrstpi_b)
4327*5c23704eSSong GaoTRANS(vfrstpi_h, LSX, gen_vv_i, gen_helper_vfrstpi_h)
4328*5c23704eSSong GaoTRANS(xvfrstp_b, LASX, gen_xxx, gen_helper_vfrstp_b)
4329*5c23704eSSong GaoTRANS(xvfrstp_h, LASX, gen_xxx, gen_helper_vfrstp_h)
4330*5c23704eSSong GaoTRANS(xvfrstpi_b, LASX, gen_xx_i, gen_helper_vfrstpi_b)
4331*5c23704eSSong GaoTRANS(xvfrstpi_h, LASX, gen_xx_i, gen_helper_vfrstpi_h)
4332*5c23704eSSong Gao
4333*5c23704eSSong GaoTRANS(vfadd_s, LSX, gen_vvv_ptr, gen_helper_vfadd_s)
4334*5c23704eSSong GaoTRANS(vfadd_d, LSX, gen_vvv_ptr, gen_helper_vfadd_d)
4335*5c23704eSSong GaoTRANS(vfsub_s, LSX, gen_vvv_ptr, gen_helper_vfsub_s)
4336*5c23704eSSong GaoTRANS(vfsub_d, LSX, gen_vvv_ptr, gen_helper_vfsub_d)
4337*5c23704eSSong GaoTRANS(vfmul_s, LSX, gen_vvv_ptr, gen_helper_vfmul_s)
4338*5c23704eSSong GaoTRANS(vfmul_d, LSX, gen_vvv_ptr, gen_helper_vfmul_d)
4339*5c23704eSSong GaoTRANS(vfdiv_s, LSX, gen_vvv_ptr, gen_helper_vfdiv_s)
4340*5c23704eSSong GaoTRANS(vfdiv_d, LSX, gen_vvv_ptr, gen_helper_vfdiv_d)
4341*5c23704eSSong GaoTRANS(xvfadd_s, LASX, gen_xxx_ptr, gen_helper_vfadd_s)
4342*5c23704eSSong GaoTRANS(xvfadd_d, LASX, gen_xxx_ptr, gen_helper_vfadd_d)
4343*5c23704eSSong GaoTRANS(xvfsub_s, LASX, gen_xxx_ptr, gen_helper_vfsub_s)
4344*5c23704eSSong GaoTRANS(xvfsub_d, LASX, gen_xxx_ptr, gen_helper_vfsub_d)
4345*5c23704eSSong GaoTRANS(xvfmul_s, LASX, gen_xxx_ptr, gen_helper_vfmul_s)
4346*5c23704eSSong GaoTRANS(xvfmul_d, LASX, gen_xxx_ptr, gen_helper_vfmul_d)
4347*5c23704eSSong GaoTRANS(xvfdiv_s, LASX, gen_xxx_ptr, gen_helper_vfdiv_s)
4348*5c23704eSSong GaoTRANS(xvfdiv_d, LASX, gen_xxx_ptr, gen_helper_vfdiv_d)
4349*5c23704eSSong Gao
4350*5c23704eSSong GaoTRANS(vfmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfmadd_s)
4351*5c23704eSSong GaoTRANS(vfmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfmadd_d)
4352*5c23704eSSong GaoTRANS(vfmsub_s, LSX, gen_vvvv_ptr, gen_helper_vfmsub_s)
4353*5c23704eSSong GaoTRANS(vfmsub_d, LSX, gen_vvvv_ptr, gen_helper_vfmsub_d)
4354*5c23704eSSong GaoTRANS(vfnmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_s)
4355*5c23704eSSong GaoTRANS(vfnmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_d)
4356*5c23704eSSong GaoTRANS(vfnmsub_s, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_s)
4357*5c23704eSSong GaoTRANS(vfnmsub_d, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_d)
4358*5c23704eSSong GaoTRANS(xvfmadd_s, LASX, gen_xxxx_ptr, gen_helper_vfmadd_s)
4359*5c23704eSSong GaoTRANS(xvfmadd_d, LASX, gen_xxxx_ptr, gen_helper_vfmadd_d)
4360*5c23704eSSong GaoTRANS(xvfmsub_s, LASX, gen_xxxx_ptr, gen_helper_vfmsub_s)
4361*5c23704eSSong GaoTRANS(xvfmsub_d, LASX, gen_xxxx_ptr, gen_helper_vfmsub_d)
4362*5c23704eSSong GaoTRANS(xvfnmadd_s, LASX, gen_xxxx_ptr, gen_helper_vfnmadd_s)
4363*5c23704eSSong GaoTRANS(xvfnmadd_d, LASX, gen_xxxx_ptr, gen_helper_vfnmadd_d)
4364*5c23704eSSong GaoTRANS(xvfnmsub_s, LASX, gen_xxxx_ptr, gen_helper_vfnmsub_s)
4365*5c23704eSSong GaoTRANS(xvfnmsub_d, LASX, gen_xxxx_ptr, gen_helper_vfnmsub_d)
4366*5c23704eSSong Gao
4367*5c23704eSSong GaoTRANS(vfmax_s, LSX, gen_vvv_ptr, gen_helper_vfmax_s)
4368*5c23704eSSong GaoTRANS(vfmax_d, LSX, gen_vvv_ptr, gen_helper_vfmax_d)
4369*5c23704eSSong GaoTRANS(vfmin_s, LSX, gen_vvv_ptr, gen_helper_vfmin_s)
4370*5c23704eSSong GaoTRANS(vfmin_d, LSX, gen_vvv_ptr, gen_helper_vfmin_d)
4371*5c23704eSSong GaoTRANS(xvfmax_s, LASX, gen_xxx_ptr, gen_helper_vfmax_s)
4372*5c23704eSSong GaoTRANS(xvfmax_d, LASX, gen_xxx_ptr, gen_helper_vfmax_d)
4373*5c23704eSSong GaoTRANS(xvfmin_s, LASX, gen_xxx_ptr, gen_helper_vfmin_s)
4374*5c23704eSSong GaoTRANS(xvfmin_d, LASX, gen_xxx_ptr, gen_helper_vfmin_d)
4375*5c23704eSSong Gao
4376*5c23704eSSong GaoTRANS(vfmaxa_s, LSX, gen_vvv_ptr, gen_helper_vfmaxa_s)
4377*5c23704eSSong GaoTRANS(vfmaxa_d, LSX, gen_vvv_ptr, gen_helper_vfmaxa_d)
4378*5c23704eSSong GaoTRANS(vfmina_s, LSX, gen_vvv_ptr, gen_helper_vfmina_s)
4379*5c23704eSSong GaoTRANS(vfmina_d, LSX, gen_vvv_ptr, gen_helper_vfmina_d)
4380*5c23704eSSong GaoTRANS(xvfmaxa_s, LASX, gen_xxx_ptr, gen_helper_vfmaxa_s)
4381*5c23704eSSong GaoTRANS(xvfmaxa_d, LASX, gen_xxx_ptr, gen_helper_vfmaxa_d)
4382*5c23704eSSong GaoTRANS(xvfmina_s, LASX, gen_xxx_ptr, gen_helper_vfmina_s)
4383*5c23704eSSong GaoTRANS(xvfmina_d, LASX, gen_xxx_ptr, gen_helper_vfmina_d)
4384*5c23704eSSong Gao
4385*5c23704eSSong GaoTRANS(vflogb_s, LSX, gen_vv_ptr, gen_helper_vflogb_s)
4386*5c23704eSSong GaoTRANS(vflogb_d, LSX, gen_vv_ptr, gen_helper_vflogb_d)
4387*5c23704eSSong GaoTRANS(xvflogb_s, LASX, gen_xx_ptr, gen_helper_vflogb_s)
4388*5c23704eSSong GaoTRANS(xvflogb_d, LASX, gen_xx_ptr, gen_helper_vflogb_d)
4389*5c23704eSSong Gao
4390*5c23704eSSong GaoTRANS(vfclass_s, LSX, gen_vv_ptr, gen_helper_vfclass_s)
4391*5c23704eSSong GaoTRANS(vfclass_d, LSX, gen_vv_ptr, gen_helper_vfclass_d)
4392*5c23704eSSong GaoTRANS(xvfclass_s, LASX, gen_xx_ptr, gen_helper_vfclass_s)
4393*5c23704eSSong GaoTRANS(xvfclass_d, LASX, gen_xx_ptr, gen_helper_vfclass_d)
4394*5c23704eSSong Gao
4395*5c23704eSSong GaoTRANS(vfsqrt_s, LSX, gen_vv_ptr, gen_helper_vfsqrt_s)
4396*5c23704eSSong GaoTRANS(vfsqrt_d, LSX, gen_vv_ptr, gen_helper_vfsqrt_d)
4397*5c23704eSSong GaoTRANS(vfrecip_s, LSX, gen_vv_ptr, gen_helper_vfrecip_s)
4398*5c23704eSSong GaoTRANS(vfrecip_d, LSX, gen_vv_ptr, gen_helper_vfrecip_d)
4399*5c23704eSSong GaoTRANS(vfrsqrt_s, LSX, gen_vv_ptr, gen_helper_vfrsqrt_s)
4400*5c23704eSSong GaoTRANS(vfrsqrt_d, LSX, gen_vv_ptr, gen_helper_vfrsqrt_d)
4401*5c23704eSSong GaoTRANS(xvfsqrt_s, LASX, gen_xx_ptr, gen_helper_vfsqrt_s)
4402*5c23704eSSong GaoTRANS(xvfsqrt_d, LASX, gen_xx_ptr, gen_helper_vfsqrt_d)
4403*5c23704eSSong GaoTRANS(xvfrecip_s, LASX, gen_xx_ptr, gen_helper_vfrecip_s)
4404*5c23704eSSong GaoTRANS(xvfrecip_d, LASX, gen_xx_ptr, gen_helper_vfrecip_d)
4405*5c23704eSSong GaoTRANS(xvfrsqrt_s, LASX, gen_xx_ptr, gen_helper_vfrsqrt_s)
4406*5c23704eSSong GaoTRANS(xvfrsqrt_d, LASX, gen_xx_ptr, gen_helper_vfrsqrt_d)
4407*5c23704eSSong Gao
4408*5c23704eSSong GaoTRANS(vfcvtl_s_h, LSX, gen_vv_ptr, gen_helper_vfcvtl_s_h)
4409*5c23704eSSong GaoTRANS(vfcvth_s_h, LSX, gen_vv_ptr, gen_helper_vfcvth_s_h)
4410*5c23704eSSong GaoTRANS(vfcvtl_d_s, LSX, gen_vv_ptr, gen_helper_vfcvtl_d_s)
4411*5c23704eSSong GaoTRANS(vfcvth_d_s, LSX, gen_vv_ptr, gen_helper_vfcvth_d_s)
4412*5c23704eSSong GaoTRANS(vfcvt_h_s, LSX, gen_vvv_ptr, gen_helper_vfcvt_h_s)
4413*5c23704eSSong GaoTRANS(vfcvt_s_d, LSX, gen_vvv_ptr, gen_helper_vfcvt_s_d)
4414*5c23704eSSong GaoTRANS(xvfcvtl_s_h, LASX, gen_xx_ptr, gen_helper_vfcvtl_s_h)
4415*5c23704eSSong GaoTRANS(xvfcvth_s_h, LASX, gen_xx_ptr, gen_helper_vfcvth_s_h)
4416*5c23704eSSong GaoTRANS(xvfcvtl_d_s, LASX, gen_xx_ptr, gen_helper_vfcvtl_d_s)
4417*5c23704eSSong GaoTRANS(xvfcvth_d_s, LASX, gen_xx_ptr, gen_helper_vfcvth_d_s)
4418*5c23704eSSong GaoTRANS(xvfcvt_h_s, LASX, gen_xxx_ptr, gen_helper_vfcvt_h_s)
4419*5c23704eSSong GaoTRANS(xvfcvt_s_d, LASX, gen_xxx_ptr, gen_helper_vfcvt_s_d)
4420*5c23704eSSong Gao
4421*5c23704eSSong GaoTRANS(vfrintrne_s, LSX, gen_vv_ptr, gen_helper_vfrintrne_s)
4422*5c23704eSSong GaoTRANS(vfrintrne_d, LSX, gen_vv_ptr, gen_helper_vfrintrne_d)
4423*5c23704eSSong GaoTRANS(vfrintrz_s, LSX, gen_vv_ptr, gen_helper_vfrintrz_s)
4424*5c23704eSSong GaoTRANS(vfrintrz_d, LSX, gen_vv_ptr, gen_helper_vfrintrz_d)
4425*5c23704eSSong GaoTRANS(vfrintrp_s, LSX, gen_vv_ptr, gen_helper_vfrintrp_s)
4426*5c23704eSSong GaoTRANS(vfrintrp_d, LSX, gen_vv_ptr, gen_helper_vfrintrp_d)
4427*5c23704eSSong GaoTRANS(vfrintrm_s, LSX, gen_vv_ptr, gen_helper_vfrintrm_s)
4428*5c23704eSSong GaoTRANS(vfrintrm_d, LSX, gen_vv_ptr, gen_helper_vfrintrm_d)
4429*5c23704eSSong GaoTRANS(vfrint_s, LSX, gen_vv_ptr, gen_helper_vfrint_s)
4430*5c23704eSSong GaoTRANS(vfrint_d, LSX, gen_vv_ptr, gen_helper_vfrint_d)
4431*5c23704eSSong GaoTRANS(xvfrintrne_s, LASX, gen_xx_ptr, gen_helper_vfrintrne_s)
4432*5c23704eSSong GaoTRANS(xvfrintrne_d, LASX, gen_xx_ptr, gen_helper_vfrintrne_d)
4433*5c23704eSSong GaoTRANS(xvfrintrz_s, LASX, gen_xx_ptr, gen_helper_vfrintrz_s)
4434*5c23704eSSong GaoTRANS(xvfrintrz_d, LASX, gen_xx_ptr, gen_helper_vfrintrz_d)
4435*5c23704eSSong GaoTRANS(xvfrintrp_s, LASX, gen_xx_ptr, gen_helper_vfrintrp_s)
4436*5c23704eSSong GaoTRANS(xvfrintrp_d, LASX, gen_xx_ptr, gen_helper_vfrintrp_d)
4437*5c23704eSSong GaoTRANS(xvfrintrm_s, LASX, gen_xx_ptr, gen_helper_vfrintrm_s)
4438*5c23704eSSong GaoTRANS(xvfrintrm_d, LASX, gen_xx_ptr, gen_helper_vfrintrm_d)
4439*5c23704eSSong GaoTRANS(xvfrint_s, LASX, gen_xx_ptr, gen_helper_vfrint_s)
4440*5c23704eSSong GaoTRANS(xvfrint_d, LASX, gen_xx_ptr, gen_helper_vfrint_d)
4441*5c23704eSSong Gao
4442*5c23704eSSong GaoTRANS(vftintrne_w_s, LSX, gen_vv_ptr, gen_helper_vftintrne_w_s)
4443*5c23704eSSong GaoTRANS(vftintrne_l_d, LSX, gen_vv_ptr, gen_helper_vftintrne_l_d)
4444*5c23704eSSong GaoTRANS(vftintrz_w_s, LSX, gen_vv_ptr, gen_helper_vftintrz_w_s)
4445*5c23704eSSong GaoTRANS(vftintrz_l_d, LSX, gen_vv_ptr, gen_helper_vftintrz_l_d)
4446*5c23704eSSong GaoTRANS(vftintrp_w_s, LSX, gen_vv_ptr, gen_helper_vftintrp_w_s)
4447*5c23704eSSong GaoTRANS(vftintrp_l_d, LSX, gen_vv_ptr, gen_helper_vftintrp_l_d)
4448*5c23704eSSong GaoTRANS(vftintrm_w_s, LSX, gen_vv_ptr, gen_helper_vftintrm_w_s)
4449*5c23704eSSong GaoTRANS(vftintrm_l_d, LSX, gen_vv_ptr, gen_helper_vftintrm_l_d)
4450*5c23704eSSong GaoTRANS(vftint_w_s, LSX, gen_vv_ptr, gen_helper_vftint_w_s)
4451*5c23704eSSong GaoTRANS(vftint_l_d, LSX, gen_vv_ptr, gen_helper_vftint_l_d)
4452*5c23704eSSong GaoTRANS(vftintrz_wu_s, LSX, gen_vv_ptr, gen_helper_vftintrz_wu_s)
4453*5c23704eSSong GaoTRANS(vftintrz_lu_d, LSX, gen_vv_ptr, gen_helper_vftintrz_lu_d)
4454*5c23704eSSong GaoTRANS(vftint_wu_s, LSX, gen_vv_ptr, gen_helper_vftint_wu_s)
4455*5c23704eSSong GaoTRANS(vftint_lu_d, LSX, gen_vv_ptr, gen_helper_vftint_lu_d)
4456*5c23704eSSong GaoTRANS(vftintrne_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrne_w_d)
4457*5c23704eSSong GaoTRANS(vftintrz_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrz_w_d)
4458*5c23704eSSong GaoTRANS(vftintrp_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrp_w_d)
4459*5c23704eSSong GaoTRANS(vftintrm_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrm_w_d)
4460*5c23704eSSong GaoTRANS(vftint_w_d, LSX, gen_vvv_ptr, gen_helper_vftint_w_d)
4461*5c23704eSSong GaoTRANS(vftintrnel_l_s, LSX, gen_vv_ptr, gen_helper_vftintrnel_l_s)
4462*5c23704eSSong GaoTRANS(vftintrneh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrneh_l_s)
4463*5c23704eSSong GaoTRANS(vftintrzl_l_s, LSX, gen_vv_ptr, gen_helper_vftintrzl_l_s)
4464*5c23704eSSong GaoTRANS(vftintrzh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrzh_l_s)
4465*5c23704eSSong GaoTRANS(vftintrpl_l_s, LSX, gen_vv_ptr, gen_helper_vftintrpl_l_s)
4466*5c23704eSSong GaoTRANS(vftintrph_l_s, LSX, gen_vv_ptr, gen_helper_vftintrph_l_s)
4467*5c23704eSSong GaoTRANS(vftintrml_l_s, LSX, gen_vv_ptr, gen_helper_vftintrml_l_s)
4468*5c23704eSSong GaoTRANS(vftintrmh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrmh_l_s)
4469*5c23704eSSong GaoTRANS(vftintl_l_s, LSX, gen_vv_ptr, gen_helper_vftintl_l_s)
4470*5c23704eSSong GaoTRANS(vftinth_l_s, LSX, gen_vv_ptr, gen_helper_vftinth_l_s)
4471*5c23704eSSong GaoTRANS(xvftintrne_w_s, LASX, gen_xx_ptr, gen_helper_vftintrne_w_s)
4472*5c23704eSSong GaoTRANS(xvftintrne_l_d, LASX, gen_xx_ptr, gen_helper_vftintrne_l_d)
4473*5c23704eSSong GaoTRANS(xvftintrz_w_s, LASX, gen_xx_ptr, gen_helper_vftintrz_w_s)
4474*5c23704eSSong GaoTRANS(xvftintrz_l_d, LASX, gen_xx_ptr, gen_helper_vftintrz_l_d)
4475*5c23704eSSong GaoTRANS(xvftintrp_w_s, LASX, gen_xx_ptr, gen_helper_vftintrp_w_s)
4476*5c23704eSSong GaoTRANS(xvftintrp_l_d, LASX, gen_xx_ptr, gen_helper_vftintrp_l_d)
4477*5c23704eSSong GaoTRANS(xvftintrm_w_s, LASX, gen_xx_ptr, gen_helper_vftintrm_w_s)
4478*5c23704eSSong GaoTRANS(xvftintrm_l_d, LASX, gen_xx_ptr, gen_helper_vftintrm_l_d)
4479*5c23704eSSong GaoTRANS(xvftint_w_s, LASX, gen_xx_ptr, gen_helper_vftint_w_s)
4480*5c23704eSSong GaoTRANS(xvftint_l_d, LASX, gen_xx_ptr, gen_helper_vftint_l_d)
4481*5c23704eSSong GaoTRANS(xvftintrz_wu_s, LASX, gen_xx_ptr, gen_helper_vftintrz_wu_s)
4482*5c23704eSSong GaoTRANS(xvftintrz_lu_d, LASX, gen_xx_ptr, gen_helper_vftintrz_lu_d)
4483*5c23704eSSong GaoTRANS(xvftint_wu_s, LASX, gen_xx_ptr, gen_helper_vftint_wu_s)
4484*5c23704eSSong GaoTRANS(xvftint_lu_d, LASX, gen_xx_ptr, gen_helper_vftint_lu_d)
4485*5c23704eSSong GaoTRANS(xvftintrne_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrne_w_d)
4486*5c23704eSSong GaoTRANS(xvftintrz_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrz_w_d)
4487*5c23704eSSong GaoTRANS(xvftintrp_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrp_w_d)
4488*5c23704eSSong GaoTRANS(xvftintrm_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrm_w_d)
4489*5c23704eSSong GaoTRANS(xvftint_w_d, LASX, gen_xxx_ptr, gen_helper_vftint_w_d)
4490*5c23704eSSong GaoTRANS(xvftintrnel_l_s, LASX, gen_xx_ptr, gen_helper_vftintrnel_l_s)
4491*5c23704eSSong GaoTRANS(xvftintrneh_l_s, LASX, gen_xx_ptr, gen_helper_vftintrneh_l_s)
4492*5c23704eSSong GaoTRANS(xvftintrzl_l_s, LASX, gen_xx_ptr, gen_helper_vftintrzl_l_s)
4493*5c23704eSSong GaoTRANS(xvftintrzh_l_s, LASX, gen_xx_ptr, gen_helper_vftintrzh_l_s)
4494*5c23704eSSong GaoTRANS(xvftintrpl_l_s, LASX, gen_xx_ptr, gen_helper_vftintrpl_l_s)
4495*5c23704eSSong GaoTRANS(xvftintrph_l_s, LASX, gen_xx_ptr, gen_helper_vftintrph_l_s)
4496*5c23704eSSong GaoTRANS(xvftintrml_l_s, LASX, gen_xx_ptr, gen_helper_vftintrml_l_s)
4497*5c23704eSSong GaoTRANS(xvftintrmh_l_s, LASX, gen_xx_ptr, gen_helper_vftintrmh_l_s)
4498*5c23704eSSong GaoTRANS(xvftintl_l_s, LASX, gen_xx_ptr, gen_helper_vftintl_l_s)
4499*5c23704eSSong GaoTRANS(xvftinth_l_s, LASX, gen_xx_ptr, gen_helper_vftinth_l_s)
4500*5c23704eSSong Gao
4501*5c23704eSSong GaoTRANS(vffint_s_w, LSX, gen_vv_ptr, gen_helper_vffint_s_w)
4502*5c23704eSSong GaoTRANS(vffint_d_l, LSX, gen_vv_ptr, gen_helper_vffint_d_l)
4503*5c23704eSSong GaoTRANS(vffint_s_wu, LSX, gen_vv_ptr, gen_helper_vffint_s_wu)
4504*5c23704eSSong GaoTRANS(vffint_d_lu, LSX, gen_vv_ptr, gen_helper_vffint_d_lu)
4505*5c23704eSSong GaoTRANS(vffintl_d_w, LSX, gen_vv_ptr, gen_helper_vffintl_d_w)
4506*5c23704eSSong GaoTRANS(vffinth_d_w, LSX, gen_vv_ptr, gen_helper_vffinth_d_w)
4507*5c23704eSSong GaoTRANS(vffint_s_l, LSX, gen_vvv_ptr, gen_helper_vffint_s_l)
4508*5c23704eSSong GaoTRANS(xvffint_s_w, LASX, gen_xx_ptr, gen_helper_vffint_s_w)
4509*5c23704eSSong GaoTRANS(xvffint_d_l, LASX, gen_xx_ptr, gen_helper_vffint_d_l)
4510*5c23704eSSong GaoTRANS(xvffint_s_wu, LASX, gen_xx_ptr, gen_helper_vffint_s_wu)
4511*5c23704eSSong GaoTRANS(xvffint_d_lu, LASX, gen_xx_ptr, gen_helper_vffint_d_lu)
4512*5c23704eSSong GaoTRANS(xvffintl_d_w, LASX, gen_xx_ptr, gen_helper_vffintl_d_w)
4513*5c23704eSSong GaoTRANS(xvffinth_d_w, LASX, gen_xx_ptr, gen_helper_vffinth_d_w)
4514*5c23704eSSong GaoTRANS(xvffint_s_l, LASX, gen_xxx_ptr, gen_helper_vffint_s_l)
4515*5c23704eSSong Gao
4516*5c23704eSSong Gaostatic bool do_cmp_vl(DisasContext *ctx, arg_vvv *a,
4517*5c23704eSSong Gao                      uint32_t oprsz, MemOp mop, TCGCond cond)
4518*5c23704eSSong Gao{
4519*5c23704eSSong Gao    uint32_t vd_ofs, vj_ofs, vk_ofs;
4520*5c23704eSSong Gao
4521*5c23704eSSong Gao    if (!check_vec(ctx, oprsz)) {
4522*5c23704eSSong Gao        return true;
4523*5c23704eSSong Gao    }
4524*5c23704eSSong Gao
4525*5c23704eSSong Gao    vd_ofs = vec_full_offset(a->vd);
4526*5c23704eSSong Gao    vj_ofs = vec_full_offset(a->vj);
4527*5c23704eSSong Gao    vk_ofs = vec_full_offset(a->vk);
4528*5c23704eSSong Gao
4529*5c23704eSSong Gao    tcg_gen_gvec_cmp(cond, mop, vd_ofs, vj_ofs, vk_ofs, oprsz, ctx->vl / 8);
4530*5c23704eSSong Gao    return true;
4531*5c23704eSSong Gao}
4532*5c23704eSSong Gao
4533*5c23704eSSong Gaostatic bool do_cmp(DisasContext *ctx, arg_vvv *a,
4534*5c23704eSSong Gao                   MemOp mop, TCGCond cond)
4535*5c23704eSSong Gao{
4536*5c23704eSSong Gao    return do_cmp_vl(ctx, a, 16, mop, cond);
4537*5c23704eSSong Gao}
4538*5c23704eSSong Gao
4539*5c23704eSSong Gaostatic bool do_xcmp(DisasContext *ctx, arg_vvv *a,
4540*5c23704eSSong Gao                    MemOp mop, TCGCond cond)
4541*5c23704eSSong Gao{
4542*5c23704eSSong Gao    return do_cmp_vl(ctx, a, 32, mop, cond);
4543*5c23704eSSong Gao}
4544*5c23704eSSong Gao
4545*5c23704eSSong Gaostatic bool do_cmpi_vl(DisasContext *ctx, arg_vv_i *a,
4546*5c23704eSSong Gao                       uint32_t oprsz, MemOp mop, TCGCond cond)
4547*5c23704eSSong Gao{
4548*5c23704eSSong Gao    uint32_t vd_ofs, vj_ofs;
4549*5c23704eSSong Gao
4550*5c23704eSSong Gao    if (!check_vec(ctx, oprsz)) {
4551*5c23704eSSong Gao        return true;
4552*5c23704eSSong Gao    }
4553*5c23704eSSong Gao
4554*5c23704eSSong Gao    vd_ofs = vec_full_offset(a->vd);
4555*5c23704eSSong Gao    vj_ofs = vec_full_offset(a->vj);
4556*5c23704eSSong Gao
4557*5c23704eSSong Gao    tcg_gen_gvec_cmpi(cond, mop, vd_ofs, vj_ofs, a->imm, oprsz, ctx->vl / 8);
4558*5c23704eSSong Gao    return true;
4559*5c23704eSSong Gao}
4560*5c23704eSSong Gao
4561*5c23704eSSong Gaostatic bool do_cmpi(DisasContext *ctx, arg_vv_i *a,
4562*5c23704eSSong Gao                    MemOp mop, TCGCond cond)
4563*5c23704eSSong Gao{
4564*5c23704eSSong Gao    return do_cmpi_vl(ctx, a, 16, mop, cond);
4565*5c23704eSSong Gao}
4566*5c23704eSSong Gao
4567*5c23704eSSong Gaostatic bool do_xcmpi(DisasContext *ctx, arg_vv_i *a,
4568*5c23704eSSong Gao                     MemOp mop, TCGCond cond)
4569*5c23704eSSong Gao{
4570*5c23704eSSong Gao    return do_cmpi_vl(ctx, a, 32, mop, cond);
4571*5c23704eSSong Gao}
4572*5c23704eSSong Gao
4573*5c23704eSSong GaoTRANS(vseq_b, LSX, do_cmp, MO_8, TCG_COND_EQ)
4574*5c23704eSSong GaoTRANS(vseq_h, LSX, do_cmp, MO_16, TCG_COND_EQ)
4575*5c23704eSSong GaoTRANS(vseq_w, LSX, do_cmp, MO_32, TCG_COND_EQ)
4576*5c23704eSSong GaoTRANS(vseq_d, LSX, do_cmp, MO_64, TCG_COND_EQ)
4577*5c23704eSSong GaoTRANS(vseqi_b, LSX, do_cmpi, MO_8, TCG_COND_EQ)
4578*5c23704eSSong GaoTRANS(vseqi_h, LSX, do_cmpi, MO_16, TCG_COND_EQ)
4579*5c23704eSSong GaoTRANS(vseqi_w, LSX, do_cmpi, MO_32, TCG_COND_EQ)
4580*5c23704eSSong GaoTRANS(vseqi_d, LSX, do_cmpi, MO_64, TCG_COND_EQ)
4581*5c23704eSSong GaoTRANS(xvseq_b, LASX, do_xcmp, MO_8, TCG_COND_EQ)
4582*5c23704eSSong GaoTRANS(xvseq_h, LASX, do_xcmp, MO_16, TCG_COND_EQ)
4583*5c23704eSSong GaoTRANS(xvseq_w, LASX, do_xcmp, MO_32, TCG_COND_EQ)
4584*5c23704eSSong GaoTRANS(xvseq_d, LASX, do_xcmp, MO_64, TCG_COND_EQ)
4585*5c23704eSSong GaoTRANS(xvseqi_b, LASX, do_xcmpi, MO_8, TCG_COND_EQ)
4586*5c23704eSSong GaoTRANS(xvseqi_h, LASX, do_xcmpi, MO_16, TCG_COND_EQ)
4587*5c23704eSSong GaoTRANS(xvseqi_w, LASX, do_xcmpi, MO_32, TCG_COND_EQ)
4588*5c23704eSSong GaoTRANS(xvseqi_d, LASX, do_xcmpi, MO_64, TCG_COND_EQ)
4589*5c23704eSSong Gao
4590*5c23704eSSong GaoTRANS(vsle_b, LSX, do_cmp, MO_8, TCG_COND_LE)
4591*5c23704eSSong GaoTRANS(vsle_h, LSX, do_cmp, MO_16, TCG_COND_LE)
4592*5c23704eSSong GaoTRANS(vsle_w, LSX, do_cmp, MO_32, TCG_COND_LE)
4593*5c23704eSSong GaoTRANS(vsle_d, LSX, do_cmp, MO_64, TCG_COND_LE)
4594*5c23704eSSong GaoTRANS(vslei_b, LSX, do_cmpi, MO_8, TCG_COND_LE)
4595*5c23704eSSong GaoTRANS(vslei_h, LSX, do_cmpi, MO_16, TCG_COND_LE)
4596*5c23704eSSong GaoTRANS(vslei_w, LSX, do_cmpi, MO_32, TCG_COND_LE)
4597*5c23704eSSong GaoTRANS(vslei_d, LSX, do_cmpi, MO_64, TCG_COND_LE)
4598*5c23704eSSong GaoTRANS(vsle_bu, LSX, do_cmp, MO_8, TCG_COND_LEU)
4599*5c23704eSSong GaoTRANS(vsle_hu, LSX, do_cmp, MO_16, TCG_COND_LEU)
4600*5c23704eSSong GaoTRANS(vsle_wu, LSX, do_cmp, MO_32, TCG_COND_LEU)
4601*5c23704eSSong GaoTRANS(vsle_du, LSX, do_cmp, MO_64, TCG_COND_LEU)
4602*5c23704eSSong GaoTRANS(vslei_bu, LSX, do_cmpi, MO_8, TCG_COND_LEU)
4603*5c23704eSSong GaoTRANS(vslei_hu, LSX, do_cmpi, MO_16, TCG_COND_LEU)
4604*5c23704eSSong GaoTRANS(vslei_wu, LSX, do_cmpi, MO_32, TCG_COND_LEU)
4605*5c23704eSSong GaoTRANS(vslei_du, LSX, do_cmpi, MO_64, TCG_COND_LEU)
4606*5c23704eSSong GaoTRANS(xvsle_b, LASX, do_xcmp, MO_8, TCG_COND_LE)
4607*5c23704eSSong GaoTRANS(xvsle_h, LASX, do_xcmp, MO_16, TCG_COND_LE)
4608*5c23704eSSong GaoTRANS(xvsle_w, LASX, do_xcmp, MO_32, TCG_COND_LE)
4609*5c23704eSSong GaoTRANS(xvsle_d, LASX, do_xcmp, MO_64, TCG_COND_LE)
4610*5c23704eSSong GaoTRANS(xvslei_b, LASX, do_xcmpi, MO_8, TCG_COND_LE)
4611*5c23704eSSong GaoTRANS(xvslei_h, LASX, do_xcmpi, MO_16, TCG_COND_LE)
4612*5c23704eSSong GaoTRANS(xvslei_w, LASX, do_xcmpi, MO_32, TCG_COND_LE)
4613*5c23704eSSong GaoTRANS(xvslei_d, LASX, do_xcmpi, MO_64, TCG_COND_LE)
4614*5c23704eSSong GaoTRANS(xvsle_bu, LASX, do_xcmp, MO_8, TCG_COND_LEU)
4615*5c23704eSSong GaoTRANS(xvsle_hu, LASX, do_xcmp, MO_16, TCG_COND_LEU)
4616*5c23704eSSong GaoTRANS(xvsle_wu, LASX, do_xcmp, MO_32, TCG_COND_LEU)
4617*5c23704eSSong GaoTRANS(xvsle_du, LASX, do_xcmp, MO_64, TCG_COND_LEU)
4618*5c23704eSSong GaoTRANS(xvslei_bu, LASX, do_xcmpi, MO_8, TCG_COND_LEU)
4619*5c23704eSSong GaoTRANS(xvslei_hu, LASX, do_xcmpi, MO_16, TCG_COND_LEU)
4620*5c23704eSSong GaoTRANS(xvslei_wu, LASX, do_xcmpi, MO_32, TCG_COND_LEU)
4621*5c23704eSSong GaoTRANS(xvslei_du, LASX, do_xcmpi, MO_64, TCG_COND_LEU)
4622*5c23704eSSong Gao
4623*5c23704eSSong GaoTRANS(vslt_b, LSX, do_cmp, MO_8, TCG_COND_LT)
4624*5c23704eSSong GaoTRANS(vslt_h, LSX, do_cmp, MO_16, TCG_COND_LT)
4625*5c23704eSSong GaoTRANS(vslt_w, LSX, do_cmp, MO_32, TCG_COND_LT)
4626*5c23704eSSong GaoTRANS(vslt_d, LSX, do_cmp, MO_64, TCG_COND_LT)
4627*5c23704eSSong GaoTRANS(vslti_b, LSX, do_cmpi, MO_8, TCG_COND_LT)
4628*5c23704eSSong GaoTRANS(vslti_h, LSX, do_cmpi, MO_16, TCG_COND_LT)
4629*5c23704eSSong GaoTRANS(vslti_w, LSX, do_cmpi, MO_32, TCG_COND_LT)
4630*5c23704eSSong GaoTRANS(vslti_d, LSX, do_cmpi, MO_64, TCG_COND_LT)
4631*5c23704eSSong GaoTRANS(vslt_bu, LSX, do_cmp, MO_8, TCG_COND_LTU)
4632*5c23704eSSong GaoTRANS(vslt_hu, LSX, do_cmp, MO_16, TCG_COND_LTU)
4633*5c23704eSSong GaoTRANS(vslt_wu, LSX, do_cmp, MO_32, TCG_COND_LTU)
4634*5c23704eSSong GaoTRANS(vslt_du, LSX, do_cmp, MO_64, TCG_COND_LTU)
4635*5c23704eSSong GaoTRANS(vslti_bu, LSX, do_cmpi, MO_8, TCG_COND_LTU)
4636*5c23704eSSong GaoTRANS(vslti_hu, LSX, do_cmpi, MO_16, TCG_COND_LTU)
4637*5c23704eSSong GaoTRANS(vslti_wu, LSX, do_cmpi, MO_32, TCG_COND_LTU)
4638*5c23704eSSong GaoTRANS(vslti_du, LSX, do_cmpi, MO_64, TCG_COND_LTU)
4639*5c23704eSSong GaoTRANS(xvslt_b, LASX, do_xcmp, MO_8, TCG_COND_LT)
4640*5c23704eSSong GaoTRANS(xvslt_h, LASX, do_xcmp, MO_16, TCG_COND_LT)
4641*5c23704eSSong GaoTRANS(xvslt_w, LASX, do_xcmp, MO_32, TCG_COND_LT)
4642*5c23704eSSong GaoTRANS(xvslt_d, LASX, do_xcmp, MO_64, TCG_COND_LT)
4643*5c23704eSSong GaoTRANS(xvslti_b, LASX, do_xcmpi, MO_8, TCG_COND_LT)
4644*5c23704eSSong GaoTRANS(xvslti_h, LASX, do_xcmpi, MO_16, TCG_COND_LT)
4645*5c23704eSSong GaoTRANS(xvslti_w, LASX, do_xcmpi, MO_32, TCG_COND_LT)
4646*5c23704eSSong GaoTRANS(xvslti_d, LASX, do_xcmpi, MO_64, TCG_COND_LT)
4647*5c23704eSSong GaoTRANS(xvslt_bu, LASX, do_xcmp, MO_8, TCG_COND_LTU)
4648*5c23704eSSong GaoTRANS(xvslt_hu, LASX, do_xcmp, MO_16, TCG_COND_LTU)
4649*5c23704eSSong GaoTRANS(xvslt_wu, LASX, do_xcmp, MO_32, TCG_COND_LTU)
4650*5c23704eSSong GaoTRANS(xvslt_du, LASX, do_xcmp, MO_64, TCG_COND_LTU)
4651*5c23704eSSong GaoTRANS(xvslti_bu, LASX, do_xcmpi, MO_8, TCG_COND_LTU)
4652*5c23704eSSong GaoTRANS(xvslti_hu, LASX, do_xcmpi, MO_16, TCG_COND_LTU)
4653*5c23704eSSong GaoTRANS(xvslti_wu, LASX, do_xcmpi, MO_32, TCG_COND_LTU)
4654*5c23704eSSong GaoTRANS(xvslti_du, LASX, do_xcmpi, MO_64, TCG_COND_LTU)
4655*5c23704eSSong Gao
4656*5c23704eSSong Gaostatic bool do_vfcmp_cond_s(DisasContext *ctx, arg_vvv_fcond *a, uint32_t sz)
4657*5c23704eSSong Gao{
4658*5c23704eSSong Gao    uint32_t flags;
4659*5c23704eSSong Gao    void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32);
4660*5c23704eSSong Gao    TCGv_i32 vd = tcg_constant_i32(a->vd);
4661*5c23704eSSong Gao    TCGv_i32 vj = tcg_constant_i32(a->vj);
4662*5c23704eSSong Gao    TCGv_i32 vk = tcg_constant_i32(a->vk);
4663*5c23704eSSong Gao    TCGv_i32 oprsz = tcg_constant_i32(sz);
4664*5c23704eSSong Gao
4665*5c23704eSSong Gao    if (!check_vec(ctx, sz)) {
4666*5c23704eSSong Gao        return true;
4667*5c23704eSSong Gao    }
4668*5c23704eSSong Gao
4669*5c23704eSSong Gao    fn = (a->fcond & 1 ? gen_helper_vfcmp_s_s : gen_helper_vfcmp_c_s);
4670*5c23704eSSong Gao    flags = get_fcmp_flags(a->fcond >> 1);
4671*5c23704eSSong Gao    fn(tcg_env, oprsz, vd, vj, vk, tcg_constant_i32(flags));
4672*5c23704eSSong Gao
4673*5c23704eSSong Gao    return true;
4674*5c23704eSSong Gao}
4675*5c23704eSSong Gao
4676*5c23704eSSong Gaostatic bool do_vfcmp_cond_d(DisasContext *ctx, arg_vvv_fcond *a, uint32_t sz)
4677*5c23704eSSong Gao{
4678*5c23704eSSong Gao    uint32_t flags;
4679*5c23704eSSong Gao    void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32);
4680*5c23704eSSong Gao    TCGv_i32 vd = tcg_constant_i32(a->vd);
4681*5c23704eSSong Gao    TCGv_i32 vj = tcg_constant_i32(a->vj);
4682*5c23704eSSong Gao    TCGv_i32 vk = tcg_constant_i32(a->vk);
4683*5c23704eSSong Gao    TCGv_i32 oprsz = tcg_constant_i32(sz);
4684*5c23704eSSong Gao
4685*5c23704eSSong Gao    if (!check_vec(ctx, sz)) {
4686*5c23704eSSong Gao        return true;
4687*5c23704eSSong Gao    }
4688*5c23704eSSong Gao
4689*5c23704eSSong Gao    fn = (a->fcond & 1 ? gen_helper_vfcmp_s_d : gen_helper_vfcmp_c_d);
4690*5c23704eSSong Gao    flags = get_fcmp_flags(a->fcond >> 1);
4691*5c23704eSSong Gao    fn(tcg_env, oprsz, vd, vj, vk, tcg_constant_i32(flags));
4692*5c23704eSSong Gao
4693*5c23704eSSong Gao    return true;
4694*5c23704eSSong Gao}
4695*5c23704eSSong Gao
4696*5c23704eSSong GaoTRANS(vfcmp_cond_s, LSX, do_vfcmp_cond_s, 16)
4697*5c23704eSSong GaoTRANS(vfcmp_cond_d, LSX, do_vfcmp_cond_d, 16)
4698*5c23704eSSong GaoTRANS(xvfcmp_cond_s, LASX, do_vfcmp_cond_s, 32)
4699*5c23704eSSong GaoTRANS(xvfcmp_cond_d, LASX, do_vfcmp_cond_d, 32)
4700*5c23704eSSong Gao
4701*5c23704eSSong Gaostatic bool do_vbitsel_v(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz)
4702*5c23704eSSong Gao{
4703*5c23704eSSong Gao    if (!check_vec(ctx, oprsz)) {
4704*5c23704eSSong Gao        return true;
4705*5c23704eSSong Gao    }
4706*5c23704eSSong Gao
4707*5c23704eSSong Gao    tcg_gen_gvec_bitsel(MO_64, vec_full_offset(a->vd), vec_full_offset(a->va),
4708*5c23704eSSong Gao                        vec_full_offset(a->vk), vec_full_offset(a->vj),
4709*5c23704eSSong Gao                        oprsz, ctx->vl / 8);
4710*5c23704eSSong Gao    return true;
4711*5c23704eSSong Gao}
4712*5c23704eSSong Gao
4713*5c23704eSSong GaoTRANS(vbitsel_v, LSX, do_vbitsel_v, 16)
4714*5c23704eSSong GaoTRANS(xvbitsel_v, LASX, do_vbitsel_v, 32)
4715*5c23704eSSong Gao
4716*5c23704eSSong Gaostatic void gen_vbitseli(unsigned vece, TCGv_vec a, TCGv_vec b, int64_t imm)
4717*5c23704eSSong Gao{
4718*5c23704eSSong Gao    tcg_gen_bitsel_vec(vece, a, a, tcg_constant_vec_matching(a, vece, imm), b);
4719*5c23704eSSong Gao}
4720*5c23704eSSong Gao
4721*5c23704eSSong Gaostatic bool do_vbitseli_b(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz)
4722*5c23704eSSong Gao{
4723*5c23704eSSong Gao    static const GVecGen2i op = {
4724*5c23704eSSong Gao       .fniv = gen_vbitseli,
4725*5c23704eSSong Gao       .fnoi = gen_helper_vbitseli_b,
4726*5c23704eSSong Gao       .vece = MO_8,
4727*5c23704eSSong Gao       .load_dest = true
4728*5c23704eSSong Gao    };
4729*5c23704eSSong Gao
4730*5c23704eSSong Gao    if (!check_vec(ctx, oprsz)) {
4731*5c23704eSSong Gao        return true;
4732*5c23704eSSong Gao    }
4733*5c23704eSSong Gao
4734*5c23704eSSong Gao    tcg_gen_gvec_2i(vec_full_offset(a->vd), vec_full_offset(a->vj),
4735*5c23704eSSong Gao                    oprsz, ctx->vl / 8, a->imm , &op);
4736*5c23704eSSong Gao    return true;
4737*5c23704eSSong Gao}
4738*5c23704eSSong Gao
4739*5c23704eSSong GaoTRANS(vbitseli_b, LSX, do_vbitseli_b, 16)
4740*5c23704eSSong GaoTRANS(xvbitseli_b, LASX, do_vbitseli_b, 32)
4741*5c23704eSSong Gao
4742*5c23704eSSong Gao#define VSET(NAME, COND)                                                       \
4743*5c23704eSSong Gaostatic bool trans_## NAME (DisasContext *ctx, arg_cv *a)                       \
4744*5c23704eSSong Gao{                                                                              \
4745*5c23704eSSong Gao    TCGv_i64 t1, al, ah;                                                       \
4746*5c23704eSSong Gao                                                                               \
4747*5c23704eSSong Gao    al = tcg_temp_new_i64();                                                   \
4748*5c23704eSSong Gao    ah = tcg_temp_new_i64();                                                   \
4749*5c23704eSSong Gao    t1 = tcg_temp_new_i64();                                                   \
4750*5c23704eSSong Gao                                                                               \
4751*5c23704eSSong Gao    get_vreg64(ah, a->vj, 1);                                                  \
4752*5c23704eSSong Gao    get_vreg64(al, a->vj, 0);                                                  \
4753*5c23704eSSong Gao                                                                               \
4754*5c23704eSSong Gao    if (!avail_LSX(ctx)) {                                                     \
4755*5c23704eSSong Gao        return false;                                                          \
4756*5c23704eSSong Gao    }                                                                          \
4757*5c23704eSSong Gao                                                                               \
4758*5c23704eSSong Gao    if (!check_vec(ctx, 16)) {                                                 \
4759*5c23704eSSong Gao        return true;                                                           \
4760*5c23704eSSong Gao    }                                                                          \
4761*5c23704eSSong Gao                                                                               \
4762*5c23704eSSong Gao    tcg_gen_or_i64(t1, al, ah);                                                \
4763*5c23704eSSong Gao    tcg_gen_setcondi_i64(COND, t1, t1, 0);                                     \
4764*5c23704eSSong Gao    tcg_gen_st8_tl(t1, tcg_env, offsetof(CPULoongArchState, cf[a->cd & 0x7])); \
4765*5c23704eSSong Gao                                                                               \
4766*5c23704eSSong Gao    return true;                                                               \
4767*5c23704eSSong Gao}
4768*5c23704eSSong Gao
4769*5c23704eSSong GaoVSET(vseteqz_v, TCG_COND_EQ)
4770*5c23704eSSong GaoVSET(vsetnez_v, TCG_COND_NE)
4771*5c23704eSSong Gao
4772*5c23704eSSong GaoTRANS(vsetanyeqz_b, LSX, gen_cv, gen_helper_vsetanyeqz_b)
4773*5c23704eSSong GaoTRANS(vsetanyeqz_h, LSX, gen_cv, gen_helper_vsetanyeqz_h)
4774*5c23704eSSong GaoTRANS(vsetanyeqz_w, LSX, gen_cv, gen_helper_vsetanyeqz_w)
4775*5c23704eSSong GaoTRANS(vsetanyeqz_d, LSX, gen_cv, gen_helper_vsetanyeqz_d)
4776*5c23704eSSong GaoTRANS(vsetallnez_b, LSX, gen_cv, gen_helper_vsetallnez_b)
4777*5c23704eSSong GaoTRANS(vsetallnez_h, LSX, gen_cv, gen_helper_vsetallnez_h)
4778*5c23704eSSong GaoTRANS(vsetallnez_w, LSX, gen_cv, gen_helper_vsetallnez_w)
4779*5c23704eSSong GaoTRANS(vsetallnez_d, LSX, gen_cv, gen_helper_vsetallnez_d)
4780*5c23704eSSong Gao
4781*5c23704eSSong Gao#define XVSET(NAME, COND)                                                      \
4782*5c23704eSSong Gaostatic bool trans_## NAME(DisasContext *ctx, arg_cv * a)                       \
4783*5c23704eSSong Gao{                                                                              \
4784*5c23704eSSong Gao    TCGv_i64 t1, t2, d[4];                                                     \
4785*5c23704eSSong Gao                                                                               \
4786*5c23704eSSong Gao    d[0] = tcg_temp_new_i64();                                                 \
4787*5c23704eSSong Gao    d[1] = tcg_temp_new_i64();                                                 \
4788*5c23704eSSong Gao    d[2] = tcg_temp_new_i64();                                                 \
4789*5c23704eSSong Gao    d[3] = tcg_temp_new_i64();                                                 \
4790*5c23704eSSong Gao    t1 = tcg_temp_new_i64();                                                   \
4791*5c23704eSSong Gao    t2 = tcg_temp_new_i64();                                                   \
4792*5c23704eSSong Gao                                                                               \
4793*5c23704eSSong Gao    get_vreg64(d[0], a->vj, 0);                                                \
4794*5c23704eSSong Gao    get_vreg64(d[1], a->vj, 1);                                                \
4795*5c23704eSSong Gao    get_vreg64(d[2], a->vj, 2);                                                \
4796*5c23704eSSong Gao    get_vreg64(d[3], a->vj, 3);                                                \
4797*5c23704eSSong Gao                                                                               \
4798*5c23704eSSong Gao    if (!avail_LASX(ctx)) {                                                    \
4799*5c23704eSSong Gao        return false;                                                          \
4800*5c23704eSSong Gao    }                                                                          \
4801*5c23704eSSong Gao                                                                               \
4802*5c23704eSSong Gao    if (!check_vec(ctx, 32)) {                                                 \
4803*5c23704eSSong Gao        return true;                                                           \
4804*5c23704eSSong Gao    }                                                                          \
4805*5c23704eSSong Gao                                                                               \
4806*5c23704eSSong Gao    tcg_gen_or_i64(t1, d[0], d[1]);                                            \
4807*5c23704eSSong Gao    tcg_gen_or_i64(t2, d[2], d[3]);                                            \
4808*5c23704eSSong Gao    tcg_gen_or_i64(t1, t2, t1);                                                \
4809*5c23704eSSong Gao    tcg_gen_setcondi_i64(COND, t1, t1, 0);                                     \
4810*5c23704eSSong Gao    tcg_gen_st8_tl(t1, tcg_env, offsetof(CPULoongArchState, cf[a->cd & 0x7])); \
4811*5c23704eSSong Gao                                                                               \
4812*5c23704eSSong Gao    return true;                                                               \
4813*5c23704eSSong Gao}
4814*5c23704eSSong Gao
4815*5c23704eSSong GaoXVSET(xvseteqz_v, TCG_COND_EQ)
4816*5c23704eSSong GaoXVSET(xvsetnez_v, TCG_COND_NE)
4817*5c23704eSSong Gao
4818*5c23704eSSong GaoTRANS(xvsetanyeqz_b, LASX, gen_cx, gen_helper_vsetanyeqz_b)
4819*5c23704eSSong GaoTRANS(xvsetanyeqz_h, LASX, gen_cx, gen_helper_vsetanyeqz_h)
4820*5c23704eSSong GaoTRANS(xvsetanyeqz_w, LASX, gen_cx, gen_helper_vsetanyeqz_w)
4821*5c23704eSSong GaoTRANS(xvsetanyeqz_d, LASX, gen_cx, gen_helper_vsetanyeqz_d)
4822*5c23704eSSong GaoTRANS(xvsetallnez_b, LASX, gen_cx, gen_helper_vsetallnez_b)
4823*5c23704eSSong GaoTRANS(xvsetallnez_h, LASX, gen_cx, gen_helper_vsetallnez_h)
4824*5c23704eSSong GaoTRANS(xvsetallnez_w, LASX, gen_cx, gen_helper_vsetallnez_w)
4825*5c23704eSSong GaoTRANS(xvsetallnez_d, LASX, gen_cx, gen_helper_vsetallnez_d)
4826*5c23704eSSong Gao
4827*5c23704eSSong Gaostatic bool gen_g2v_vl(DisasContext *ctx, arg_vr_i *a, uint32_t oprsz, MemOp mop,
4828*5c23704eSSong Gao                       void (*func)(TCGv, TCGv_ptr, tcg_target_long))
4829*5c23704eSSong Gao{
4830*5c23704eSSong Gao    TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
4831*5c23704eSSong Gao
4832*5c23704eSSong Gao    if (!check_vec(ctx, oprsz)) {
4833*5c23704eSSong Gao        return true;
4834*5c23704eSSong Gao    }
4835*5c23704eSSong Gao
4836*5c23704eSSong Gao    func(src, tcg_env, vec_reg_offset(a->vd, a->imm, mop));
4837*5c23704eSSong Gao
4838*5c23704eSSong Gao    return true;
4839*5c23704eSSong Gao}
4840*5c23704eSSong Gao
4841*5c23704eSSong Gaostatic bool gen_g2v(DisasContext *ctx, arg_vr_i *a, MemOp mop,
4842*5c23704eSSong Gao                    void (*func)(TCGv, TCGv_ptr, tcg_target_long))
4843*5c23704eSSong Gao{
4844*5c23704eSSong Gao    return gen_g2v_vl(ctx, a, 16, mop, func);
4845*5c23704eSSong Gao}
4846*5c23704eSSong Gao
4847*5c23704eSSong Gaostatic bool gen_g2x(DisasContext *ctx, arg_vr_i *a, MemOp mop,
4848*5c23704eSSong Gao                    void (*func)(TCGv, TCGv_ptr, tcg_target_long))
4849*5c23704eSSong Gao{
4850*5c23704eSSong Gao    return gen_g2v_vl(ctx, a, 32, mop, func);
4851*5c23704eSSong Gao}
4852*5c23704eSSong Gao
4853*5c23704eSSong GaoTRANS(vinsgr2vr_b, LSX, gen_g2v, MO_8, tcg_gen_st8_i64)
4854*5c23704eSSong GaoTRANS(vinsgr2vr_h, LSX, gen_g2v, MO_16, tcg_gen_st16_i64)
4855*5c23704eSSong GaoTRANS(vinsgr2vr_w, LSX, gen_g2v, MO_32, tcg_gen_st32_i64)
4856*5c23704eSSong GaoTRANS(vinsgr2vr_d, LSX, gen_g2v, MO_64, tcg_gen_st_i64)
4857*5c23704eSSong GaoTRANS(xvinsgr2vr_w, LASX, gen_g2x, MO_32, tcg_gen_st32_i64)
4858*5c23704eSSong GaoTRANS(xvinsgr2vr_d, LASX, gen_g2x, MO_64, tcg_gen_st_i64)
4859*5c23704eSSong Gao
4860*5c23704eSSong Gaostatic bool gen_v2g_vl(DisasContext *ctx, arg_rv_i *a, uint32_t oprsz, MemOp mop,
4861*5c23704eSSong Gao                       void (*func)(TCGv, TCGv_ptr, tcg_target_long))
4862*5c23704eSSong Gao{
4863*5c23704eSSong Gao    TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
4864*5c23704eSSong Gao
4865*5c23704eSSong Gao    if (!check_vec(ctx, oprsz)) {
4866*5c23704eSSong Gao        return true;
4867*5c23704eSSong Gao    }
4868*5c23704eSSong Gao
4869*5c23704eSSong Gao    func(dst, tcg_env, vec_reg_offset(a->vj, a->imm, mop));
4870*5c23704eSSong Gao
4871*5c23704eSSong Gao    return true;
4872*5c23704eSSong Gao}
4873*5c23704eSSong Gao
4874*5c23704eSSong Gaostatic bool gen_v2g(DisasContext *ctx, arg_rv_i *a, MemOp mop,
4875*5c23704eSSong Gao                    void (*func)(TCGv, TCGv_ptr, tcg_target_long))
4876*5c23704eSSong Gao{
4877*5c23704eSSong Gao    return gen_v2g_vl(ctx, a, 16, mop, func);
4878*5c23704eSSong Gao}
4879*5c23704eSSong Gao
4880*5c23704eSSong Gaostatic bool gen_x2g(DisasContext *ctx, arg_rv_i *a, MemOp mop,
4881*5c23704eSSong Gao                    void (*func)(TCGv, TCGv_ptr, tcg_target_long))
4882*5c23704eSSong Gao{
4883*5c23704eSSong Gao    return gen_v2g_vl(ctx, a, 32, mop, func);
4884*5c23704eSSong Gao}
4885*5c23704eSSong Gao
4886*5c23704eSSong GaoTRANS(vpickve2gr_b, LSX, gen_v2g, MO_8, tcg_gen_ld8s_i64)
4887*5c23704eSSong GaoTRANS(vpickve2gr_h, LSX, gen_v2g, MO_16, tcg_gen_ld16s_i64)
4888*5c23704eSSong GaoTRANS(vpickve2gr_w, LSX, gen_v2g, MO_32, tcg_gen_ld32s_i64)
4889*5c23704eSSong GaoTRANS(vpickve2gr_d, LSX, gen_v2g, MO_64, tcg_gen_ld_i64)
4890*5c23704eSSong GaoTRANS(vpickve2gr_bu, LSX, gen_v2g, MO_8, tcg_gen_ld8u_i64)
4891*5c23704eSSong GaoTRANS(vpickve2gr_hu, LSX, gen_v2g, MO_16, tcg_gen_ld16u_i64)
4892*5c23704eSSong GaoTRANS(vpickve2gr_wu, LSX, gen_v2g, MO_32, tcg_gen_ld32u_i64)
4893*5c23704eSSong GaoTRANS(vpickve2gr_du, LSX, gen_v2g, MO_64, tcg_gen_ld_i64)
4894*5c23704eSSong GaoTRANS(xvpickve2gr_w, LASX, gen_x2g, MO_32, tcg_gen_ld32s_i64)
4895*5c23704eSSong GaoTRANS(xvpickve2gr_d, LASX, gen_x2g, MO_64, tcg_gen_ld_i64)
4896*5c23704eSSong GaoTRANS(xvpickve2gr_wu, LASX, gen_x2g, MO_32, tcg_gen_ld32u_i64)
4897*5c23704eSSong GaoTRANS(xvpickve2gr_du, LASX, gen_x2g, MO_64, tcg_gen_ld_i64)
4898*5c23704eSSong Gao
4899*5c23704eSSong Gaostatic bool gvec_dup_vl(DisasContext *ctx, arg_vr *a,
4900*5c23704eSSong Gao                        uint32_t oprsz, MemOp mop)
4901*5c23704eSSong Gao{
4902*5c23704eSSong Gao    TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
4903*5c23704eSSong Gao
4904*5c23704eSSong Gao    if (!check_vec(ctx, oprsz)) {
4905*5c23704eSSong Gao        return true;
4906*5c23704eSSong Gao    }
4907*5c23704eSSong Gao
4908*5c23704eSSong Gao    tcg_gen_gvec_dup_i64(mop, vec_full_offset(a->vd),
4909*5c23704eSSong Gao                         oprsz, ctx->vl/8, src);
4910*5c23704eSSong Gao    return true;
4911*5c23704eSSong Gao}
4912*5c23704eSSong Gao
4913*5c23704eSSong Gaostatic bool gvec_dup(DisasContext *ctx, arg_vr *a, MemOp mop)
4914*5c23704eSSong Gao{
4915*5c23704eSSong Gao    return gvec_dup_vl(ctx, a, 16, mop);
4916*5c23704eSSong Gao}
4917*5c23704eSSong Gao
4918*5c23704eSSong Gaostatic bool gvec_dupx(DisasContext *ctx, arg_vr *a, MemOp mop)
4919*5c23704eSSong Gao{
4920*5c23704eSSong Gao    return gvec_dup_vl(ctx, a, 32, mop);
4921*5c23704eSSong Gao}
4922*5c23704eSSong Gao
4923*5c23704eSSong GaoTRANS(vreplgr2vr_b, LSX, gvec_dup, MO_8)
4924*5c23704eSSong GaoTRANS(vreplgr2vr_h, LSX, gvec_dup, MO_16)
4925*5c23704eSSong GaoTRANS(vreplgr2vr_w, LSX, gvec_dup, MO_32)
4926*5c23704eSSong GaoTRANS(vreplgr2vr_d, LSX, gvec_dup, MO_64)
4927*5c23704eSSong GaoTRANS(xvreplgr2vr_b, LASX, gvec_dupx, MO_8)
4928*5c23704eSSong GaoTRANS(xvreplgr2vr_h, LASX, gvec_dupx, MO_16)
4929*5c23704eSSong GaoTRANS(xvreplgr2vr_w, LASX, gvec_dupx, MO_32)
4930*5c23704eSSong GaoTRANS(xvreplgr2vr_d, LASX, gvec_dupx, MO_64)
4931*5c23704eSSong Gao
4932*5c23704eSSong Gaostatic bool trans_vreplvei_b(DisasContext *ctx, arg_vv_i *a)
4933*5c23704eSSong Gao{
4934*5c23704eSSong Gao    if (!avail_LSX(ctx)) {
4935*5c23704eSSong Gao        return false;
4936*5c23704eSSong Gao    }
4937*5c23704eSSong Gao
4938*5c23704eSSong Gao    if (!check_vec(ctx, 16)) {
4939*5c23704eSSong Gao        return true;
4940*5c23704eSSong Gao    }
4941*5c23704eSSong Gao
4942*5c23704eSSong Gao    tcg_gen_gvec_dup_mem(MO_8,vec_full_offset(a->vd),
4943*5c23704eSSong Gao                         offsetof(CPULoongArchState,
4944*5c23704eSSong Gao                                  fpr[a->vj].vreg.B((a->imm))),
4945*5c23704eSSong Gao                         16, ctx->vl/8);
4946*5c23704eSSong Gao    return true;
4947*5c23704eSSong Gao}
4948*5c23704eSSong Gao
4949*5c23704eSSong Gaostatic bool trans_vreplvei_h(DisasContext *ctx, arg_vv_i *a)
4950*5c23704eSSong Gao{
4951*5c23704eSSong Gao    if (!avail_LSX(ctx)) {
4952*5c23704eSSong Gao        return false;
4953*5c23704eSSong Gao    }
4954*5c23704eSSong Gao
4955*5c23704eSSong Gao    if (!check_vec(ctx, 16)) {
4956*5c23704eSSong Gao        return true;
4957*5c23704eSSong Gao    }
4958*5c23704eSSong Gao
4959*5c23704eSSong Gao    tcg_gen_gvec_dup_mem(MO_16, vec_full_offset(a->vd),
4960*5c23704eSSong Gao                         offsetof(CPULoongArchState,
4961*5c23704eSSong Gao                                  fpr[a->vj].vreg.H((a->imm))),
4962*5c23704eSSong Gao                         16, ctx->vl/8);
4963*5c23704eSSong Gao    return true;
4964*5c23704eSSong Gao}
4965*5c23704eSSong Gaostatic bool trans_vreplvei_w(DisasContext *ctx, arg_vv_i *a)
4966*5c23704eSSong Gao{
4967*5c23704eSSong Gao    if (!avail_LSX(ctx)) {
4968*5c23704eSSong Gao        return false;
4969*5c23704eSSong Gao    }
4970*5c23704eSSong Gao
4971*5c23704eSSong Gao    if (!check_vec(ctx, 16)) {
4972*5c23704eSSong Gao        return true;
4973*5c23704eSSong Gao    }
4974*5c23704eSSong Gao
4975*5c23704eSSong Gao    tcg_gen_gvec_dup_mem(MO_32, vec_full_offset(a->vd),
4976*5c23704eSSong Gao                         offsetof(CPULoongArchState,
4977*5c23704eSSong Gao                                  fpr[a->vj].vreg.W((a->imm))),
4978*5c23704eSSong Gao                        16, ctx->vl/8);
4979*5c23704eSSong Gao    return true;
4980*5c23704eSSong Gao}
4981*5c23704eSSong Gaostatic bool trans_vreplvei_d(DisasContext *ctx, arg_vv_i *a)
4982*5c23704eSSong Gao{
4983*5c23704eSSong Gao    if (!avail_LSX(ctx)) {
4984*5c23704eSSong Gao        return false;
4985*5c23704eSSong Gao    }
4986*5c23704eSSong Gao
4987*5c23704eSSong Gao    if (!check_vec(ctx, 16)) {
4988*5c23704eSSong Gao        return true;
4989*5c23704eSSong Gao    }
4990*5c23704eSSong Gao
4991*5c23704eSSong Gao    tcg_gen_gvec_dup_mem(MO_64, vec_full_offset(a->vd),
4992*5c23704eSSong Gao                         offsetof(CPULoongArchState,
4993*5c23704eSSong Gao                                  fpr[a->vj].vreg.D((a->imm))),
4994*5c23704eSSong Gao                         16, ctx->vl/8);
4995*5c23704eSSong Gao    return true;
4996*5c23704eSSong Gao}
4997*5c23704eSSong Gao
4998*5c23704eSSong Gaostatic bool gen_vreplve_vl(DisasContext *ctx, arg_vvr *a,
4999*5c23704eSSong Gao                           uint32_t oprsz, int vece, int bit,
5000*5c23704eSSong Gao                           void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
5001*5c23704eSSong Gao{
5002*5c23704eSSong Gao    int i;
5003*5c23704eSSong Gao    TCGv_i64 t0 = tcg_temp_new_i64();
5004*5c23704eSSong Gao    TCGv_ptr t1 = tcg_temp_new_ptr();
5005*5c23704eSSong Gao    TCGv_i64 t2 = tcg_temp_new_i64();
5006*5c23704eSSong Gao
5007*5c23704eSSong Gao    if (!check_vec(ctx, oprsz)) {
5008*5c23704eSSong Gao        return true;
5009*5c23704eSSong Gao    }
5010*5c23704eSSong Gao
5011*5c23704eSSong Gao    tcg_gen_andi_i64(t0, gpr_src(ctx, a->rk, EXT_NONE), (LSX_LEN / bit) - 1);
5012*5c23704eSSong Gao    tcg_gen_shli_i64(t0, t0, vece);
5013*5c23704eSSong Gao    if (HOST_BIG_ENDIAN) {
5014*5c23704eSSong Gao        tcg_gen_xori_i64(t0, t0, vece << ((LSX_LEN / bit) - 1));
5015*5c23704eSSong Gao    }
5016*5c23704eSSong Gao
5017*5c23704eSSong Gao    tcg_gen_trunc_i64_ptr(t1, t0);
5018*5c23704eSSong Gao    tcg_gen_add_ptr(t1, t1, tcg_env);
5019*5c23704eSSong Gao
5020*5c23704eSSong Gao    for (i = 0; i < oprsz; i += 16) {
5021*5c23704eSSong Gao        func(t2, t1, vec_full_offset(a->vj) + i);
5022*5c23704eSSong Gao        tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd) + i, 16, 16, t2);
5023*5c23704eSSong Gao    }
5024*5c23704eSSong Gao
5025*5c23704eSSong Gao    return true;
5026*5c23704eSSong Gao}
5027*5c23704eSSong Gao
5028*5c23704eSSong Gaostatic bool gen_vreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit,
5029*5c23704eSSong Gao                        void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
5030*5c23704eSSong Gao{
5031*5c23704eSSong Gao    return gen_vreplve_vl(ctx, a, 16, vece, bit, func);
5032*5c23704eSSong Gao}
5033*5c23704eSSong Gao
5034*5c23704eSSong Gaostatic bool gen_xvreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit,
5035*5c23704eSSong Gao                         void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
5036*5c23704eSSong Gao{
5037*5c23704eSSong Gao    return gen_vreplve_vl(ctx, a, 32, vece, bit, func);
5038*5c23704eSSong Gao}
5039*5c23704eSSong Gao
5040*5c23704eSSong GaoTRANS(vreplve_b, LSX, gen_vreplve, MO_8,  8, tcg_gen_ld8u_i64)
5041*5c23704eSSong GaoTRANS(vreplve_h, LSX, gen_vreplve, MO_16, 16, tcg_gen_ld16u_i64)
5042*5c23704eSSong GaoTRANS(vreplve_w, LSX, gen_vreplve, MO_32, 32, tcg_gen_ld32u_i64)
5043*5c23704eSSong GaoTRANS(vreplve_d, LSX, gen_vreplve, MO_64, 64, tcg_gen_ld_i64)
5044*5c23704eSSong GaoTRANS(xvreplve_b, LASX, gen_xvreplve, MO_8,  8, tcg_gen_ld8u_i64)
5045*5c23704eSSong GaoTRANS(xvreplve_h, LASX, gen_xvreplve, MO_16, 16, tcg_gen_ld16u_i64)
5046*5c23704eSSong GaoTRANS(xvreplve_w, LASX, gen_xvreplve, MO_32, 32, tcg_gen_ld32u_i64)
5047*5c23704eSSong GaoTRANS(xvreplve_d, LASX, gen_xvreplve, MO_64, 64, tcg_gen_ld_i64)
5048*5c23704eSSong Gao
5049*5c23704eSSong Gaostatic bool gen_xvrepl128(DisasContext *ctx, arg_vv_i *a, MemOp mop)
5050*5c23704eSSong Gao{
5051*5c23704eSSong Gao    int i;
5052*5c23704eSSong Gao
5053*5c23704eSSong Gao    if (!check_vec(ctx, 32)) {
5054*5c23704eSSong Gao        return true;
5055*5c23704eSSong Gao    }
5056*5c23704eSSong Gao
5057*5c23704eSSong Gao    for (i = 0; i < 32; i += 16) {
5058*5c23704eSSong Gao        tcg_gen_gvec_dup_mem(mop, vec_full_offset(a->vd) + i,
5059*5c23704eSSong Gao                             vec_reg_offset(a->vj, a->imm, mop) + i, 16, 16);
5060*5c23704eSSong Gao
5061*5c23704eSSong Gao    }
5062*5c23704eSSong Gao    return true;
5063*5c23704eSSong Gao}
5064*5c23704eSSong Gao
5065*5c23704eSSong GaoTRANS(xvrepl128vei_b, LASX, gen_xvrepl128, MO_8)
5066*5c23704eSSong GaoTRANS(xvrepl128vei_h, LASX, gen_xvrepl128, MO_16)
5067*5c23704eSSong GaoTRANS(xvrepl128vei_w, LASX, gen_xvrepl128, MO_32)
5068*5c23704eSSong GaoTRANS(xvrepl128vei_d, LASX, gen_xvrepl128, MO_64)
5069*5c23704eSSong Gao
5070*5c23704eSSong Gaostatic bool gen_xvreplve0(DisasContext *ctx, arg_vv *a, MemOp mop)
5071*5c23704eSSong Gao{
5072*5c23704eSSong Gao    if (!check_vec(ctx, 32)) {
5073*5c23704eSSong Gao        return true;
5074*5c23704eSSong Gao    }
5075*5c23704eSSong Gao
5076*5c23704eSSong Gao    tcg_gen_gvec_dup_mem(mop, vec_full_offset(a->vd),
5077*5c23704eSSong Gao                         vec_full_offset(a->vj), 32, 32);
5078*5c23704eSSong Gao    return true;
5079*5c23704eSSong Gao}
5080*5c23704eSSong Gao
5081*5c23704eSSong GaoTRANS(xvreplve0_b, LASX, gen_xvreplve0, MO_8)
5082*5c23704eSSong GaoTRANS(xvreplve0_h, LASX, gen_xvreplve0, MO_16)
5083*5c23704eSSong GaoTRANS(xvreplve0_w, LASX, gen_xvreplve0, MO_32)
5084*5c23704eSSong GaoTRANS(xvreplve0_d, LASX, gen_xvreplve0, MO_64)
5085*5c23704eSSong GaoTRANS(xvreplve0_q, LASX, gen_xvreplve0, MO_128)
5086*5c23704eSSong Gao
5087*5c23704eSSong GaoTRANS(xvinsve0_w, LASX, gen_xx_i, gen_helper_xvinsve0_w)
5088*5c23704eSSong GaoTRANS(xvinsve0_d, LASX, gen_xx_i, gen_helper_xvinsve0_d)
5089*5c23704eSSong Gao
5090*5c23704eSSong GaoTRANS(xvpickve_w, LASX, gen_xx_i, gen_helper_xvpickve_w)
5091*5c23704eSSong GaoTRANS(xvpickve_d, LASX, gen_xx_i, gen_helper_xvpickve_d)
5092*5c23704eSSong Gao
5093*5c23704eSSong Gaostatic bool do_vbsll_v(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz)
5094*5c23704eSSong Gao{
5095*5c23704eSSong Gao    int i, ofs;
5096*5c23704eSSong Gao
5097*5c23704eSSong Gao    if (!check_vec(ctx, oprsz)) {
5098*5c23704eSSong Gao        return true;
5099*5c23704eSSong Gao    }
5100*5c23704eSSong Gao
5101*5c23704eSSong Gao    for (i = 0; i < oprsz / 16; i++) {
5102*5c23704eSSong Gao        TCGv desthigh = tcg_temp_new_i64();
5103*5c23704eSSong Gao        TCGv destlow = tcg_temp_new_i64();
5104*5c23704eSSong Gao        TCGv high = tcg_temp_new_i64();
5105*5c23704eSSong Gao        TCGv low = tcg_temp_new_i64();
5106*5c23704eSSong Gao
5107*5c23704eSSong Gao        get_vreg64(low, a->vj, 2 * i);
5108*5c23704eSSong Gao
5109*5c23704eSSong Gao        ofs = ((a->imm) & 0xf) * 8;
5110*5c23704eSSong Gao        if (ofs < 64) {
5111*5c23704eSSong Gao            get_vreg64(high, a->vj, 2 * i + 1);
5112*5c23704eSSong Gao            tcg_gen_extract2_i64(desthigh, low, high, 64 - ofs);
5113*5c23704eSSong Gao            tcg_gen_shli_i64(destlow, low, ofs);
5114*5c23704eSSong Gao        } else {
5115*5c23704eSSong Gao            tcg_gen_shli_i64(desthigh, low, ofs - 64);
5116*5c23704eSSong Gao            destlow = tcg_constant_i64(0);
5117*5c23704eSSong Gao        }
5118*5c23704eSSong Gao        set_vreg64(desthigh, a->vd, 2 * i + 1);
5119*5c23704eSSong Gao        set_vreg64(destlow, a->vd, 2 * i);
5120*5c23704eSSong Gao    }
5121*5c23704eSSong Gao
5122*5c23704eSSong Gao    return true;
5123*5c23704eSSong Gao}
5124*5c23704eSSong Gao
5125*5c23704eSSong Gaostatic bool do_vbsrl_v(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz)
5126*5c23704eSSong Gao{
5127*5c23704eSSong Gao    int i, ofs;
5128*5c23704eSSong Gao
5129*5c23704eSSong Gao    if (!check_vec(ctx, 32)) {
5130*5c23704eSSong Gao        return true;
5131*5c23704eSSong Gao    }
5132*5c23704eSSong Gao
5133*5c23704eSSong Gao    for (i = 0; i < oprsz / 16; i++) {
5134*5c23704eSSong Gao        TCGv desthigh = tcg_temp_new_i64();
5135*5c23704eSSong Gao        TCGv destlow = tcg_temp_new_i64();
5136*5c23704eSSong Gao        TCGv high = tcg_temp_new_i64();
5137*5c23704eSSong Gao        TCGv low = tcg_temp_new_i64();
5138*5c23704eSSong Gao        get_vreg64(high, a->vj, 2 * i + 1);
5139*5c23704eSSong Gao
5140*5c23704eSSong Gao        ofs = ((a->imm) & 0xf) * 8;
5141*5c23704eSSong Gao        if (ofs < 64) {
5142*5c23704eSSong Gao            get_vreg64(low, a->vj, 2 * i);
5143*5c23704eSSong Gao            tcg_gen_extract2_i64(destlow, low, high, ofs);
5144*5c23704eSSong Gao            tcg_gen_shri_i64(desthigh, high, ofs);
5145*5c23704eSSong Gao        } else {
5146*5c23704eSSong Gao            tcg_gen_shri_i64(destlow, high, ofs - 64);
5147*5c23704eSSong Gao            desthigh = tcg_constant_i64(0);
5148*5c23704eSSong Gao        }
5149*5c23704eSSong Gao        set_vreg64(desthigh, a->vd, 2 * i + 1);
5150*5c23704eSSong Gao        set_vreg64(destlow, a->vd, 2 * i);
5151*5c23704eSSong Gao    }
5152*5c23704eSSong Gao
5153*5c23704eSSong Gao    return true;
5154*5c23704eSSong Gao}
5155*5c23704eSSong Gao
5156*5c23704eSSong GaoTRANS(vbsll_v, LSX, do_vbsll_v, 16)
5157*5c23704eSSong GaoTRANS(vbsrl_v, LSX, do_vbsrl_v, 16)
5158*5c23704eSSong GaoTRANS(xvbsll_v, LASX, do_vbsll_v, 32)
5159*5c23704eSSong GaoTRANS(xvbsrl_v, LASX, do_vbsrl_v, 32)
5160*5c23704eSSong Gao
/*
 * Translator entries for vpackev/vpackod, one per element size (b/h/w/d).
 * LSX entries go through gen_vvv and LASX entries through gen_xxx; both
 * widths use the same gen_helper_vpack{ev,od}_* helper for a given
 * element size.
 */
TRANS(vpackev_b, LSX, gen_vvv, gen_helper_vpackev_b)
TRANS(vpackev_h, LSX, gen_vvv, gen_helper_vpackev_h)
TRANS(vpackev_w, LSX, gen_vvv, gen_helper_vpackev_w)
TRANS(vpackev_d, LSX, gen_vvv, gen_helper_vpackev_d)
TRANS(vpackod_b, LSX, gen_vvv, gen_helper_vpackod_b)
TRANS(vpackod_h, LSX, gen_vvv, gen_helper_vpackod_h)
TRANS(vpackod_w, LSX, gen_vvv, gen_helper_vpackod_w)
TRANS(vpackod_d, LSX, gen_vvv, gen_helper_vpackod_d)
TRANS(xvpackev_b, LASX, gen_xxx, gen_helper_vpackev_b)
TRANS(xvpackev_h, LASX, gen_xxx, gen_helper_vpackev_h)
TRANS(xvpackev_w, LASX, gen_xxx, gen_helper_vpackev_w)
TRANS(xvpackev_d, LASX, gen_xxx, gen_helper_vpackev_d)
TRANS(xvpackod_b, LASX, gen_xxx, gen_helper_vpackod_b)
TRANS(xvpackod_h, LASX, gen_xxx, gen_helper_vpackod_h)
TRANS(xvpackod_w, LASX, gen_xxx, gen_helper_vpackod_w)
TRANS(xvpackod_d, LASX, gen_xxx, gen_helper_vpackod_d)
5177*5c23704eSSong Gao
/*
 * Translator entries for vpickev/vpickod, one per element size (b/h/w/d).
 * LSX entries go through gen_vvv and LASX entries through gen_xxx; both
 * widths use the same gen_helper_vpick{ev,od}_* helper for a given
 * element size.
 */
TRANS(vpickev_b, LSX, gen_vvv, gen_helper_vpickev_b)
TRANS(vpickev_h, LSX, gen_vvv, gen_helper_vpickev_h)
TRANS(vpickev_w, LSX, gen_vvv, gen_helper_vpickev_w)
TRANS(vpickev_d, LSX, gen_vvv, gen_helper_vpickev_d)
TRANS(vpickod_b, LSX, gen_vvv, gen_helper_vpickod_b)
TRANS(vpickod_h, LSX, gen_vvv, gen_helper_vpickod_h)
TRANS(vpickod_w, LSX, gen_vvv, gen_helper_vpickod_w)
TRANS(vpickod_d, LSX, gen_vvv, gen_helper_vpickod_d)
TRANS(xvpickev_b, LASX, gen_xxx, gen_helper_vpickev_b)
TRANS(xvpickev_h, LASX, gen_xxx, gen_helper_vpickev_h)
TRANS(xvpickev_w, LASX, gen_xxx, gen_helper_vpickev_w)
TRANS(xvpickev_d, LASX, gen_xxx, gen_helper_vpickev_d)
TRANS(xvpickod_b, LASX, gen_xxx, gen_helper_vpickod_b)
TRANS(xvpickod_h, LASX, gen_xxx, gen_helper_vpickod_h)
TRANS(xvpickod_w, LASX, gen_xxx, gen_helper_vpickod_w)
TRANS(xvpickod_d, LASX, gen_xxx, gen_helper_vpickod_d)
5194*5c23704eSSong Gao
/*
 * Translator entries for vilvl/vilvh, one per element size (b/h/w/d).
 * LSX entries go through gen_vvv and LASX entries through gen_xxx; both
 * widths use the same gen_helper_vilv{l,h}_* helper for a given element
 * size.
 */
TRANS(vilvl_b, LSX, gen_vvv, gen_helper_vilvl_b)
TRANS(vilvl_h, LSX, gen_vvv, gen_helper_vilvl_h)
TRANS(vilvl_w, LSX, gen_vvv, gen_helper_vilvl_w)
TRANS(vilvl_d, LSX, gen_vvv, gen_helper_vilvl_d)
TRANS(vilvh_b, LSX, gen_vvv, gen_helper_vilvh_b)
TRANS(vilvh_h, LSX, gen_vvv, gen_helper_vilvh_h)
TRANS(vilvh_w, LSX, gen_vvv, gen_helper_vilvh_w)
TRANS(vilvh_d, LSX, gen_vvv, gen_helper_vilvh_d)
TRANS(xvilvl_b, LASX, gen_xxx, gen_helper_vilvl_b)
TRANS(xvilvl_h, LASX, gen_xxx, gen_helper_vilvl_h)
TRANS(xvilvl_w, LASX, gen_xxx, gen_helper_vilvl_w)
TRANS(xvilvl_d, LASX, gen_xxx, gen_helper_vilvl_d)
TRANS(xvilvh_b, LASX, gen_xxx, gen_helper_vilvh_b)
TRANS(xvilvh_h, LASX, gen_xxx, gen_helper_vilvh_h)
TRANS(xvilvh_w, LASX, gen_xxx, gen_helper_vilvh_w)
TRANS(xvilvh_d, LASX, gen_xxx, gen_helper_vilvh_d)
5211*5c23704eSSong Gao
/*
 * Translator entries for vshuf and vshuf4i.
 *
 * vshuf_b is the only four-register form here (gen_vvvv / gen_xxxx); the
 * h/w/d variants use the three-register generators gen_vvv / gen_xxx.
 * The vshuf4i entries take an immediate and use gen_vv_i / gen_xx_i.
 * As elsewhere in this table, LSX and LASX entries share one helper per
 * element size.
 */
TRANS(vshuf_b, LSX, gen_vvvv, gen_helper_vshuf_b)
TRANS(vshuf_h, LSX, gen_vvv, gen_helper_vshuf_h)
TRANS(vshuf_w, LSX, gen_vvv, gen_helper_vshuf_w)
TRANS(vshuf_d, LSX, gen_vvv, gen_helper_vshuf_d)
TRANS(xvshuf_b, LASX, gen_xxxx, gen_helper_vshuf_b)
TRANS(xvshuf_h, LASX, gen_xxx, gen_helper_vshuf_h)
TRANS(xvshuf_w, LASX, gen_xxx, gen_helper_vshuf_w)
TRANS(xvshuf_d, LASX, gen_xxx, gen_helper_vshuf_d)
TRANS(vshuf4i_b, LSX, gen_vv_i, gen_helper_vshuf4i_b)
TRANS(vshuf4i_h, LSX, gen_vv_i, gen_helper_vshuf4i_h)
TRANS(vshuf4i_w, LSX, gen_vv_i, gen_helper_vshuf4i_w)
TRANS(vshuf4i_d, LSX, gen_vv_i, gen_helper_vshuf4i_d)
TRANS(xvshuf4i_b, LASX, gen_xx_i, gen_helper_vshuf4i_b)
TRANS(xvshuf4i_h, LASX, gen_xx_i, gen_helper_vshuf4i_h)
TRANS(xvshuf4i_w, LASX, gen_xx_i, gen_helper_vshuf4i_w)
TRANS(xvshuf4i_d, LASX, gen_xx_i, gen_helper_vshuf4i_d)
5228*5c23704eSSong Gao
/*
 * Translator entries for the permute instructions.  xvperm_w, xvpermi_d
 * and xvpermi_q are registered for LASX only; vpermi_w has both an LSX and
 * a LASX entry sharing gen_helper_vpermi_w.
 */
TRANS(xvperm_w, LASX, gen_xxx, gen_helper_vperm_w)
TRANS(vpermi_w, LSX, gen_vv_i, gen_helper_vpermi_w)
TRANS(xvpermi_w, LASX, gen_xx_i, gen_helper_vpermi_w)
TRANS(xvpermi_d, LASX, gen_xx_i, gen_helper_vpermi_d)
TRANS(xvpermi_q, LASX, gen_xx_i, gen_helper_vpermi_q)
5234*5c23704eSSong Gao
/*
 * Translator entries for vextrins, one per element size (b/h/w/d).  These
 * are immediate forms: LSX entries use gen_vv_i, LASX entries use
 * gen_xx_i, with one shared gen_helper_vextrins_* helper per element size.
 */
TRANS(vextrins_b, LSX, gen_vv_i, gen_helper_vextrins_b)
TRANS(vextrins_h, LSX, gen_vv_i, gen_helper_vextrins_h)
TRANS(vextrins_w, LSX, gen_vv_i, gen_helper_vextrins_w)
TRANS(vextrins_d, LSX, gen_vv_i, gen_helper_vextrins_d)
TRANS(xvextrins_b, LASX, gen_xx_i, gen_helper_vextrins_b)
TRANS(xvextrins_h, LASX, gen_xx_i, gen_helper_vextrins_h)
TRANS(xvextrins_w, LASX, gen_xx_i, gen_helper_vextrins_w)
TRANS(xvextrins_d, LASX, gen_xx_i, gen_helper_vextrins_d)
5243*5c23704eSSong Gao
5244*5c23704eSSong Gaostatic bool trans_vld(DisasContext *ctx, arg_vr_i *a)
5245*5c23704eSSong Gao{
5246*5c23704eSSong Gao    TCGv addr;
5247*5c23704eSSong Gao    TCGv_i64 rl, rh;
5248*5c23704eSSong Gao    TCGv_i128 val;
5249*5c23704eSSong Gao
5250*5c23704eSSong Gao    if (!avail_LSX(ctx)) {
5251*5c23704eSSong Gao        return false;
5252*5c23704eSSong Gao    }
5253*5c23704eSSong Gao
5254*5c23704eSSong Gao    if (!check_vec(ctx, 16)) {
5255*5c23704eSSong Gao        return true;
5256*5c23704eSSong Gao    }
5257*5c23704eSSong Gao
5258*5c23704eSSong Gao    addr = gpr_src(ctx, a->rj, EXT_NONE);
5259*5c23704eSSong Gao    val = tcg_temp_new_i128();
5260*5c23704eSSong Gao    rl = tcg_temp_new_i64();
5261*5c23704eSSong Gao    rh = tcg_temp_new_i64();
5262*5c23704eSSong Gao
5263*5c23704eSSong Gao    addr = make_address_i(ctx, addr, a->imm);
5264*5c23704eSSong Gao
5265*5c23704eSSong Gao    tcg_gen_qemu_ld_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE);
5266*5c23704eSSong Gao    tcg_gen_extr_i128_i64(rl, rh, val);
5267*5c23704eSSong Gao    set_vreg64(rh, a->vd, 1);
5268*5c23704eSSong Gao    set_vreg64(rl, a->vd, 0);
5269*5c23704eSSong Gao
5270*5c23704eSSong Gao    return true;
5271*5c23704eSSong Gao}
5272*5c23704eSSong Gao
5273*5c23704eSSong Gaostatic bool trans_vst(DisasContext *ctx, arg_vr_i *a)
5274*5c23704eSSong Gao{
5275*5c23704eSSong Gao    TCGv addr;
5276*5c23704eSSong Gao    TCGv_i128 val;
5277*5c23704eSSong Gao    TCGv_i64 ah, al;
5278*5c23704eSSong Gao
5279*5c23704eSSong Gao    if (!avail_LSX(ctx)) {
5280*5c23704eSSong Gao        return false;
5281*5c23704eSSong Gao    }
5282*5c23704eSSong Gao
5283*5c23704eSSong Gao    if (!check_vec(ctx, 16)) {
5284*5c23704eSSong Gao        return true;
5285*5c23704eSSong Gao    }
5286*5c23704eSSong Gao
5287*5c23704eSSong Gao    addr = gpr_src(ctx, a->rj, EXT_NONE);
5288*5c23704eSSong Gao    val = tcg_temp_new_i128();
5289*5c23704eSSong Gao    ah = tcg_temp_new_i64();
5290*5c23704eSSong Gao    al = tcg_temp_new_i64();
5291*5c23704eSSong Gao
5292*5c23704eSSong Gao    addr = make_address_i(ctx, addr, a->imm);
5293*5c23704eSSong Gao
5294*5c23704eSSong Gao    get_vreg64(ah, a->vd, 1);
5295*5c23704eSSong Gao    get_vreg64(al, a->vd, 0);
5296*5c23704eSSong Gao    tcg_gen_concat_i64_i128(val, al, ah);
5297*5c23704eSSong Gao    tcg_gen_qemu_st_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE);
5298*5c23704eSSong Gao
5299*5c23704eSSong Gao    return true;
5300*5c23704eSSong Gao}
5301*5c23704eSSong Gao
5302*5c23704eSSong Gaostatic bool trans_vldx(DisasContext *ctx, arg_vrr *a)
5303*5c23704eSSong Gao{
5304*5c23704eSSong Gao    TCGv addr, src1, src2;
5305*5c23704eSSong Gao    TCGv_i64 rl, rh;
5306*5c23704eSSong Gao    TCGv_i128 val;
5307*5c23704eSSong Gao
5308*5c23704eSSong Gao    if (!avail_LSX(ctx)) {
5309*5c23704eSSong Gao        return false;
5310*5c23704eSSong Gao    }
5311*5c23704eSSong Gao
5312*5c23704eSSong Gao    if (!check_vec(ctx, 16)) {
5313*5c23704eSSong Gao        return true;
5314*5c23704eSSong Gao    }
5315*5c23704eSSong Gao
5316*5c23704eSSong Gao    src1 = gpr_src(ctx, a->rj, EXT_NONE);
5317*5c23704eSSong Gao    src2 = gpr_src(ctx, a->rk, EXT_NONE);
5318*5c23704eSSong Gao    val = tcg_temp_new_i128();
5319*5c23704eSSong Gao    rl = tcg_temp_new_i64();
5320*5c23704eSSong Gao    rh = tcg_temp_new_i64();
5321*5c23704eSSong Gao
5322*5c23704eSSong Gao    addr = make_address_x(ctx, src1, src2);
5323*5c23704eSSong Gao    tcg_gen_qemu_ld_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE);
5324*5c23704eSSong Gao    tcg_gen_extr_i128_i64(rl, rh, val);
5325*5c23704eSSong Gao    set_vreg64(rh, a->vd, 1);
5326*5c23704eSSong Gao    set_vreg64(rl, a->vd, 0);
5327*5c23704eSSong Gao
5328*5c23704eSSong Gao    return true;
5329*5c23704eSSong Gao}
5330*5c23704eSSong Gao
5331*5c23704eSSong Gaostatic bool trans_vstx(DisasContext *ctx, arg_vrr *a)
5332*5c23704eSSong Gao{
5333*5c23704eSSong Gao    TCGv addr, src1, src2;
5334*5c23704eSSong Gao    TCGv_i64 ah, al;
5335*5c23704eSSong Gao    TCGv_i128 val;
5336*5c23704eSSong Gao
5337*5c23704eSSong Gao    if (!avail_LSX(ctx)) {
5338*5c23704eSSong Gao        return false;
5339*5c23704eSSong Gao    }
5340*5c23704eSSong Gao
5341*5c23704eSSong Gao    if (!check_vec(ctx, 16)) {
5342*5c23704eSSong Gao        return true;
5343*5c23704eSSong Gao    }
5344*5c23704eSSong Gao
5345*5c23704eSSong Gao    src1 = gpr_src(ctx, a->rj, EXT_NONE);
5346*5c23704eSSong Gao    src2 = gpr_src(ctx, a->rk, EXT_NONE);
5347*5c23704eSSong Gao    val = tcg_temp_new_i128();
5348*5c23704eSSong Gao    ah = tcg_temp_new_i64();
5349*5c23704eSSong Gao    al = tcg_temp_new_i64();
5350*5c23704eSSong Gao
5351*5c23704eSSong Gao    addr = make_address_x(ctx, src1, src2);
5352*5c23704eSSong Gao    get_vreg64(ah, a->vd, 1);
5353*5c23704eSSong Gao    get_vreg64(al, a->vd, 0);
5354*5c23704eSSong Gao    tcg_gen_concat_i64_i128(val, al, ah);
5355*5c23704eSSong Gao    tcg_gen_qemu_st_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE);
5356*5c23704eSSong Gao
5357*5c23704eSSong Gao    return true;
5358*5c23704eSSong Gao}
5359*5c23704eSSong Gao
5360*5c23704eSSong Gaostatic bool do_vldrepl_vl(DisasContext *ctx, arg_vr_i *a,
5361*5c23704eSSong Gao                          uint32_t oprsz, MemOp mop)
5362*5c23704eSSong Gao{
5363*5c23704eSSong Gao    TCGv addr;
5364*5c23704eSSong Gao    TCGv_i64 val;
5365*5c23704eSSong Gao
5366*5c23704eSSong Gao    if (!check_vec(ctx, oprsz)) {
5367*5c23704eSSong Gao        return true;
5368*5c23704eSSong Gao    }
5369*5c23704eSSong Gao
5370*5c23704eSSong Gao    addr = gpr_src(ctx, a->rj, EXT_NONE);
5371*5c23704eSSong Gao    val = tcg_temp_new_i64();
5372*5c23704eSSong Gao
5373*5c23704eSSong Gao    addr = make_address_i(ctx, addr, a->imm);
5374*5c23704eSSong Gao
5375*5c23704eSSong Gao    tcg_gen_qemu_ld_i64(val, addr, ctx->mem_idx, mop);
5376*5c23704eSSong Gao    tcg_gen_gvec_dup_i64(mop, vec_full_offset(a->vd), oprsz, ctx->vl / 8, val);
5377*5c23704eSSong Gao
5378*5c23704eSSong Gao    return true;
5379*5c23704eSSong Gao}
5380*5c23704eSSong Gao
5381*5c23704eSSong Gaostatic bool do_vldrepl(DisasContext *ctx, arg_vr_i *a, MemOp mop)
5382*5c23704eSSong Gao{
5383*5c23704eSSong Gao    return do_vldrepl_vl(ctx, a, 16, mop);
5384*5c23704eSSong Gao}
5385*5c23704eSSong Gao
5386*5c23704eSSong Gaostatic bool do_xvldrepl(DisasContext *ctx, arg_vr_i *a, MemOp mop)
5387*5c23704eSSong Gao{
5388*5c23704eSSong Gao    return do_vldrepl_vl(ctx, a, 32, mop);
5389*5c23704eSSong Gao}
5390*5c23704eSSong Gao
/*
 * Translator entries for vldrepl/xvldrepl.  The suffix selects the MemOp
 * element size (b = MO_8, h = MO_16, w = MO_32, d = MO_64); LSX entries
 * use do_vldrepl and LASX entries use do_xvldrepl.
 */
TRANS(vldrepl_b, LSX, do_vldrepl, MO_8)
TRANS(vldrepl_h, LSX, do_vldrepl, MO_16)
TRANS(vldrepl_w, LSX, do_vldrepl, MO_32)
TRANS(vldrepl_d, LSX, do_vldrepl, MO_64)
TRANS(xvldrepl_b, LASX, do_xvldrepl, MO_8)
TRANS(xvldrepl_h, LASX, do_xvldrepl, MO_16)
TRANS(xvldrepl_w, LASX, do_xvldrepl, MO_32)
TRANS(xvldrepl_d, LASX, do_xvldrepl, MO_64)
5399*5c23704eSSong Gao
5400*5c23704eSSong Gaostatic bool do_vstelm_vl(DisasContext *ctx,
5401*5c23704eSSong Gao                         arg_vr_ii *a, uint32_t oprsz, MemOp mop)
5402*5c23704eSSong Gao{
5403*5c23704eSSong Gao    TCGv addr;
5404*5c23704eSSong Gao    TCGv_i64 val;
5405*5c23704eSSong Gao
5406*5c23704eSSong Gao    if (!check_vec(ctx, oprsz)) {
5407*5c23704eSSong Gao        return true;
5408*5c23704eSSong Gao    }
5409*5c23704eSSong Gao
5410*5c23704eSSong Gao    addr = gpr_src(ctx, a->rj, EXT_NONE);
5411*5c23704eSSong Gao    val = tcg_temp_new_i64();
5412*5c23704eSSong Gao
5413*5c23704eSSong Gao    addr = make_address_i(ctx, addr, a->imm);
5414*5c23704eSSong Gao    tcg_gen_ld_i64(val, tcg_env, vec_reg_offset(a->vd, a->imm2, mop));
5415*5c23704eSSong Gao    tcg_gen_qemu_st_i64(val, addr, ctx->mem_idx, mop);
5416*5c23704eSSong Gao    return true;
5417*5c23704eSSong Gao}
5418*5c23704eSSong Gao
5419*5c23704eSSong Gaostatic bool do_vstelm(DisasContext *ctx, arg_vr_ii *a, MemOp mop)
5420*5c23704eSSong Gao{
5421*5c23704eSSong Gao    return do_vstelm_vl(ctx, a, 16, mop);
5422*5c23704eSSong Gao}
5423*5c23704eSSong Gao
5424*5c23704eSSong Gaostatic bool do_xvstelm(DisasContext *ctx, arg_vr_ii *a, MemOp mop)
5425*5c23704eSSong Gao{
5426*5c23704eSSong Gao    return do_vstelm_vl(ctx, a, 32, mop);
5427*5c23704eSSong Gao}
5428*5c23704eSSong Gao
/*
 * Translator entries for vstelm/xvstelm.  The suffix selects the MemOp
 * element size (b = MO_8, h = MO_16, w = MO_32, d = MO_64); LSX entries
 * use do_vstelm and LASX entries use do_xvstelm.
 */
TRANS(vstelm_b, LSX, do_vstelm, MO_8)
TRANS(vstelm_h, LSX, do_vstelm, MO_16)
TRANS(vstelm_w, LSX, do_vstelm, MO_32)
TRANS(vstelm_d, LSX, do_vstelm, MO_64)
TRANS(xvstelm_b, LASX, do_xvstelm, MO_8)
TRANS(xvstelm_h, LASX, do_xvstelm, MO_16)
TRANS(xvstelm_w, LASX, do_xvstelm, MO_32)
TRANS(xvstelm_d, LASX, do_xvstelm, MO_64)
5437*5c23704eSSong Gao
5438*5c23704eSSong Gaostatic bool gen_lasx_memory(DisasContext *ctx, arg_vr_i *a,
5439*5c23704eSSong Gao                            void (*func)(DisasContext *, int, TCGv))
5440*5c23704eSSong Gao{
5441*5c23704eSSong Gao    TCGv addr = gpr_src(ctx, a->rj, EXT_NONE);
5442*5c23704eSSong Gao    TCGv temp = NULL;
5443*5c23704eSSong Gao
5444*5c23704eSSong Gao    if (!check_vec(ctx, 32)) {
5445*5c23704eSSong Gao        return true;
5446*5c23704eSSong Gao    }
5447*5c23704eSSong Gao
5448*5c23704eSSong Gao    if (a->imm) {
5449*5c23704eSSong Gao        temp = tcg_temp_new();
5450*5c23704eSSong Gao        tcg_gen_addi_tl(temp, addr, a->imm);
5451*5c23704eSSong Gao        addr = temp;
5452*5c23704eSSong Gao    }
5453*5c23704eSSong Gao
5454*5c23704eSSong Gao    func(ctx, a->vd, addr);
5455*5c23704eSSong Gao    return true;
5456*5c23704eSSong Gao}
5457*5c23704eSSong Gao
5458*5c23704eSSong Gaostatic void gen_xvld(DisasContext *ctx, int vreg, TCGv addr)
5459*5c23704eSSong Gao{
5460*5c23704eSSong Gao    int i;
5461*5c23704eSSong Gao    TCGv temp = tcg_temp_new();
5462*5c23704eSSong Gao    TCGv dest = tcg_temp_new();
5463*5c23704eSSong Gao
5464*5c23704eSSong Gao    tcg_gen_qemu_ld_i64(dest, addr, ctx->mem_idx, MO_TEUQ);
5465*5c23704eSSong Gao    set_vreg64(dest, vreg, 0);
5466*5c23704eSSong Gao
5467*5c23704eSSong Gao    for (i = 1; i < 4; i++) {
5468*5c23704eSSong Gao        tcg_gen_addi_tl(temp, addr, 8 * i);
5469*5c23704eSSong Gao        tcg_gen_qemu_ld_i64(dest, temp, ctx->mem_idx, MO_TEUQ);
5470*5c23704eSSong Gao        set_vreg64(dest, vreg, i);
5471*5c23704eSSong Gao    }
5472*5c23704eSSong Gao}
5473*5c23704eSSong Gao
5474*5c23704eSSong Gaostatic void gen_xvst(DisasContext * ctx, int vreg, TCGv addr)
5475*5c23704eSSong Gao{
5476*5c23704eSSong Gao    int i;
5477*5c23704eSSong Gao    TCGv temp = tcg_temp_new();
5478*5c23704eSSong Gao    TCGv dest = tcg_temp_new();
5479*5c23704eSSong Gao
5480*5c23704eSSong Gao    get_vreg64(dest, vreg, 0);
5481*5c23704eSSong Gao    tcg_gen_qemu_st_i64(dest, addr, ctx->mem_idx, MO_TEUQ);
5482*5c23704eSSong Gao
5483*5c23704eSSong Gao    for (i = 1; i < 4; i++) {
5484*5c23704eSSong Gao        tcg_gen_addi_tl(temp, addr, 8 * i);
5485*5c23704eSSong Gao        get_vreg64(dest, vreg, i);
5486*5c23704eSSong Gao        tcg_gen_qemu_st_i64(dest, temp, ctx->mem_idx, MO_TEUQ);
5487*5c23704eSSong Gao    }
5488*5c23704eSSong Gao}
5489*5c23704eSSong Gao
/*
 * Translator entries for the immediate-offset 256-bit load and store:
 * both go through gen_lasx_memory, with gen_xvld / gen_xvst as the access
 * emitter.
 */
TRANS(xvld, LASX, gen_lasx_memory, gen_xvld)
TRANS(xvst, LASX, gen_lasx_memory, gen_xvst)
5492*5c23704eSSong Gao
5493*5c23704eSSong Gaostatic bool gen_lasx_memoryx(DisasContext *ctx, arg_vrr *a,
5494*5c23704eSSong Gao                             void (*func)(DisasContext*, int, TCGv))
5495*5c23704eSSong Gao{
5496*5c23704eSSong Gao    TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
5497*5c23704eSSong Gao    TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
5498*5c23704eSSong Gao    TCGv addr = tcg_temp_new();
5499*5c23704eSSong Gao
5500*5c23704eSSong Gao    if (!check_vec(ctx, 32)) {
5501*5c23704eSSong Gao        return true;
5502*5c23704eSSong Gao    }
5503*5c23704eSSong Gao
5504*5c23704eSSong Gao    tcg_gen_add_tl(addr, src1, src2);
5505*5c23704eSSong Gao    func(ctx, a->vd, addr);
5506*5c23704eSSong Gao
5507*5c23704eSSong Gao    return true;
5508*5c23704eSSong Gao}
5509*5c23704eSSong Gao
/*
 * Translator entries for the register-indexed 256-bit load and store:
 * both go through gen_lasx_memoryx, with gen_xvld / gen_xvst as the access
 * emitter.
 */
TRANS(xvldx, LASX, gen_lasx_memoryx, gen_xvld)
TRANS(xvstx, LASX, gen_lasx_memoryx, gen_xvst)
5512