1/* SPDX-License-Identifier: GPL-2.0-or-later */
2/*
3 * LoongArch vector translate functions
4 * Copyright (c) 2022-2023 Loongson Technology Corporation Limited
5 */
6
7static bool check_vec(DisasContext *ctx, uint32_t oprsz)
8{
9    if ((oprsz == 16) && ((ctx->base.tb->flags & HW_FLAGS_EUEN_SXE) == 0)) {
10        generate_exception(ctx, EXCCODE_SXD);
11        return false;
12    }
13
14    if ((oprsz == 32) && ((ctx->base.tb->flags & HW_FLAGS_EUEN_ASXE) == 0)) {
15        generate_exception(ctx, EXCCODE_ASXD);
16        return false;
17    }
18
19    return true;
20}
21
22static bool gen_vvvv_ptr_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
23                            gen_helper_gvec_4_ptr *fn)
24{
25    if (!check_vec(ctx, oprsz)) {
26        return true;
27    }
28
29    tcg_gen_gvec_4_ptr(vec_full_offset(a->vd),
30                       vec_full_offset(a->vj),
31                       vec_full_offset(a->vk),
32                       vec_full_offset(a->va),
33                       tcg_env,
34                       oprsz, ctx->vl / 8, 0, fn);
35    return true;
36}
37
38static bool gen_vvvv_ptr(DisasContext *ctx, arg_vvvv *a,
39                         gen_helper_gvec_4_ptr *fn)
40{
41    return gen_vvvv_ptr_vl(ctx, a, 16, fn);
42}
43
44static bool gen_xxxx_ptr(DisasContext *ctx, arg_vvvv *a,
45                         gen_helper_gvec_4_ptr *fn)
46{
47    return gen_vvvv_ptr_vl(ctx, a, 32, fn);
48}
49
50static bool gen_vvvv_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
51                        gen_helper_gvec_4 *fn)
52{
53    if (!check_vec(ctx, oprsz)) {
54        return true;
55    }
56
57    tcg_gen_gvec_4_ool(vec_full_offset(a->vd),
58                       vec_full_offset(a->vj),
59                       vec_full_offset(a->vk),
60                       vec_full_offset(a->va),
61                       oprsz, ctx->vl / 8, 0, fn);
62    return true;
63}
64
65static bool gen_vvvv(DisasContext *ctx, arg_vvvv *a,
66                     gen_helper_gvec_4 *fn)
67{
68    return gen_vvvv_vl(ctx, a, 16, fn);
69}
70
71static bool gen_xxxx(DisasContext *ctx, arg_vvvv *a,
72                     gen_helper_gvec_4 *fn)
73{
74    return gen_vvvv_vl(ctx, a, 32, fn);
75}
76
77static bool gen_vvv_ptr_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
78                           gen_helper_gvec_3_ptr *fn)
79{
80    if (!check_vec(ctx, oprsz)) {
81        return true;
82    }
83    tcg_gen_gvec_3_ptr(vec_full_offset(a->vd),
84                       vec_full_offset(a->vj),
85                       vec_full_offset(a->vk),
86                       tcg_env,
87                       oprsz, ctx->vl / 8, 0, fn);
88    return true;
89}
90
91static bool gen_vvv_ptr(DisasContext *ctx, arg_vvv *a,
92                        gen_helper_gvec_3_ptr *fn)
93{
94    return gen_vvv_ptr_vl(ctx, a, 16, fn);
95}
96
97static bool gen_xxx_ptr(DisasContext *ctx, arg_vvv *a,
98                        gen_helper_gvec_3_ptr *fn)
99{
100    return gen_vvv_ptr_vl(ctx, a, 32, fn);
101}
102
103static bool gen_vvv_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
104                       gen_helper_gvec_3 *fn)
105{
106    if (!check_vec(ctx, oprsz)) {
107        return true;
108    }
109
110    tcg_gen_gvec_3_ool(vec_full_offset(a->vd),
111                       vec_full_offset(a->vj),
112                       vec_full_offset(a->vk),
113                       oprsz, ctx->vl / 8, 0, fn);
114    return true;
115}
116
117static bool gen_vvv(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn)
118{
119    return gen_vvv_vl(ctx, a, 16, fn);
120}
121
122static bool gen_xxx(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn)
123{
124    return gen_vvv_vl(ctx, a, 32, fn);
125}
126
127static bool gen_vv_ptr_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz,
128                          gen_helper_gvec_2_ptr *fn)
129{
130    if (!check_vec(ctx, oprsz)) {
131        return true;
132    }
133
134    tcg_gen_gvec_2_ptr(vec_full_offset(a->vd),
135                       vec_full_offset(a->vj),
136                       tcg_env,
137                       oprsz, ctx->vl / 8, 0, fn);
138    return true;
139}
140
141static bool gen_vv_ptr(DisasContext *ctx, arg_vv *a,
142                       gen_helper_gvec_2_ptr *fn)
143{
144    return gen_vv_ptr_vl(ctx, a, 16, fn);
145}
146
147static bool gen_xx_ptr(DisasContext *ctx, arg_vv *a,
148                       gen_helper_gvec_2_ptr *fn)
149{
150    return gen_vv_ptr_vl(ctx, a, 32, fn);
151}
152
153static bool gen_vv_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz,
154                      gen_helper_gvec_2 *fn)
155{
156    if (!check_vec(ctx, oprsz)) {
157        return true;
158    }
159
160    tcg_gen_gvec_2_ool(vec_full_offset(a->vd),
161                       vec_full_offset(a->vj),
162                       oprsz, ctx->vl / 8, 0, fn);
163    return true;
164}
165
166static bool gen_vv(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn)
167{
168    return gen_vv_vl(ctx, a, 16, fn);
169}
170
171static bool gen_xx(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn)
172{
173    return gen_vv_vl(ctx, a, 32, fn);
174}
175
176static bool gen_vv_i_vl(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz,
177                        gen_helper_gvec_2i *fn)
178{
179    if (!check_vec(ctx, oprsz)) {
180        return true;
181    }
182
183    tcg_gen_gvec_2i_ool(vec_full_offset(a->vd),
184                        vec_full_offset(a->vj),
185                        tcg_constant_i64(a->imm),
186                        oprsz, ctx->vl / 8, 0, fn);
187    return true;
188}
189
190static bool gen_vv_i(DisasContext *ctx, arg_vv_i *a, gen_helper_gvec_2i *fn)
191{
192    return gen_vv_i_vl(ctx, a, 16, fn);
193}
194
195static bool gen_xx_i(DisasContext *ctx, arg_vv_i *a, gen_helper_gvec_2i *fn)
196{
197    return gen_vv_i_vl(ctx, a, 32, fn);
198}
199
200static bool gen_cv_vl(DisasContext *ctx, arg_cv *a, uint32_t sz,
201                      void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
202{
203    if (!check_vec(ctx, sz)) {
204        return true;
205    }
206
207    TCGv_i32 vj = tcg_constant_i32(a->vj);
208    TCGv_i32 cd = tcg_constant_i32(a->cd);
209    TCGv_i32 oprsz = tcg_constant_i32(sz);
210
211    func(tcg_env, oprsz, cd, vj);
212    return true;
213}
214
215static bool gen_cv(DisasContext *ctx, arg_cv *a,
216                   void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
217{
218    return gen_cv_vl(ctx, a, 16, func);
219}
220
221static bool gen_cx(DisasContext *ctx, arg_cv *a,
222                   void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
223{
224    return gen_cv_vl(ctx, a, 32, func);
225}
226
227static bool gvec_vvv_vl(DisasContext *ctx, arg_vvv *a,
228                        uint32_t oprsz, MemOp mop,
229                        void (*func)(unsigned, uint32_t, uint32_t,
230                                     uint32_t, uint32_t, uint32_t))
231{
232    uint32_t vd_ofs = vec_full_offset(a->vd);
233    uint32_t vj_ofs = vec_full_offset(a->vj);
234    uint32_t vk_ofs = vec_full_offset(a->vk);
235
236    if (!check_vec(ctx, oprsz)) {
237        return true;
238    }
239
240    func(mop, vd_ofs, vj_ofs, vk_ofs, oprsz, ctx->vl / 8);
241    return true;
242}
243
244static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop,
245                     void (*func)(unsigned, uint32_t, uint32_t,
246                                  uint32_t, uint32_t, uint32_t))
247{
248    return gvec_vvv_vl(ctx, a, 16, mop, func);
249}
250
251static bool gvec_xxx(DisasContext *ctx, arg_vvv *a, MemOp mop,
252                     void (*func)(unsigned, uint32_t, uint32_t,
253                                  uint32_t, uint32_t, uint32_t))
254{
255    return gvec_vvv_vl(ctx, a, 32, mop, func);
256}
257
258static bool gvec_vv_vl(DisasContext *ctx, arg_vv *a,
259                       uint32_t oprsz, MemOp mop,
260                       void (*func)(unsigned, uint32_t, uint32_t,
261                                    uint32_t, uint32_t))
262{
263    uint32_t vd_ofs = vec_full_offset(a->vd);
264    uint32_t vj_ofs = vec_full_offset(a->vj);
265
266    if (!check_vec(ctx, oprsz)) {
267        return true;
268    }
269
270    func(mop, vd_ofs, vj_ofs, oprsz, ctx->vl / 8);
271    return true;
272}
273
274
275static bool gvec_vv(DisasContext *ctx, arg_vv *a, MemOp mop,
276                    void (*func)(unsigned, uint32_t, uint32_t,
277                                 uint32_t, uint32_t))
278{
279    return gvec_vv_vl(ctx, a, 16, mop, func);
280}
281
282static bool gvec_xx(DisasContext *ctx, arg_vv *a, MemOp mop,
283                    void (*func)(unsigned, uint32_t, uint32_t,
284                                 uint32_t, uint32_t))
285{
286    return gvec_vv_vl(ctx, a, 32, mop, func);
287}
288
289static bool gvec_vv_i_vl(DisasContext *ctx, arg_vv_i *a,
290                         uint32_t oprsz, MemOp mop,
291                         void (*func)(unsigned, uint32_t, uint32_t,
292                                      int64_t, uint32_t, uint32_t))
293{
294    uint32_t vd_ofs = vec_full_offset(a->vd);
295    uint32_t vj_ofs = vec_full_offset(a->vj);
296
297    if (!check_vec(ctx, oprsz)) {
298        return true;
299    }
300
301    func(mop, vd_ofs, vj_ofs, a->imm, oprsz, ctx->vl / 8);
302    return true;
303}
304
305static bool gvec_vv_i(DisasContext *ctx, arg_vv_i *a, MemOp mop,
306                      void (*func)(unsigned, uint32_t, uint32_t,
307                                   int64_t, uint32_t, uint32_t))
308{
309    return gvec_vv_i_vl(ctx, a, 16, mop, func);
310}
311
312static bool gvec_xx_i(DisasContext *ctx, arg_vv_i *a, MemOp mop,
313                      void (*func)(unsigned, uint32_t, uint32_t,
314                                   int64_t, uint32_t, uint32_t))
315{
316    return gvec_vv_i_vl(ctx,a, 32, mop, func);
317}
318
319static bool gvec_subi_vl(DisasContext *ctx, arg_vv_i *a,
320                         uint32_t oprsz, MemOp mop)
321{
322    uint32_t vd_ofs = vec_full_offset(a->vd);
323    uint32_t vj_ofs = vec_full_offset(a->vj);
324
325    if (!check_vec(ctx, oprsz)) {
326        return true;
327    }
328
329    tcg_gen_gvec_addi(mop, vd_ofs, vj_ofs, -a->imm, oprsz, ctx->vl / 8);
330    return true;
331}
332
333static bool gvec_subi(DisasContext *ctx, arg_vv_i *a, MemOp mop)
334{
335    return gvec_subi_vl(ctx, a, 16, mop);
336}
337
338static bool gvec_xsubi(DisasContext *ctx, arg_vv_i *a, MemOp mop)
339{
340    return gvec_subi_vl(ctx, a, 32, mop);
341}
342
/*
 * vadd_* / xvadd_*: tcg_gen_gvec_add at element sizes MO_8..MO_64.
 * The v* names go through gvec_vvv (16-byte operation), the xv* names
 * through gvec_xxx (32-byte operation).
 */
TRANS(vadd_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_add)
TRANS(vadd_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_add)
TRANS(vadd_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_add)
TRANS(vadd_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_add)
TRANS(xvadd_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_add)
TRANS(xvadd_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_add)
TRANS(xvadd_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_add)
TRANS(xvadd_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_add)
351
352static bool gen_vaddsub_q_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
353                             void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
354                                          TCGv_i64, TCGv_i64, TCGv_i64))
355{
356    int i;
357    TCGv_i64 rh, rl, ah, al, bh, bl;
358
359    if (!check_vec(ctx, oprsz)) {
360        return true;
361    }
362
363    rh = tcg_temp_new_i64();
364    rl = tcg_temp_new_i64();
365    ah = tcg_temp_new_i64();
366    al = tcg_temp_new_i64();
367    bh = tcg_temp_new_i64();
368    bl = tcg_temp_new_i64();
369
370    for (i = 0; i < oprsz / 16; i++) {
371        get_vreg64(ah, a->vj, 1 + i * 2);
372        get_vreg64(al, a->vj, i * 2);
373        get_vreg64(bh, a->vk, 1 + i * 2);
374        get_vreg64(bl, a->vk, i * 2);
375
376        func(rl, rh, al, ah, bl, bh);
377
378        set_vreg64(rh, a->vd, 1 + i * 2);
379        set_vreg64(rl, a->vd, i * 2);
380    }
381    return true;
382}
383
384static bool gen_vaddsub_q(DisasContext *ctx, arg_vvv *a,
385                          void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
386                                       TCGv_i64, TCGv_i64, TCGv_i64))
387{
388    return gen_vaddsub_q_vl(ctx, a, 16, func);
389}
390
391static bool gen_xvaddsub_q(DisasContext *ctx, arg_vvv *a,
392                           void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
393                                        TCGv_i64, TCGv_i64, TCGv_i64))
394{
395    return gen_vaddsub_q_vl(ctx, a, 32, func);
396}
397
/* vsub_* / xvsub_*: tcg_gen_gvec_sub at element sizes MO_8..MO_64. */
TRANS(vsub_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sub)
TRANS(vsub_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sub)
TRANS(vsub_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_sub)
TRANS(vsub_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_sub)
TRANS(xvsub_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sub)
TRANS(xvsub_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sub)
TRANS(xvsub_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sub)
TRANS(xvsub_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sub)

/*
 * *_q forms: handled per 16-byte chunk by gen_vaddsub_q / gen_xvaddsub_q
 * using the two-word tcg_gen_add2_i64 / tcg_gen_sub2_i64 operations.
 */
TRANS(vadd_q, LSX, gen_vaddsub_q, tcg_gen_add2_i64)
TRANS(vsub_q, LSX, gen_vaddsub_q, tcg_gen_sub2_i64)
TRANS(xvadd_q, LASX, gen_xvaddsub_q, tcg_gen_add2_i64)
TRANS(xvsub_q, LASX, gen_xvaddsub_q, tcg_gen_sub2_i64)

/*
 * Immediate forms: the add variants call tcg_gen_gvec_addi directly;
 * the sub variants go through gvec_subi / gvec_xsubi, which pass the
 * negated immediate to tcg_gen_gvec_addi.
 */
TRANS(vaddi_bu, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_addi)
TRANS(vaddi_hu, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_addi)
TRANS(vaddi_wu, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_addi)
TRANS(vaddi_du, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_addi)
TRANS(vsubi_bu, LSX, gvec_subi, MO_8)
TRANS(vsubi_hu, LSX, gvec_subi, MO_16)
TRANS(vsubi_wu, LSX, gvec_subi, MO_32)
TRANS(vsubi_du, LSX, gvec_subi, MO_64)
TRANS(xvaddi_bu, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_addi)
TRANS(xvaddi_hu, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_addi)
TRANS(xvaddi_wu, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_addi)
TRANS(xvaddi_du, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_addi)
TRANS(xvsubi_bu, LASX, gvec_xsubi, MO_8)
TRANS(xvsubi_hu, LASX, gvec_xsubi, MO_16)
TRANS(xvsubi_wu, LASX, gvec_xsubi, MO_32)
TRANS(xvsubi_du, LASX, gvec_xsubi, MO_64)

/* vneg_* / xvneg_*: tcg_gen_gvec_neg at element sizes MO_8..MO_64. */
TRANS(vneg_b, LSX, gvec_vv, MO_8, tcg_gen_gvec_neg)
TRANS(vneg_h, LSX, gvec_vv, MO_16, tcg_gen_gvec_neg)
TRANS(vneg_w, LSX, gvec_vv, MO_32, tcg_gen_gvec_neg)
TRANS(vneg_d, LSX, gvec_vv, MO_64, tcg_gen_gvec_neg)
TRANS(xvneg_b, LASX, gvec_xx, MO_8, tcg_gen_gvec_neg)
TRANS(xvneg_h, LASX, gvec_xx, MO_16, tcg_gen_gvec_neg)
TRANS(xvneg_w, LASX, gvec_xx, MO_32, tcg_gen_gvec_neg)
TRANS(xvneg_d, LASX, gvec_xx, MO_64, tcg_gen_gvec_neg)

/*
 * vsadd_* / vssub_* (16-byte): the plain names map to the tcg ssadd /
 * sssub expanders, the *u names to the usadd / ussub expanders.
 */
TRANS(vsadd_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_ssadd)
TRANS(vsadd_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_ssadd)
TRANS(vsadd_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_ssadd)
TRANS(vsadd_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_ssadd)
TRANS(vsadd_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_usadd)
TRANS(vsadd_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_usadd)
TRANS(vsadd_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_usadd)
TRANS(vsadd_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_usadd)
TRANS(vssub_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sssub)
TRANS(vssub_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sssub)
TRANS(vssub_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_sssub)
TRANS(vssub_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_sssub)
TRANS(vssub_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_ussub)
TRANS(vssub_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_ussub)
TRANS(vssub_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_ussub)
TRANS(vssub_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_ussub)

/* xvsadd_* / xvssub_*: same expanders through the 32-byte gvec_xxx path. */
TRANS(xvsadd_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_ssadd)
TRANS(xvsadd_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_ssadd)
TRANS(xvsadd_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_ssadd)
TRANS(xvsadd_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_ssadd)
TRANS(xvsadd_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_usadd)
TRANS(xvsadd_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_usadd)
TRANS(xvsadd_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_usadd)
TRANS(xvsadd_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_usadd)
TRANS(xvssub_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sssub)
TRANS(xvssub_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sssub)
TRANS(xvssub_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sssub)
TRANS(xvssub_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sssub)
TRANS(xvssub_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_ussub)
TRANS(xvssub_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_ussub)
TRANS(xvssub_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_ussub)
TRANS(xvssub_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_ussub)

/* vhaddw_* / vhsubw_*: out-of-line helpers called via gen_vvv (16-byte). */
TRANS(vhaddw_h_b, LSX, gen_vvv, gen_helper_vhaddw_h_b)
TRANS(vhaddw_w_h, LSX, gen_vvv, gen_helper_vhaddw_w_h)
TRANS(vhaddw_d_w, LSX, gen_vvv, gen_helper_vhaddw_d_w)
TRANS(vhaddw_q_d, LSX, gen_vvv, gen_helper_vhaddw_q_d)
TRANS(vhaddw_hu_bu, LSX, gen_vvv, gen_helper_vhaddw_hu_bu)
TRANS(vhaddw_wu_hu, LSX, gen_vvv, gen_helper_vhaddw_wu_hu)
TRANS(vhaddw_du_wu, LSX, gen_vvv, gen_helper_vhaddw_du_wu)
TRANS(vhaddw_qu_du, LSX, gen_vvv, gen_helper_vhaddw_qu_du)
TRANS(vhsubw_h_b, LSX, gen_vvv, gen_helper_vhsubw_h_b)
TRANS(vhsubw_w_h, LSX, gen_vvv, gen_helper_vhsubw_w_h)
TRANS(vhsubw_d_w, LSX, gen_vvv, gen_helper_vhsubw_d_w)
TRANS(vhsubw_q_d, LSX, gen_vvv, gen_helper_vhsubw_q_d)
TRANS(vhsubw_hu_bu, LSX, gen_vvv, gen_helper_vhsubw_hu_bu)
TRANS(vhsubw_wu_hu, LSX, gen_vvv, gen_helper_vhsubw_wu_hu)
TRANS(vhsubw_du_wu, LSX, gen_vvv, gen_helper_vhsubw_du_wu)
TRANS(vhsubw_qu_du, LSX, gen_vvv, gen_helper_vhsubw_qu_du)

/* xvhaddw_* / xvhsubw_*: the same helpers, called via gen_xxx (32-byte). */
TRANS(xvhaddw_h_b, LASX, gen_xxx, gen_helper_vhaddw_h_b)
TRANS(xvhaddw_w_h, LASX, gen_xxx, gen_helper_vhaddw_w_h)
TRANS(xvhaddw_d_w, LASX, gen_xxx, gen_helper_vhaddw_d_w)
TRANS(xvhaddw_q_d, LASX, gen_xxx, gen_helper_vhaddw_q_d)
TRANS(xvhaddw_hu_bu, LASX, gen_xxx, gen_helper_vhaddw_hu_bu)
TRANS(xvhaddw_wu_hu, LASX, gen_xxx, gen_helper_vhaddw_wu_hu)
TRANS(xvhaddw_du_wu, LASX, gen_xxx, gen_helper_vhaddw_du_wu)
TRANS(xvhaddw_qu_du, LASX, gen_xxx, gen_helper_vhaddw_qu_du)
TRANS(xvhsubw_h_b, LASX, gen_xxx, gen_helper_vhsubw_h_b)
TRANS(xvhsubw_w_h, LASX, gen_xxx, gen_helper_vhsubw_w_h)
TRANS(xvhsubw_d_w, LASX, gen_xxx, gen_helper_vhsubw_d_w)
TRANS(xvhsubw_q_d, LASX, gen_xxx, gen_helper_vhsubw_q_d)
TRANS(xvhsubw_hu_bu, LASX, gen_xxx, gen_helper_vhsubw_hu_bu)
TRANS(xvhsubw_wu_hu, LASX, gen_xxx, gen_helper_vhsubw_wu_hu)
TRANS(xvhsubw_du_wu, LASX, gen_xxx, gen_helper_vhsubw_du_wu)
TRANS(xvhsubw_qu_du, LASX, gen_xxx, gen_helper_vhsubw_qu_du)
505
506static void gen_vaddwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
507{
508    TCGv_vec t1, t2;
509
510    int halfbits = 4 << vece;
511
512    t1 = tcg_temp_new_vec_matching(a);
513    t2 = tcg_temp_new_vec_matching(b);
514
515    /* Sign-extend the even elements from a */
516    tcg_gen_shli_vec(vece, t1, a, halfbits);
517    tcg_gen_sari_vec(vece, t1, t1, halfbits);
518
519    /* Sign-extend the even elements from b */
520    tcg_gen_shli_vec(vece, t2, b, halfbits);
521    tcg_gen_sari_vec(vece, t2, t2, halfbits);
522
523    tcg_gen_add_vec(vece, t, t1, t2);
524}
525
526static void gen_vaddwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
527{
528    TCGv_i32 t1, t2;
529
530    t1 = tcg_temp_new_i32();
531    t2 = tcg_temp_new_i32();
532    tcg_gen_ext16s_i32(t1, a);
533    tcg_gen_ext16s_i32(t2, b);
534    tcg_gen_add_i32(t, t1, t2);
535}
536
537static void gen_vaddwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
538{
539    TCGv_i64 t1, t2;
540
541    t1 = tcg_temp_new_i64();
542    t2 = tcg_temp_new_i64();
543    tcg_gen_ext32s_i64(t1, a);
544    tcg_gen_ext32s_i64(t2, b);
545    tcg_gen_add_i64(t, t1, t2);
546}
547
548static void do_vaddwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
549                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
550{
551    static const TCGOpcode vecop_list[] = {
552        INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
553        };
554    static const GVecGen3 op[4] = {
555        {
556            .fniv = gen_vaddwev_s,
557            .fno = gen_helper_vaddwev_h_b,
558            .opt_opc = vecop_list,
559            .vece = MO_16
560        },
561        {
562            .fni4 = gen_vaddwev_w_h,
563            .fniv = gen_vaddwev_s,
564            .fno = gen_helper_vaddwev_w_h,
565            .opt_opc = vecop_list,
566            .vece = MO_32
567        },
568        {
569            .fni8 = gen_vaddwev_d_w,
570            .fniv = gen_vaddwev_s,
571            .fno = gen_helper_vaddwev_d_w,
572            .opt_opc = vecop_list,
573            .vece = MO_64
574        },
575        {
576            .fno = gen_helper_vaddwev_q_d,
577            .vece = MO_128
578        },
579    };
580
581    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
582}
583
/*
 * vaddwev_* / xvaddwev_* (signed): the MemOp given here is the one
 * do_vaddwev_s() uses to index its descriptor table.
 */
TRANS(vaddwev_h_b, LSX, gvec_vvv, MO_8, do_vaddwev_s)
TRANS(vaddwev_w_h, LSX, gvec_vvv, MO_16, do_vaddwev_s)
TRANS(vaddwev_d_w, LSX, gvec_vvv, MO_32, do_vaddwev_s)
TRANS(vaddwev_q_d, LSX, gvec_vvv, MO_64, do_vaddwev_s)
TRANS(xvaddwev_h_b, LASX, gvec_xxx, MO_8, do_vaddwev_s)
TRANS(xvaddwev_w_h, LASX, gvec_xxx, MO_16, do_vaddwev_s)
TRANS(xvaddwev_d_w, LASX, gvec_xxx, MO_32, do_vaddwev_s)
TRANS(xvaddwev_q_d, LASX, gvec_xxx, MO_64, do_vaddwev_s)
592
593static void gen_vaddwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
594{
595    TCGv_i32 t1, t2;
596
597    t1 = tcg_temp_new_i32();
598    t2 = tcg_temp_new_i32();
599    tcg_gen_sari_i32(t1, a, 16);
600    tcg_gen_sari_i32(t2, b, 16);
601    tcg_gen_add_i32(t, t1, t2);
602}
603
604static void gen_vaddwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
605{
606    TCGv_i64 t1, t2;
607
608    t1 = tcg_temp_new_i64();
609    t2 = tcg_temp_new_i64();
610    tcg_gen_sari_i64(t1, a, 32);
611    tcg_gen_sari_i64(t2, b, 32);
612    tcg_gen_add_i64(t, t1, t2);
613}
614
615static void gen_vaddwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
616{
617    TCGv_vec t1, t2;
618
619    int halfbits = 4 << vece;
620
621    t1 = tcg_temp_new_vec_matching(a);
622    t2 = tcg_temp_new_vec_matching(b);
623
624    /* Sign-extend the odd elements for vector */
625    tcg_gen_sari_vec(vece, t1, a, halfbits);
626    tcg_gen_sari_vec(vece, t2, b, halfbits);
627
628    tcg_gen_add_vec(vece, t, t1, t2);
629}
630
631static void do_vaddwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
632                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
633{
634    static const TCGOpcode vecop_list[] = {
635        INDEX_op_sari_vec, INDEX_op_add_vec, 0
636        };
637    static const GVecGen3 op[4] = {
638        {
639            .fniv = gen_vaddwod_s,
640            .fno = gen_helper_vaddwod_h_b,
641            .opt_opc = vecop_list,
642            .vece = MO_16
643        },
644        {
645            .fni4 = gen_vaddwod_w_h,
646            .fniv = gen_vaddwod_s,
647            .fno = gen_helper_vaddwod_w_h,
648            .opt_opc = vecop_list,
649            .vece = MO_32
650        },
651        {
652            .fni8 = gen_vaddwod_d_w,
653            .fniv = gen_vaddwod_s,
654            .fno = gen_helper_vaddwod_d_w,
655            .opt_opc = vecop_list,
656            .vece = MO_64
657        },
658        {
659            .fno = gen_helper_vaddwod_q_d,
660            .vece = MO_128
661        },
662    };
663
664    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
665}
666
/*
 * vaddwod_* / xvaddwod_* (signed): the MemOp given here is the one
 * do_vaddwod_s() uses to index its descriptor table.
 */
TRANS(vaddwod_h_b, LSX, gvec_vvv, MO_8, do_vaddwod_s)
TRANS(vaddwod_w_h, LSX, gvec_vvv, MO_16, do_vaddwod_s)
TRANS(vaddwod_d_w, LSX, gvec_vvv, MO_32, do_vaddwod_s)
TRANS(vaddwod_q_d, LSX, gvec_vvv, MO_64, do_vaddwod_s)
TRANS(xvaddwod_h_b, LASX, gvec_xxx, MO_8, do_vaddwod_s)
TRANS(xvaddwod_w_h, LASX, gvec_xxx, MO_16, do_vaddwod_s)
TRANS(xvaddwod_d_w, LASX, gvec_xxx, MO_32, do_vaddwod_s)
TRANS(xvaddwod_q_d, LASX, gvec_xxx, MO_64, do_vaddwod_s)
675
676
677static void gen_vsubwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
678{
679    TCGv_vec t1, t2;
680
681    int halfbits = 4 << vece;
682
683    t1 = tcg_temp_new_vec_matching(a);
684    t2 = tcg_temp_new_vec_matching(b);
685
686    /* Sign-extend the even elements from a */
687    tcg_gen_shli_vec(vece, t1, a, halfbits);
688    tcg_gen_sari_vec(vece, t1, t1, halfbits);
689
690    /* Sign-extend the even elements from b */
691    tcg_gen_shli_vec(vece, t2, b, halfbits);
692    tcg_gen_sari_vec(vece, t2, t2, halfbits);
693
694    tcg_gen_sub_vec(vece, t, t1, t2);
695}
696
697static void gen_vsubwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
698{
699    TCGv_i32 t1, t2;
700
701    t1 = tcg_temp_new_i32();
702    t2 = tcg_temp_new_i32();
703    tcg_gen_ext16s_i32(t1, a);
704    tcg_gen_ext16s_i32(t2, b);
705    tcg_gen_sub_i32(t, t1, t2);
706}
707
708static void gen_vsubwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
709{
710    TCGv_i64 t1, t2;
711
712    t1 = tcg_temp_new_i64();
713    t2 = tcg_temp_new_i64();
714    tcg_gen_ext32s_i64(t1, a);
715    tcg_gen_ext32s_i64(t2, b);
716    tcg_gen_sub_i64(t, t1, t2);
717}
718
719static void do_vsubwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
720                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
721{
722    static const TCGOpcode vecop_list[] = {
723        INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_sub_vec, 0
724        };
725    static const GVecGen3 op[4] = {
726        {
727            .fniv = gen_vsubwev_s,
728            .fno = gen_helper_vsubwev_h_b,
729            .opt_opc = vecop_list,
730            .vece = MO_16
731        },
732        {
733            .fni4 = gen_vsubwev_w_h,
734            .fniv = gen_vsubwev_s,
735            .fno = gen_helper_vsubwev_w_h,
736            .opt_opc = vecop_list,
737            .vece = MO_32
738        },
739        {
740            .fni8 = gen_vsubwev_d_w,
741            .fniv = gen_vsubwev_s,
742            .fno = gen_helper_vsubwev_d_w,
743            .opt_opc = vecop_list,
744            .vece = MO_64
745        },
746        {
747            .fno = gen_helper_vsubwev_q_d,
748            .vece = MO_128
749        },
750    };
751
752    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
753}
754
/*
 * vsubwev_* / xvsubwev_* (signed): the MemOp given here is the one
 * do_vsubwev_s() uses to index its descriptor table.
 */
TRANS(vsubwev_h_b, LSX, gvec_vvv, MO_8, do_vsubwev_s)
TRANS(vsubwev_w_h, LSX, gvec_vvv, MO_16, do_vsubwev_s)
TRANS(vsubwev_d_w, LSX, gvec_vvv, MO_32, do_vsubwev_s)
TRANS(vsubwev_q_d, LSX, gvec_vvv, MO_64, do_vsubwev_s)
TRANS(xvsubwev_h_b, LASX, gvec_xxx, MO_8, do_vsubwev_s)
TRANS(xvsubwev_w_h, LASX, gvec_xxx, MO_16, do_vsubwev_s)
TRANS(xvsubwev_d_w, LASX, gvec_xxx, MO_32, do_vsubwev_s)
TRANS(xvsubwev_q_d, LASX, gvec_xxx, MO_64, do_vsubwev_s)
763
764static void gen_vsubwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
765{
766    TCGv_vec t1, t2;
767
768    int halfbits = 4 << vece;
769
770    t1 = tcg_temp_new_vec_matching(a);
771    t2 = tcg_temp_new_vec_matching(b);
772
773    /* Sign-extend the odd elements for vector */
774    tcg_gen_sari_vec(vece, t1, a, halfbits);
775    tcg_gen_sari_vec(vece, t2, b, halfbits);
776
777    tcg_gen_sub_vec(vece, t, t1, t2);
778}
779
780static void gen_vsubwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
781{
782    TCGv_i32 t1, t2;
783
784    t1 = tcg_temp_new_i32();
785    t2 = tcg_temp_new_i32();
786    tcg_gen_sari_i32(t1, a, 16);
787    tcg_gen_sari_i32(t2, b, 16);
788    tcg_gen_sub_i32(t, t1, t2);
789}
790
791static void gen_vsubwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
792{
793    TCGv_i64 t1, t2;
794
795    t1 = tcg_temp_new_i64();
796    t2 = tcg_temp_new_i64();
797    tcg_gen_sari_i64(t1, a, 32);
798    tcg_gen_sari_i64(t2, b, 32);
799    tcg_gen_sub_i64(t, t1, t2);
800}
801
802static void do_vsubwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
803                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
804{
805    static const TCGOpcode vecop_list[] = {
806        INDEX_op_sari_vec, INDEX_op_sub_vec, 0
807        };
808    static const GVecGen3 op[4] = {
809        {
810            .fniv = gen_vsubwod_s,
811            .fno = gen_helper_vsubwod_h_b,
812            .opt_opc = vecop_list,
813            .vece = MO_16
814        },
815        {
816            .fni4 = gen_vsubwod_w_h,
817            .fniv = gen_vsubwod_s,
818            .fno = gen_helper_vsubwod_w_h,
819            .opt_opc = vecop_list,
820            .vece = MO_32
821        },
822        {
823            .fni8 = gen_vsubwod_d_w,
824            .fniv = gen_vsubwod_s,
825            .fno = gen_helper_vsubwod_d_w,
826            .opt_opc = vecop_list,
827            .vece = MO_64
828        },
829        {
830            .fno = gen_helper_vsubwod_q_d,
831            .vece = MO_128
832        },
833    };
834
835    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
836}
837
/*
 * vsubwod_* / xvsubwod_* (signed): the MemOp given here is the one
 * do_vsubwod_s() uses to index its descriptor table.
 */
TRANS(vsubwod_h_b, LSX, gvec_vvv, MO_8, do_vsubwod_s)
TRANS(vsubwod_w_h, LSX, gvec_vvv, MO_16, do_vsubwod_s)
TRANS(vsubwod_d_w, LSX, gvec_vvv, MO_32, do_vsubwod_s)
TRANS(vsubwod_q_d, LSX, gvec_vvv, MO_64, do_vsubwod_s)
TRANS(xvsubwod_h_b, LASX, gvec_xxx, MO_8, do_vsubwod_s)
TRANS(xvsubwod_w_h, LASX, gvec_xxx, MO_16, do_vsubwod_s)
TRANS(xvsubwod_d_w, LASX, gvec_xxx, MO_32, do_vsubwod_s)
TRANS(xvsubwod_q_d, LASX, gvec_xxx, MO_64, do_vsubwod_s)
846
847static void gen_vaddwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
848{
849    TCGv_vec t1, t2, t3;
850
851    t1 = tcg_temp_new_vec_matching(a);
852    t2 = tcg_temp_new_vec_matching(b);
853    t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
854    tcg_gen_and_vec(vece, t1, a, t3);
855    tcg_gen_and_vec(vece, t2, b, t3);
856    tcg_gen_add_vec(vece, t, t1, t2);
857}
858
859static void gen_vaddwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
860{
861    TCGv_i32 t1, t2;
862
863    t1 = tcg_temp_new_i32();
864    t2 = tcg_temp_new_i32();
865    tcg_gen_ext16u_i32(t1, a);
866    tcg_gen_ext16u_i32(t2, b);
867    tcg_gen_add_i32(t, t1, t2);
868}
869
870static void gen_vaddwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
871{
872    TCGv_i64 t1, t2;
873
874    t1 = tcg_temp_new_i64();
875    t2 = tcg_temp_new_i64();
876    tcg_gen_ext32u_i64(t1, a);
877    tcg_gen_ext32u_i64(t2, b);
878    tcg_gen_add_i64(t, t1, t2);
879}
880
881static void do_vaddwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
882                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
883{
884    static const TCGOpcode vecop_list[] = {
885        INDEX_op_add_vec, 0
886        };
887    static const GVecGen3 op[4] = {
888        {
889            .fniv = gen_vaddwev_u,
890            .fno = gen_helper_vaddwev_h_bu,
891            .opt_opc = vecop_list,
892            .vece = MO_16
893        },
894        {
895            .fni4 = gen_vaddwev_w_hu,
896            .fniv = gen_vaddwev_u,
897            .fno = gen_helper_vaddwev_w_hu,
898            .opt_opc = vecop_list,
899            .vece = MO_32
900        },
901        {
902            .fni8 = gen_vaddwev_d_wu,
903            .fniv = gen_vaddwev_u,
904            .fno = gen_helper_vaddwev_d_wu,
905            .opt_opc = vecop_list,
906            .vece = MO_64
907        },
908        {
909            .fno = gen_helper_vaddwev_q_du,
910            .vece = MO_128
911        },
912    };
913
914    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
915}
916
/*
 * vaddwev_*u / xvaddwev_*u (unsigned): the MemOp given here is the one
 * do_vaddwev_u() uses to index its descriptor table.
 */
TRANS(vaddwev_h_bu, LSX, gvec_vvv, MO_8, do_vaddwev_u)
TRANS(vaddwev_w_hu, LSX, gvec_vvv, MO_16, do_vaddwev_u)
TRANS(vaddwev_d_wu, LSX, gvec_vvv, MO_32, do_vaddwev_u)
TRANS(vaddwev_q_du, LSX, gvec_vvv, MO_64, do_vaddwev_u)
TRANS(xvaddwev_h_bu, LASX, gvec_xxx, MO_8, do_vaddwev_u)
TRANS(xvaddwev_w_hu, LASX, gvec_xxx, MO_16, do_vaddwev_u)
TRANS(xvaddwev_d_wu, LASX, gvec_xxx, MO_32, do_vaddwev_u)
TRANS(xvaddwev_q_du, LASX, gvec_xxx, MO_64, do_vaddwev_u)
925
926static void gen_vaddwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
927{
928    TCGv_vec t1, t2;
929
930    int halfbits = 4 << vece;
931
932    t1 = tcg_temp_new_vec_matching(a);
933    t2 = tcg_temp_new_vec_matching(b);
934
935    /* Zero-extend the odd elements for vector */
936    tcg_gen_shri_vec(vece, t1, a, halfbits);
937    tcg_gen_shri_vec(vece, t2, b, halfbits);
938
939    tcg_gen_add_vec(vece, t, t1, t2);
940}
941
942static void gen_vaddwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
943{
944    TCGv_i32 t1, t2;
945
946    t1 = tcg_temp_new_i32();
947    t2 = tcg_temp_new_i32();
948    tcg_gen_shri_i32(t1, a, 16);
949    tcg_gen_shri_i32(t2, b, 16);
950    tcg_gen_add_i32(t, t1, t2);
951}
952
953static void gen_vaddwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
954{
955    TCGv_i64 t1, t2;
956
957    t1 = tcg_temp_new_i64();
958    t2 = tcg_temp_new_i64();
959    tcg_gen_shri_i64(t1, a, 32);
960    tcg_gen_shri_i64(t2, b, 32);
961    tcg_gen_add_i64(t, t1, t2);
962}
963
964static void do_vaddwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
965                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
966{
967    static const TCGOpcode vecop_list[] = {
968        INDEX_op_shri_vec, INDEX_op_add_vec, 0
969        };
970    static const GVecGen3 op[4] = {
971        {
972            .fniv = gen_vaddwod_u,
973            .fno = gen_helper_vaddwod_h_bu,
974            .opt_opc = vecop_list,
975            .vece = MO_16
976        },
977        {
978            .fni4 = gen_vaddwod_w_hu,
979            .fniv = gen_vaddwod_u,
980            .fno = gen_helper_vaddwod_w_hu,
981            .opt_opc = vecop_list,
982            .vece = MO_32
983        },
984        {
985            .fni8 = gen_vaddwod_d_wu,
986            .fniv = gen_vaddwod_u,
987            .fno = gen_helper_vaddwod_d_wu,
988            .opt_opc = vecop_list,
989            .vece = MO_64
990        },
991        {
992            .fno = gen_helper_vaddwod_q_du,
993            .vece = MO_128
994        },
995    };
996
997    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
998}
999
/*
 * Translator entries for the unsigned odd-element widening add.
 * v* forms are tagged LSX and use gvec_vvv; xv* forms are tagged LASX and
 * use gvec_xxx.  The element-size argument is the source element size,
 * which do_vaddwod_u() uses to index its descriptor table.
 */
TRANS(vaddwod_h_bu, LSX, gvec_vvv, MO_8, do_vaddwod_u)
TRANS(vaddwod_w_hu, LSX, gvec_vvv, MO_16, do_vaddwod_u)
TRANS(vaddwod_d_wu, LSX, gvec_vvv, MO_32, do_vaddwod_u)
TRANS(vaddwod_q_du, LSX, gvec_vvv, MO_64, do_vaddwod_u)
TRANS(xvaddwod_h_bu, LASX, gvec_xxx, MO_8, do_vaddwod_u)
TRANS(xvaddwod_w_hu, LASX, gvec_xxx, MO_16, do_vaddwod_u)
TRANS(xvaddwod_d_wu, LASX, gvec_xxx, MO_32, do_vaddwod_u)
TRANS(xvaddwod_q_du, LASX, gvec_xxx, MO_64, do_vaddwod_u)
1008
1009static void gen_vsubwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1010{
1011    TCGv_vec t1, t2, t3;
1012
1013    t1 = tcg_temp_new_vec_matching(a);
1014    t2 = tcg_temp_new_vec_matching(b);
1015    t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
1016    tcg_gen_and_vec(vece, t1, a, t3);
1017    tcg_gen_and_vec(vece, t2, b, t3);
1018    tcg_gen_sub_vec(vece, t, t1, t2);
1019}
1020
1021static void gen_vsubwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
1022{
1023    TCGv_i32 t1, t2;
1024
1025    t1 = tcg_temp_new_i32();
1026    t2 = tcg_temp_new_i32();
1027    tcg_gen_ext16u_i32(t1, a);
1028    tcg_gen_ext16u_i32(t2, b);
1029    tcg_gen_sub_i32(t, t1, t2);
1030}
1031
1032static void gen_vsubwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1033{
1034    TCGv_i64 t1, t2;
1035
1036    t1 = tcg_temp_new_i64();
1037    t2 = tcg_temp_new_i64();
1038    tcg_gen_ext32u_i64(t1, a);
1039    tcg_gen_ext32u_i64(t2, b);
1040    tcg_gen_sub_i64(t, t1, t2);
1041}
1042
1043static void do_vsubwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1044                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1045{
1046    static const TCGOpcode vecop_list[] = {
1047        INDEX_op_sub_vec, 0
1048        };
1049    static const GVecGen3 op[4] = {
1050        {
1051            .fniv = gen_vsubwev_u,
1052            .fno = gen_helper_vsubwev_h_bu,
1053            .opt_opc = vecop_list,
1054            .vece = MO_16
1055        },
1056        {
1057            .fni4 = gen_vsubwev_w_hu,
1058            .fniv = gen_vsubwev_u,
1059            .fno = gen_helper_vsubwev_w_hu,
1060            .opt_opc = vecop_list,
1061            .vece = MO_32
1062        },
1063        {
1064            .fni8 = gen_vsubwev_d_wu,
1065            .fniv = gen_vsubwev_u,
1066            .fno = gen_helper_vsubwev_d_wu,
1067            .opt_opc = vecop_list,
1068            .vece = MO_64
1069        },
1070        {
1071            .fno = gen_helper_vsubwev_q_du,
1072            .vece = MO_128
1073        },
1074    };
1075
1076    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1077}
1078
/*
 * Translator entries for the unsigned even-element widening subtract.
 * v* forms are tagged LSX and use gvec_vvv; xv* forms are tagged LASX and
 * use gvec_xxx.  The element-size argument is the source element size,
 * which do_vsubwev_u() uses to index its descriptor table.
 */
TRANS(vsubwev_h_bu, LSX, gvec_vvv, MO_8, do_vsubwev_u)
TRANS(vsubwev_w_hu, LSX, gvec_vvv, MO_16, do_vsubwev_u)
TRANS(vsubwev_d_wu, LSX, gvec_vvv, MO_32, do_vsubwev_u)
TRANS(vsubwev_q_du, LSX, gvec_vvv, MO_64, do_vsubwev_u)
TRANS(xvsubwev_h_bu, LASX, gvec_xxx, MO_8, do_vsubwev_u)
TRANS(xvsubwev_w_hu, LASX, gvec_xxx, MO_16, do_vsubwev_u)
TRANS(xvsubwev_d_wu, LASX, gvec_xxx, MO_32, do_vsubwev_u)
TRANS(xvsubwev_q_du, LASX, gvec_xxx, MO_64, do_vsubwev_u)
1087
1088static void gen_vsubwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1089{
1090    TCGv_vec t1, t2;
1091
1092    int halfbits = 4 << vece;
1093
1094    t1 = tcg_temp_new_vec_matching(a);
1095    t2 = tcg_temp_new_vec_matching(b);
1096
1097    /* Zero-extend the odd elements for vector */
1098    tcg_gen_shri_vec(vece, t1, a, halfbits);
1099    tcg_gen_shri_vec(vece, t2, b, halfbits);
1100
1101    tcg_gen_sub_vec(vece, t, t1, t2);
1102}
1103
1104static void gen_vsubwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
1105{
1106    TCGv_i32 t1, t2;
1107
1108    t1 = tcg_temp_new_i32();
1109    t2 = tcg_temp_new_i32();
1110    tcg_gen_shri_i32(t1, a, 16);
1111    tcg_gen_shri_i32(t2, b, 16);
1112    tcg_gen_sub_i32(t, t1, t2);
1113}
1114
1115static void gen_vsubwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1116{
1117    TCGv_i64 t1, t2;
1118
1119    t1 = tcg_temp_new_i64();
1120    t2 = tcg_temp_new_i64();
1121    tcg_gen_shri_i64(t1, a, 32);
1122    tcg_gen_shri_i64(t2, b, 32);
1123    tcg_gen_sub_i64(t, t1, t2);
1124}
1125
1126static void do_vsubwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1127                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1128{
1129    static const TCGOpcode vecop_list[] = {
1130        INDEX_op_shri_vec, INDEX_op_sub_vec, 0
1131        };
1132    static const GVecGen3 op[4] = {
1133        {
1134            .fniv = gen_vsubwod_u,
1135            .fno = gen_helper_vsubwod_h_bu,
1136            .opt_opc = vecop_list,
1137            .vece = MO_16
1138        },
1139        {
1140            .fni4 = gen_vsubwod_w_hu,
1141            .fniv = gen_vsubwod_u,
1142            .fno = gen_helper_vsubwod_w_hu,
1143            .opt_opc = vecop_list,
1144            .vece = MO_32
1145        },
1146        {
1147            .fni8 = gen_vsubwod_d_wu,
1148            .fniv = gen_vsubwod_u,
1149            .fno = gen_helper_vsubwod_d_wu,
1150            .opt_opc = vecop_list,
1151            .vece = MO_64
1152        },
1153        {
1154            .fno = gen_helper_vsubwod_q_du,
1155            .vece = MO_128
1156        },
1157    };
1158
1159    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1160}
1161
/*
 * Translator entries for the unsigned odd-element widening subtract.
 * v* forms are tagged LSX and use gvec_vvv; xv* forms are tagged LASX and
 * use gvec_xxx.  The element-size argument is the source element size,
 * which do_vsubwod_u() uses to index its descriptor table.
 */
TRANS(vsubwod_h_bu, LSX, gvec_vvv, MO_8, do_vsubwod_u)
TRANS(vsubwod_w_hu, LSX, gvec_vvv, MO_16, do_vsubwod_u)
TRANS(vsubwod_d_wu, LSX, gvec_vvv, MO_32, do_vsubwod_u)
TRANS(vsubwod_q_du, LSX, gvec_vvv, MO_64, do_vsubwod_u)
TRANS(xvsubwod_h_bu, LASX, gvec_xxx, MO_8, do_vsubwod_u)
TRANS(xvsubwod_w_hu, LASX, gvec_xxx, MO_16, do_vsubwod_u)
TRANS(xvsubwod_d_wu, LASX, gvec_xxx, MO_32, do_vsubwod_u)
TRANS(xvsubwod_q_du, LASX, gvec_xxx, MO_64, do_vsubwod_u)
1170
1171static void gen_vaddwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1172{
1173    TCGv_vec t1, t2, t3;
1174
1175    int halfbits = 4 << vece;
1176
1177    t1 = tcg_temp_new_vec_matching(a);
1178    t2 = tcg_temp_new_vec_matching(b);
1179    t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, halfbits));
1180
1181    /* Zero-extend the even elements from a */
1182    tcg_gen_and_vec(vece, t1, a, t3);
1183
1184    /* Sign-extend the even elements from b */
1185    tcg_gen_shli_vec(vece, t2, b, halfbits);
1186    tcg_gen_sari_vec(vece, t2, t2, halfbits);
1187
1188    tcg_gen_add_vec(vece, t, t1, t2);
1189}
1190
1191static void gen_vaddwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
1192{
1193    TCGv_i32 t1, t2;
1194
1195    t1 = tcg_temp_new_i32();
1196    t2 = tcg_temp_new_i32();
1197    tcg_gen_ext16u_i32(t1, a);
1198    tcg_gen_ext16s_i32(t2, b);
1199    tcg_gen_add_i32(t, t1, t2);
1200}
1201
1202static void gen_vaddwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1203{
1204    TCGv_i64 t1, t2;
1205
1206    t1 = tcg_temp_new_i64();
1207    t2 = tcg_temp_new_i64();
1208    tcg_gen_ext32u_i64(t1, a);
1209    tcg_gen_ext32s_i64(t2, b);
1210    tcg_gen_add_i64(t, t1, t2);
1211}
1212
1213static void do_vaddwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1214                           uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1215{
1216    static const TCGOpcode vecop_list[] = {
1217        INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
1218        };
1219    static const GVecGen3 op[4] = {
1220        {
1221            .fniv = gen_vaddwev_u_s,
1222            .fno = gen_helper_vaddwev_h_bu_b,
1223            .opt_opc = vecop_list,
1224            .vece = MO_16
1225        },
1226        {
1227            .fni4 = gen_vaddwev_w_hu_h,
1228            .fniv = gen_vaddwev_u_s,
1229            .fno = gen_helper_vaddwev_w_hu_h,
1230            .opt_opc = vecop_list,
1231            .vece = MO_32
1232        },
1233        {
1234            .fni8 = gen_vaddwev_d_wu_w,
1235            .fniv = gen_vaddwev_u_s,
1236            .fno = gen_helper_vaddwev_d_wu_w,
1237            .opt_opc = vecop_list,
1238            .vece = MO_64
1239        },
1240        {
1241            .fno = gen_helper_vaddwev_q_du_d,
1242            .vece = MO_128
1243        },
1244    };
1245
1246    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1247}
1248
/*
 * Translator entries for the mixed-sign even-element widening add.
 * v* forms are tagged LSX and use gvec_vvv; xv* forms are tagged LASX and
 * use gvec_xxx.  The element-size argument is the source element size,
 * which do_vaddwev_u_s() uses to index its descriptor table.
 */
TRANS(vaddwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vaddwev_u_s)
TRANS(vaddwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vaddwev_u_s)
TRANS(vaddwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vaddwev_u_s)
TRANS(vaddwev_q_du_d, LSX, gvec_vvv, MO_64, do_vaddwev_u_s)
TRANS(xvaddwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vaddwev_u_s)
TRANS(xvaddwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vaddwev_u_s)
TRANS(xvaddwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vaddwev_u_s)
TRANS(xvaddwev_q_du_d, LASX, gvec_xxx, MO_64, do_vaddwev_u_s)
1257
1258static void gen_vaddwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1259{
1260    TCGv_vec t1, t2;
1261
1262    int halfbits = 4 << vece;
1263
1264    t1 = tcg_temp_new_vec_matching(a);
1265    t2 = tcg_temp_new_vec_matching(b);
1266
1267    /* Zero-extend the odd elements from a */
1268    tcg_gen_shri_vec(vece, t1, a, halfbits);
1269    /* Sign-extend the odd elements from b */
1270    tcg_gen_sari_vec(vece, t2, b, halfbits);
1271
1272    tcg_gen_add_vec(vece, t, t1, t2);
1273}
1274
1275static void gen_vaddwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
1276{
1277    TCGv_i32 t1, t2;
1278
1279    t1 = tcg_temp_new_i32();
1280    t2 = tcg_temp_new_i32();
1281    tcg_gen_shri_i32(t1, a, 16);
1282    tcg_gen_sari_i32(t2, b, 16);
1283    tcg_gen_add_i32(t, t1, t2);
1284}
1285
1286static void gen_vaddwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1287{
1288    TCGv_i64 t1, t2;
1289
1290    t1 = tcg_temp_new_i64();
1291    t2 = tcg_temp_new_i64();
1292    tcg_gen_shri_i64(t1, a, 32);
1293    tcg_gen_sari_i64(t2, b, 32);
1294    tcg_gen_add_i64(t, t1, t2);
1295}
1296
1297static void do_vaddwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1298                           uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1299{
1300    static const TCGOpcode vecop_list[] = {
1301        INDEX_op_shri_vec, INDEX_op_sari_vec,  INDEX_op_add_vec, 0
1302        };
1303    static const GVecGen3 op[4] = {
1304        {
1305            .fniv = gen_vaddwod_u_s,
1306            .fno = gen_helper_vaddwod_h_bu_b,
1307            .opt_opc = vecop_list,
1308            .vece = MO_16
1309        },
1310        {
1311            .fni4 = gen_vaddwod_w_hu_h,
1312            .fniv = gen_vaddwod_u_s,
1313            .fno = gen_helper_vaddwod_w_hu_h,
1314            .opt_opc = vecop_list,
1315            .vece = MO_32
1316        },
1317        {
1318            .fni8 = gen_vaddwod_d_wu_w,
1319            .fniv = gen_vaddwod_u_s,
1320            .fno = gen_helper_vaddwod_d_wu_w,
1321            .opt_opc = vecop_list,
1322            .vece = MO_64
1323        },
1324        {
1325            .fno = gen_helper_vaddwod_q_du_d,
1326            .vece = MO_128
1327        },
1328    };
1329
1330    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1331}
1332
1333TRANS(vaddwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vaddwod_u_s)
1334TRANS(vaddwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vaddwod_u_s)
1335TRANS(vaddwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vaddwod_u_s)
1336TRANS(vaddwod_q_du_d, LSX, gvec_vvv, MO_64, do_vaddwod_u_s)
1337TRANS(xvaddwod_h_bu_b, LSX, gvec_xxx, MO_8, do_vaddwod_u_s)
1338TRANS(xvaddwod_w_hu_h, LSX, gvec_xxx, MO_16, do_vaddwod_u_s)
1339TRANS(xvaddwod_d_wu_w, LSX, gvec_xxx, MO_32, do_vaddwod_u_s)
1340TRANS(xvaddwod_q_du_d, LSX, gvec_xxx, MO_64, do_vaddwod_u_s)
1341
1342static void do_vavg(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b,
1343                    void (*gen_shr_vec)(unsigned, TCGv_vec,
1344                                        TCGv_vec, int64_t),
1345                    void (*gen_round_vec)(unsigned, TCGv_vec,
1346                                          TCGv_vec, TCGv_vec))
1347{
1348    TCGv_vec tmp = tcg_temp_new_vec_matching(t);
1349    gen_round_vec(vece, tmp, a, b);
1350    tcg_gen_and_vec(vece, tmp, tmp, tcg_constant_vec_matching(t, vece, 1));
1351    gen_shr_vec(vece, a, a, 1);
1352    gen_shr_vec(vece, b, b, 1);
1353    tcg_gen_add_vec(vece, t, a, b);
1354    tcg_gen_add_vec(vece, t, t, tmp);
1355}
1356
1357static void gen_vavg_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1358{
1359    do_vavg(vece, t, a, b, tcg_gen_sari_vec, tcg_gen_and_vec);
1360}
1361
1362static void gen_vavg_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1363{
1364    do_vavg(vece, t, a, b, tcg_gen_shri_vec, tcg_gen_and_vec);
1365}
1366
1367static void gen_vavgr_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1368{
1369    do_vavg(vece, t, a, b, tcg_gen_sari_vec, tcg_gen_or_vec);
1370}
1371
1372static void gen_vavgr_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1373{
1374    do_vavg(vece, t, a, b, tcg_gen_shri_vec, tcg_gen_or_vec);
1375}
1376
1377static void do_vavg_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1378                      uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1379{
1380    static const TCGOpcode vecop_list[] = {
1381        INDEX_op_sari_vec, INDEX_op_add_vec, 0
1382        };
1383    static const GVecGen3 op[4] = {
1384        {
1385            .fniv = gen_vavg_s,
1386            .fno = gen_helper_vavg_b,
1387            .opt_opc = vecop_list,
1388            .vece = MO_8
1389        },
1390        {
1391            .fniv = gen_vavg_s,
1392            .fno = gen_helper_vavg_h,
1393            .opt_opc = vecop_list,
1394            .vece = MO_16
1395        },
1396        {
1397            .fniv = gen_vavg_s,
1398            .fno = gen_helper_vavg_w,
1399            .opt_opc = vecop_list,
1400            .vece = MO_32
1401        },
1402        {
1403            .fniv = gen_vavg_s,
1404            .fno = gen_helper_vavg_d,
1405            .opt_opc = vecop_list,
1406            .vece = MO_64
1407        },
1408    };
1409
1410    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1411}
1412
1413static void do_vavg_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1414                      uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1415{
1416    static const TCGOpcode vecop_list[] = {
1417        INDEX_op_shri_vec, INDEX_op_add_vec, 0
1418        };
1419    static const GVecGen3 op[4] = {
1420        {
1421            .fniv = gen_vavg_u,
1422            .fno = gen_helper_vavg_bu,
1423            .opt_opc = vecop_list,
1424            .vece = MO_8
1425        },
1426        {
1427            .fniv = gen_vavg_u,
1428            .fno = gen_helper_vavg_hu,
1429            .opt_opc = vecop_list,
1430            .vece = MO_16
1431        },
1432        {
1433            .fniv = gen_vavg_u,
1434            .fno = gen_helper_vavg_wu,
1435            .opt_opc = vecop_list,
1436            .vece = MO_32
1437        },
1438        {
1439            .fniv = gen_vavg_u,
1440            .fno = gen_helper_vavg_du,
1441            .opt_opc = vecop_list,
1442            .vece = MO_64
1443        },
1444    };
1445
1446    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1447}
1448
/*
 * Translator entries for the average instructions.  Names without a "u"
 * suffix use the signed expander do_vavg_s(), the *u names use the
 * unsigned expander do_vavg_u().  v* forms are tagged LSX and use
 * gvec_vvv; xv* forms are tagged LASX and use gvec_xxx.
 */
TRANS(vavg_b, LSX, gvec_vvv, MO_8, do_vavg_s)
TRANS(vavg_h, LSX, gvec_vvv, MO_16, do_vavg_s)
TRANS(vavg_w, LSX, gvec_vvv, MO_32, do_vavg_s)
TRANS(vavg_d, LSX, gvec_vvv, MO_64, do_vavg_s)
TRANS(vavg_bu, LSX, gvec_vvv, MO_8, do_vavg_u)
TRANS(vavg_hu, LSX, gvec_vvv, MO_16, do_vavg_u)
TRANS(vavg_wu, LSX, gvec_vvv, MO_32, do_vavg_u)
TRANS(vavg_du, LSX, gvec_vvv, MO_64, do_vavg_u)
TRANS(xvavg_b, LASX, gvec_xxx, MO_8, do_vavg_s)
TRANS(xvavg_h, LASX, gvec_xxx, MO_16, do_vavg_s)
TRANS(xvavg_w, LASX, gvec_xxx, MO_32, do_vavg_s)
TRANS(xvavg_d, LASX, gvec_xxx, MO_64, do_vavg_s)
TRANS(xvavg_bu, LASX, gvec_xxx, MO_8, do_vavg_u)
TRANS(xvavg_hu, LASX, gvec_xxx, MO_16, do_vavg_u)
TRANS(xvavg_wu, LASX, gvec_xxx, MO_32, do_vavg_u)
TRANS(xvavg_du, LASX, gvec_xxx, MO_64, do_vavg_u)
1465
1466static void do_vavgr_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1467                       uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1468{
1469    static const TCGOpcode vecop_list[] = {
1470        INDEX_op_sari_vec, INDEX_op_add_vec, 0
1471        };
1472    static const GVecGen3 op[4] = {
1473        {
1474            .fniv = gen_vavgr_s,
1475            .fno = gen_helper_vavgr_b,
1476            .opt_opc = vecop_list,
1477            .vece = MO_8
1478        },
1479        {
1480            .fniv = gen_vavgr_s,
1481            .fno = gen_helper_vavgr_h,
1482            .opt_opc = vecop_list,
1483            .vece = MO_16
1484        },
1485        {
1486            .fniv = gen_vavgr_s,
1487            .fno = gen_helper_vavgr_w,
1488            .opt_opc = vecop_list,
1489            .vece = MO_32
1490        },
1491        {
1492            .fniv = gen_vavgr_s,
1493            .fno = gen_helper_vavgr_d,
1494            .opt_opc = vecop_list,
1495            .vece = MO_64
1496        },
1497    };
1498
1499    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1500}
1501
1502static void do_vavgr_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1503                       uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1504{
1505    static const TCGOpcode vecop_list[] = {
1506        INDEX_op_shri_vec, INDEX_op_add_vec, 0
1507        };
1508    static const GVecGen3 op[4] = {
1509        {
1510            .fniv = gen_vavgr_u,
1511            .fno = gen_helper_vavgr_bu,
1512            .opt_opc = vecop_list,
1513            .vece = MO_8
1514        },
1515        {
1516            .fniv = gen_vavgr_u,
1517            .fno = gen_helper_vavgr_hu,
1518            .opt_opc = vecop_list,
1519            .vece = MO_16
1520        },
1521        {
1522            .fniv = gen_vavgr_u,
1523            .fno = gen_helper_vavgr_wu,
1524            .opt_opc = vecop_list,
1525            .vece = MO_32
1526        },
1527        {
1528            .fniv = gen_vavgr_u,
1529            .fno = gen_helper_vavgr_du,
1530            .opt_opc = vecop_list,
1531            .vece = MO_64
1532        },
1533    };
1534
1535    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1536}
1537
/*
 * Translator entries for the rounding average instructions.  Names
 * without a "u" suffix use the signed expander do_vavgr_s(), the *u names
 * use the unsigned expander do_vavgr_u().  v* forms are tagged LSX and
 * use gvec_vvv; xv* forms are tagged LASX and use gvec_xxx.
 */
TRANS(vavgr_b, LSX, gvec_vvv, MO_8, do_vavgr_s)
TRANS(vavgr_h, LSX, gvec_vvv, MO_16, do_vavgr_s)
TRANS(vavgr_w, LSX, gvec_vvv, MO_32, do_vavgr_s)
TRANS(vavgr_d, LSX, gvec_vvv, MO_64, do_vavgr_s)
TRANS(vavgr_bu, LSX, gvec_vvv, MO_8, do_vavgr_u)
TRANS(vavgr_hu, LSX, gvec_vvv, MO_16, do_vavgr_u)
TRANS(vavgr_wu, LSX, gvec_vvv, MO_32, do_vavgr_u)
TRANS(vavgr_du, LSX, gvec_vvv, MO_64, do_vavgr_u)
TRANS(xvavgr_b, LASX, gvec_xxx, MO_8, do_vavgr_s)
TRANS(xvavgr_h, LASX, gvec_xxx, MO_16, do_vavgr_s)
TRANS(xvavgr_w, LASX, gvec_xxx, MO_32, do_vavgr_s)
TRANS(xvavgr_d, LASX, gvec_xxx, MO_64, do_vavgr_s)
TRANS(xvavgr_bu, LASX, gvec_xxx, MO_8, do_vavgr_u)
TRANS(xvavgr_hu, LASX, gvec_xxx, MO_16, do_vavgr_u)
TRANS(xvavgr_wu, LASX, gvec_xxx, MO_32, do_vavgr_u)
TRANS(xvavgr_du, LASX, gvec_xxx, MO_64, do_vavgr_u)
1554
/*
 * Inline vector expansion of the signed absolute difference:
 * t = smax(a, b) - smin(a, b) per @vece-sized lane.
 * @a is used as scratch and ends up holding the lane-wise minimum.
 */
static void gen_vabsd_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_smax_vec(vece, t, a, b);
    /* Must come after the max above: this overwrites a. */
    tcg_gen_smin_vec(vece, a, a, b);
    tcg_gen_sub_vec(vece, t, t, a);
}
1561
1562static void do_vabsd_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1563                       uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1564{
1565    static const TCGOpcode vecop_list[] = {
1566        INDEX_op_smax_vec, INDEX_op_smin_vec, INDEX_op_sub_vec, 0
1567        };
1568    static const GVecGen3 op[4] = {
1569        {
1570            .fniv = gen_vabsd_s,
1571            .fno = gen_helper_vabsd_b,
1572            .opt_opc = vecop_list,
1573            .vece = MO_8
1574        },
1575        {
1576            .fniv = gen_vabsd_s,
1577            .fno = gen_helper_vabsd_h,
1578            .opt_opc = vecop_list,
1579            .vece = MO_16
1580        },
1581        {
1582            .fniv = gen_vabsd_s,
1583            .fno = gen_helper_vabsd_w,
1584            .opt_opc = vecop_list,
1585            .vece = MO_32
1586        },
1587        {
1588            .fniv = gen_vabsd_s,
1589            .fno = gen_helper_vabsd_d,
1590            .opt_opc = vecop_list,
1591            .vece = MO_64
1592        },
1593    };
1594
1595    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1596}
1597
/*
 * Inline vector expansion of the unsigned absolute difference:
 * t = umax(a, b) - umin(a, b) per @vece-sized lane.
 * @a is used as scratch and ends up holding the lane-wise minimum.
 */
static void gen_vabsd_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_umax_vec(vece, t, a, b);
    /* Must come after the max above: this overwrites a. */
    tcg_gen_umin_vec(vece, a, a, b);
    tcg_gen_sub_vec(vece, t, t, a);
}
1604
1605static void do_vabsd_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1606                       uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1607{
1608    static const TCGOpcode vecop_list[] = {
1609        INDEX_op_umax_vec, INDEX_op_umin_vec, INDEX_op_sub_vec, 0
1610        };
1611    static const GVecGen3 op[4] = {
1612        {
1613            .fniv = gen_vabsd_u,
1614            .fno = gen_helper_vabsd_bu,
1615            .opt_opc = vecop_list,
1616            .vece = MO_8
1617        },
1618        {
1619            .fniv = gen_vabsd_u,
1620            .fno = gen_helper_vabsd_hu,
1621            .opt_opc = vecop_list,
1622            .vece = MO_16
1623        },
1624        {
1625            .fniv = gen_vabsd_u,
1626            .fno = gen_helper_vabsd_wu,
1627            .opt_opc = vecop_list,
1628            .vece = MO_32
1629        },
1630        {
1631            .fniv = gen_vabsd_u,
1632            .fno = gen_helper_vabsd_du,
1633            .opt_opc = vecop_list,
1634            .vece = MO_64
1635        },
1636    };
1637
1638    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1639}
1640
/*
 * Translator entries for the absolute-difference instructions.  Names
 * without a "u" suffix use the signed expander do_vabsd_s(), the *u names
 * use the unsigned expander do_vabsd_u().  v* forms are tagged LSX and
 * use gvec_vvv; xv* forms are tagged LASX and use gvec_xxx.
 */
TRANS(vabsd_b, LSX, gvec_vvv, MO_8, do_vabsd_s)
TRANS(vabsd_h, LSX, gvec_vvv, MO_16, do_vabsd_s)
TRANS(vabsd_w, LSX, gvec_vvv, MO_32, do_vabsd_s)
TRANS(vabsd_d, LSX, gvec_vvv, MO_64, do_vabsd_s)
TRANS(vabsd_bu, LSX, gvec_vvv, MO_8, do_vabsd_u)
TRANS(vabsd_hu, LSX, gvec_vvv, MO_16, do_vabsd_u)
TRANS(vabsd_wu, LSX, gvec_vvv, MO_32, do_vabsd_u)
TRANS(vabsd_du, LSX, gvec_vvv, MO_64, do_vabsd_u)
TRANS(xvabsd_b, LASX, gvec_xxx, MO_8, do_vabsd_s)
TRANS(xvabsd_h, LASX, gvec_xxx, MO_16, do_vabsd_s)
TRANS(xvabsd_w, LASX, gvec_xxx, MO_32, do_vabsd_s)
TRANS(xvabsd_d, LASX, gvec_xxx, MO_64, do_vabsd_s)
TRANS(xvabsd_bu, LASX, gvec_xxx, MO_8, do_vabsd_u)
TRANS(xvabsd_hu, LASX, gvec_xxx, MO_16, do_vabsd_u)
TRANS(xvabsd_wu, LASX, gvec_xxx, MO_32, do_vabsd_u)
TRANS(xvabsd_du, LASX, gvec_xxx, MO_64, do_vabsd_u)
1657
1658static void gen_vadda(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1659{
1660    TCGv_vec t1, t2;
1661
1662    t1 = tcg_temp_new_vec_matching(a);
1663    t2 = tcg_temp_new_vec_matching(b);
1664
1665    tcg_gen_abs_vec(vece, t1, a);
1666    tcg_gen_abs_vec(vece, t2, b);
1667    tcg_gen_add_vec(vece, t, t1, t2);
1668}
1669
1670static void do_vadda(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1671                     uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1672{
1673    static const TCGOpcode vecop_list[] = {
1674        INDEX_op_abs_vec, INDEX_op_add_vec, 0
1675        };
1676    static const GVecGen3 op[4] = {
1677        {
1678            .fniv = gen_vadda,
1679            .fno = gen_helper_vadda_b,
1680            .opt_opc = vecop_list,
1681            .vece = MO_8
1682        },
1683        {
1684            .fniv = gen_vadda,
1685            .fno = gen_helper_vadda_h,
1686            .opt_opc = vecop_list,
1687            .vece = MO_16
1688        },
1689        {
1690            .fniv = gen_vadda,
1691            .fno = gen_helper_vadda_w,
1692            .opt_opc = vecop_list,
1693            .vece = MO_32
1694        },
1695        {
1696            .fniv = gen_vadda,
1697            .fno = gen_helper_vadda_d,
1698            .opt_opc = vecop_list,
1699            .vece = MO_64
1700        },
1701    };
1702
1703    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1704}
1705
/*
 * Translator entries for the add-absolute instructions, all expanded by
 * do_vadda().  v* forms are tagged LSX and use gvec_vvv; xv* forms are
 * tagged LASX and use gvec_xxx.
 */
TRANS(vadda_b, LSX, gvec_vvv, MO_8, do_vadda)
TRANS(vadda_h, LSX, gvec_vvv, MO_16, do_vadda)
TRANS(vadda_w, LSX, gvec_vvv, MO_32, do_vadda)
TRANS(vadda_d, LSX, gvec_vvv, MO_64, do_vadda)
TRANS(xvadda_b, LASX, gvec_xxx, MO_8, do_vadda)
TRANS(xvadda_h, LASX, gvec_xxx, MO_16, do_vadda)
TRANS(xvadda_w, LASX, gvec_xxx, MO_32, do_vadda)
TRANS(xvadda_d, LASX, gvec_xxx, MO_64, do_vadda)
1714
/*
 * Translator entries for the register-register maximum.  No local
 * expander is needed: the signed forms map straight onto
 * tcg_gen_gvec_smax and the *u forms onto tcg_gen_gvec_umax.
 * v* forms are tagged LSX and use gvec_vvv; xv* forms are tagged LASX and
 * use gvec_xxx.
 */
TRANS(vmax_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_smax)
TRANS(vmax_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_smax)
TRANS(vmax_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_smax)
TRANS(vmax_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_smax)
TRANS(vmax_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_umax)
TRANS(vmax_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_umax)
TRANS(vmax_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_umax)
TRANS(vmax_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_umax)
TRANS(xvmax_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_smax)
TRANS(xvmax_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_smax)
TRANS(xvmax_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_smax)
TRANS(xvmax_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_smax)
TRANS(xvmax_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_umax)
TRANS(xvmax_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_umax)
TRANS(xvmax_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_umax)
TRANS(xvmax_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_umax)
1731
/*
 * Translator entries for the register-register minimum.  No local
 * expander is needed: the signed forms map straight onto
 * tcg_gen_gvec_smin and the *u forms onto tcg_gen_gvec_umin.
 * v* forms are tagged LSX and use gvec_vvv; xv* forms are tagged LASX and
 * use gvec_xxx.
 */
TRANS(vmin_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_smin)
TRANS(vmin_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_smin)
TRANS(vmin_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_smin)
TRANS(vmin_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_smin)
TRANS(vmin_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_umin)
TRANS(vmin_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_umin)
TRANS(vmin_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_umin)
TRANS(vmin_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_umin)
TRANS(xvmin_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_smin)
TRANS(xvmin_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_smin)
TRANS(xvmin_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_smin)
TRANS(xvmin_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_smin)
TRANS(xvmin_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_umin)
TRANS(xvmin_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_umin)
TRANS(xvmin_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_umin)
TRANS(xvmin_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_umin)
1748
1749static void gen_vmini_s(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
1750{
1751    tcg_gen_smin_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
1752}
1753
1754static void gen_vmini_u(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
1755{
1756    tcg_gen_umin_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
1757}
1758
1759static void gen_vmaxi_s(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
1760{
1761    tcg_gen_smax_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
1762}
1763
1764static void gen_vmaxi_u(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
1765{
1766    tcg_gen_umax_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
1767}
1768
1769static void do_vmini_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1770                       int64_t imm, uint32_t oprsz, uint32_t maxsz)
1771{
1772    static const TCGOpcode vecop_list[] = {
1773        INDEX_op_smin_vec, 0
1774        };
1775    static const GVecGen2i op[4] = {
1776        {
1777            .fniv = gen_vmini_s,
1778            .fnoi = gen_helper_vmini_b,
1779            .opt_opc = vecop_list,
1780            .vece = MO_8
1781        },
1782        {
1783            .fniv = gen_vmini_s,
1784            .fnoi = gen_helper_vmini_h,
1785            .opt_opc = vecop_list,
1786            .vece = MO_16
1787        },
1788        {
1789            .fniv = gen_vmini_s,
1790            .fnoi = gen_helper_vmini_w,
1791            .opt_opc = vecop_list,
1792            .vece = MO_32
1793        },
1794        {
1795            .fniv = gen_vmini_s,
1796            .fnoi = gen_helper_vmini_d,
1797            .opt_opc = vecop_list,
1798            .vece = MO_64
1799        },
1800    };
1801
1802    tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
1803}
1804
1805static void do_vmini_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1806                       int64_t imm, uint32_t oprsz, uint32_t maxsz)
1807{
1808    static const TCGOpcode vecop_list[] = {
1809        INDEX_op_umin_vec, 0
1810        };
1811    static const GVecGen2i op[4] = {
1812        {
1813            .fniv = gen_vmini_u,
1814            .fnoi = gen_helper_vmini_bu,
1815            .opt_opc = vecop_list,
1816            .vece = MO_8
1817        },
1818        {
1819            .fniv = gen_vmini_u,
1820            .fnoi = gen_helper_vmini_hu,
1821            .opt_opc = vecop_list,
1822            .vece = MO_16
1823        },
1824        {
1825            .fniv = gen_vmini_u,
1826            .fnoi = gen_helper_vmini_wu,
1827            .opt_opc = vecop_list,
1828            .vece = MO_32
1829        },
1830        {
1831            .fniv = gen_vmini_u,
1832            .fnoi = gen_helper_vmini_du,
1833            .opt_opc = vecop_list,
1834            .vece = MO_64
1835        },
1836    };
1837
1838    tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
1839}
1840
/*
 * Translator entries for the minimum-with-immediate instructions.  Names
 * without a "u" suffix use the signed expander do_vmini_s(), the *u names
 * use the unsigned expander do_vmini_u().  v* forms are tagged LSX and
 * use gvec_vv_i; xv* forms are tagged LASX and use gvec_xx_i.
 */
TRANS(vmini_b, LSX, gvec_vv_i, MO_8, do_vmini_s)
TRANS(vmini_h, LSX, gvec_vv_i, MO_16, do_vmini_s)
TRANS(vmini_w, LSX, gvec_vv_i, MO_32, do_vmini_s)
TRANS(vmini_d, LSX, gvec_vv_i, MO_64, do_vmini_s)
TRANS(vmini_bu, LSX, gvec_vv_i, MO_8, do_vmini_u)
TRANS(vmini_hu, LSX, gvec_vv_i, MO_16, do_vmini_u)
TRANS(vmini_wu, LSX, gvec_vv_i, MO_32, do_vmini_u)
TRANS(vmini_du, LSX, gvec_vv_i, MO_64, do_vmini_u)
TRANS(xvmini_b, LASX, gvec_xx_i, MO_8, do_vmini_s)
TRANS(xvmini_h, LASX, gvec_xx_i, MO_16, do_vmini_s)
TRANS(xvmini_w, LASX, gvec_xx_i, MO_32, do_vmini_s)
TRANS(xvmini_d, LASX, gvec_xx_i, MO_64, do_vmini_s)
TRANS(xvmini_bu, LASX, gvec_xx_i, MO_8, do_vmini_u)
TRANS(xvmini_hu, LASX, gvec_xx_i, MO_16, do_vmini_u)
TRANS(xvmini_wu, LASX, gvec_xx_i, MO_32, do_vmini_u)
TRANS(xvmini_du, LASX, gvec_xx_i, MO_64, do_vmini_u)
1857
1858static void do_vmaxi_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1859                       int64_t imm, uint32_t oprsz, uint32_t maxsz)
1860{
1861    static const TCGOpcode vecop_list[] = {
1862        INDEX_op_smax_vec, 0
1863        };
1864    static const GVecGen2i op[4] = {
1865        {
1866            .fniv = gen_vmaxi_s,
1867            .fnoi = gen_helper_vmaxi_b,
1868            .opt_opc = vecop_list,
1869            .vece = MO_8
1870        },
1871        {
1872            .fniv = gen_vmaxi_s,
1873            .fnoi = gen_helper_vmaxi_h,
1874            .opt_opc = vecop_list,
1875            .vece = MO_16
1876        },
1877        {
1878            .fniv = gen_vmaxi_s,
1879            .fnoi = gen_helper_vmaxi_w,
1880            .opt_opc = vecop_list,
1881            .vece = MO_32
1882        },
1883        {
1884            .fniv = gen_vmaxi_s,
1885            .fnoi = gen_helper_vmaxi_d,
1886            .opt_opc = vecop_list,
1887            .vece = MO_64
1888        },
1889    };
1890
1891    tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
1892}
1893
1894static void do_vmaxi_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1895                       int64_t imm, uint32_t oprsz, uint32_t maxsz)
1896{
1897    static const TCGOpcode vecop_list[] = {
1898        INDEX_op_umax_vec, 0
1899        };
1900    static const GVecGen2i op[4] = {
1901        {
1902            .fniv = gen_vmaxi_u,
1903            .fnoi = gen_helper_vmaxi_bu,
1904            .opt_opc = vecop_list,
1905            .vece = MO_8
1906        },
1907        {
1908            .fniv = gen_vmaxi_u,
1909            .fnoi = gen_helper_vmaxi_hu,
1910            .opt_opc = vecop_list,
1911            .vece = MO_16
1912        },
1913        {
1914            .fniv = gen_vmaxi_u,
1915            .fnoi = gen_helper_vmaxi_wu,
1916            .opt_opc = vecop_list,
1917            .vece = MO_32
1918        },
1919        {
1920            .fniv = gen_vmaxi_u,
1921            .fnoi = gen_helper_vmaxi_du,
1922            .opt_opc = vecop_list,
1923            .vece = MO_64
1924        },
1925    };
1926
1927    tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
1928}
1929
/*
 * Translators for [x]vmaxi.{b,h,w,d}[u].  The LSX forms are routed through
 * gvec_vv_i and the LASX forms through gvec_xx_i; signed variants bind
 * do_vmaxi_s and unsigned ("u" suffix) variants bind do_vmaxi_u, each with
 * the matching MO_* element size.
 */
TRANS(vmaxi_b, LSX, gvec_vv_i, MO_8, do_vmaxi_s)
TRANS(vmaxi_h, LSX, gvec_vv_i, MO_16, do_vmaxi_s)
TRANS(vmaxi_w, LSX, gvec_vv_i, MO_32, do_vmaxi_s)
TRANS(vmaxi_d, LSX, gvec_vv_i, MO_64, do_vmaxi_s)
TRANS(vmaxi_bu, LSX, gvec_vv_i, MO_8, do_vmaxi_u)
TRANS(vmaxi_hu, LSX, gvec_vv_i, MO_16, do_vmaxi_u)
TRANS(vmaxi_wu, LSX, gvec_vv_i, MO_32, do_vmaxi_u)
TRANS(vmaxi_du, LSX, gvec_vv_i, MO_64, do_vmaxi_u)
TRANS(xvmaxi_b, LASX, gvec_xx_i, MO_8, do_vmaxi_s)
TRANS(xvmaxi_h, LASX, gvec_xx_i, MO_16, do_vmaxi_s)
TRANS(xvmaxi_w, LASX, gvec_xx_i, MO_32, do_vmaxi_s)
TRANS(xvmaxi_d, LASX, gvec_xx_i, MO_64, do_vmaxi_s)
TRANS(xvmaxi_bu, LASX, gvec_xx_i, MO_8, do_vmaxi_u)
TRANS(xvmaxi_hu, LASX, gvec_xx_i, MO_16, do_vmaxi_u)
TRANS(xvmaxi_wu, LASX, gvec_xx_i, MO_32, do_vmaxi_u)
TRANS(xvmaxi_du, LASX, gvec_xx_i, MO_64, do_vmaxi_u)
1946
/*
 * Translators for [x]vmul.{b,h,w,d}.  No target-specific expander is
 * needed: every size binds the generic tcg_gen_gvec_mul directly, via
 * gvec_vvv for LSX and gvec_xxx for LASX.
 */
TRANS(vmul_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_mul)
TRANS(vmul_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_mul)
TRANS(vmul_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_mul)
TRANS(vmul_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_mul)
TRANS(xvmul_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_mul)
TRANS(xvmul_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_mul)
TRANS(xvmul_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_mul)
TRANS(xvmul_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_mul)
1955
1956static void gen_vmuh_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
1957{
1958    TCGv_i32 discard = tcg_temp_new_i32();
1959    tcg_gen_muls2_i32(discard, t, a, b);
1960}
1961
1962static void gen_vmuh_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1963{
1964    TCGv_i64 discard = tcg_temp_new_i64();
1965    tcg_gen_muls2_i64(discard, t, a, b);
1966}
1967
1968static void do_vmuh_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1969                      uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1970{
1971    static const GVecGen3 op[4] = {
1972        {
1973            .fno = gen_helper_vmuh_b,
1974            .vece = MO_8
1975        },
1976        {
1977            .fno = gen_helper_vmuh_h,
1978            .vece = MO_16
1979        },
1980        {
1981            .fni4 = gen_vmuh_w,
1982            .fno = gen_helper_vmuh_w,
1983            .vece = MO_32
1984        },
1985        {
1986            .fni8 = gen_vmuh_d,
1987            .fno = gen_helper_vmuh_d,
1988            .vece = MO_64
1989        },
1990    };
1991
1992    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1993}
1994
/*
 * Translators for the signed [x]vmuh.{b,h,w,d}: every element size binds
 * do_vmuh_s, through gvec_vvv for LSX and gvec_xxx for LASX.
 */
TRANS(vmuh_b, LSX, gvec_vvv, MO_8, do_vmuh_s)
TRANS(vmuh_h, LSX, gvec_vvv, MO_16, do_vmuh_s)
TRANS(vmuh_w, LSX, gvec_vvv, MO_32, do_vmuh_s)
TRANS(vmuh_d, LSX, gvec_vvv, MO_64, do_vmuh_s)
TRANS(xvmuh_b, LASX, gvec_xxx, MO_8, do_vmuh_s)
TRANS(xvmuh_h, LASX, gvec_xxx, MO_16, do_vmuh_s)
TRANS(xvmuh_w, LASX, gvec_xxx, MO_32, do_vmuh_s)
TRANS(xvmuh_d, LASX, gvec_xxx, MO_64, do_vmuh_s)
2003
2004static void gen_vmuh_wu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2005{
2006    TCGv_i32 discard = tcg_temp_new_i32();
2007    tcg_gen_mulu2_i32(discard, t, a, b);
2008}
2009
2010static void gen_vmuh_du(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2011{
2012    TCGv_i64 discard = tcg_temp_new_i64();
2013    tcg_gen_mulu2_i64(discard, t, a, b);
2014}
2015
2016static void do_vmuh_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2017                      uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2018{
2019    static const GVecGen3 op[4] = {
2020        {
2021            .fno = gen_helper_vmuh_bu,
2022            .vece = MO_8
2023        },
2024        {
2025            .fno = gen_helper_vmuh_hu,
2026            .vece = MO_16
2027        },
2028        {
2029            .fni4 = gen_vmuh_wu,
2030            .fno = gen_helper_vmuh_wu,
2031            .vece = MO_32
2032        },
2033        {
2034            .fni8 = gen_vmuh_du,
2035            .fno = gen_helper_vmuh_du,
2036            .vece = MO_64
2037        },
2038    };
2039
2040    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2041}
2042
/*
 * Translators for the unsigned [x]vmuh.{bu,hu,wu,du}: every element size
 * binds do_vmuh_u, through gvec_vvv for LSX and gvec_xxx for LASX.
 */
TRANS(vmuh_bu, LSX, gvec_vvv, MO_8,  do_vmuh_u)
TRANS(vmuh_hu, LSX, gvec_vvv, MO_16, do_vmuh_u)
TRANS(vmuh_wu, LSX, gvec_vvv, MO_32, do_vmuh_u)
TRANS(vmuh_du, LSX, gvec_vvv, MO_64, do_vmuh_u)
TRANS(xvmuh_bu, LASX, gvec_xxx, MO_8,  do_vmuh_u)
TRANS(xvmuh_hu, LASX, gvec_xxx, MO_16, do_vmuh_u)
TRANS(xvmuh_wu, LASX, gvec_xxx, MO_32, do_vmuh_u)
TRANS(xvmuh_du, LASX, gvec_xxx, MO_64, do_vmuh_u)
2051
2052static void gen_vmulwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2053{
2054    TCGv_vec t1, t2;
2055    int halfbits = 4 << vece;
2056
2057    t1 = tcg_temp_new_vec_matching(a);
2058    t2 = tcg_temp_new_vec_matching(b);
2059    tcg_gen_shli_vec(vece, t1, a, halfbits);
2060    tcg_gen_sari_vec(vece, t1, t1, halfbits);
2061    tcg_gen_shli_vec(vece, t2, b, halfbits);
2062    tcg_gen_sari_vec(vece, t2, t2, halfbits);
2063    tcg_gen_mul_vec(vece, t, t1, t2);
2064}
2065
2066static void gen_vmulwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2067{
2068    TCGv_i32 t1, t2;
2069
2070    t1 = tcg_temp_new_i32();
2071    t2 = tcg_temp_new_i32();
2072    tcg_gen_ext16s_i32(t1, a);
2073    tcg_gen_ext16s_i32(t2, b);
2074    tcg_gen_mul_i32(t, t1, t2);
2075}
2076
2077static void gen_vmulwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2078{
2079    TCGv_i64 t1, t2;
2080
2081    t1 = tcg_temp_new_i64();
2082    t2 = tcg_temp_new_i64();
2083    tcg_gen_ext32s_i64(t1, a);
2084    tcg_gen_ext32s_i64(t2, b);
2085    tcg_gen_mul_i64(t, t1, t2);
2086}
2087
2088static void do_vmulwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2089                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2090{
2091    static const TCGOpcode vecop_list[] = {
2092        INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0
2093        };
2094    static const GVecGen3 op[3] = {
2095        {
2096            .fniv = gen_vmulwev_s,
2097            .fno = gen_helper_vmulwev_h_b,
2098            .opt_opc = vecop_list,
2099            .vece = MO_16
2100        },
2101        {
2102            .fni4 = gen_vmulwev_w_h,
2103            .fniv = gen_vmulwev_s,
2104            .fno = gen_helper_vmulwev_w_h,
2105            .opt_opc = vecop_list,
2106            .vece = MO_32
2107        },
2108        {
2109            .fni8 = gen_vmulwev_d_w,
2110            .fniv = gen_vmulwev_s,
2111            .fno = gen_helper_vmulwev_d_w,
2112            .opt_opc = vecop_list,
2113            .vece = MO_64
2114        },
2115    };
2116
2117    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2118}
2119
/*
 * Translators for the signed [x]vmulwev.{h.b,w.h,d.w}.  The MO_* argument
 * is the size named second in the mnemonic (the narrower one); it is the
 * value do_vmulwev_s receives as its table index.
 */
TRANS(vmulwev_h_b, LSX, gvec_vvv, MO_8, do_vmulwev_s)
TRANS(vmulwev_w_h, LSX, gvec_vvv, MO_16, do_vmulwev_s)
TRANS(vmulwev_d_w, LSX, gvec_vvv, MO_32, do_vmulwev_s)
TRANS(xvmulwev_h_b, LASX, gvec_xxx, MO_8, do_vmulwev_s)
TRANS(xvmulwev_w_h, LASX, gvec_xxx, MO_16, do_vmulwev_s)
TRANS(xvmulwev_d_w, LASX, gvec_xxx, MO_32, do_vmulwev_s)
2126
/*
 * Adapter around tcg_gen_mulsu2_i64() with the two multiplicands swapped:
 * @arg1 is passed as the callee's second input and @arg2 as its first,
 * while the outputs @rl and @rh are passed through in order.  It has the
 * same four-TCGv_i64 signature that gen_vmul_q_vl() accepts as @func.
 *
 * NOTE(review): presumably tcg_gen_mulsu2_i64() treats its first input as
 * signed and its second as unsigned, so this computes
 * unsigned(@arg1) * signed(@arg2) -- confirm against the TCG op docs.
 */
static void tcg_gen_mulus2_i64(TCGv_i64 rl, TCGv_i64 rh,
                               TCGv_i64 arg1, TCGv_i64 arg2)
{
    tcg_gen_mulsu2_i64(rl, rh, arg2, arg1);
}
2132
2133static bool gen_vmul_q_vl(DisasContext *ctx,
2134                          arg_vvv *a, uint32_t oprsz, int idx1, int idx2,
2135                          void (*func)(TCGv_i64, TCGv_i64,
2136                                       TCGv_i64, TCGv_i64))
2137{
2138    TCGv_i64 rh, rl, arg1, arg2;
2139    int i;
2140
2141    if (!check_vec(ctx, oprsz)) {
2142        return true;
2143    }
2144
2145    rh = tcg_temp_new_i64();
2146    rl = tcg_temp_new_i64();
2147    arg1 = tcg_temp_new_i64();
2148    arg2 = tcg_temp_new_i64();
2149
2150    for (i = 0; i < oprsz / 16; i++) {
2151        get_vreg64(arg1, a->vj, 2 * i + idx1);
2152        get_vreg64(arg2, a->vk, 2 * i + idx2);
2153
2154        func(rl, rh, arg1, arg2);
2155
2156        set_vreg64(rh, a->vd, 2 * i + 1);
2157        set_vreg64(rl, a->vd, 2 * i);
2158    }
2159
2160    return true;
2161}
2162
2163static bool gen_vmul_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2,
2164                       void (*func)(TCGv_i64, TCGv_i64,
2165                                    TCGv_i64, TCGv_i64))
2166{
2167    return gen_vmul_q_vl(ctx, a, 16, idx1, idx2, func);
2168}
2169
2170static bool gen_xvmul_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2,
2171                        void (*func)(TCGv_i64, TCGv_i64,
2172                                     TCGv_i64, TCGv_i64))
2173{
2174    return gen_vmul_q_vl(ctx, a, 32, idx1, idx2, func);
2175}
2176
/*
 * Translators for the 128-bit-result widening multiplies.  The "wev" forms
 * pass element index 0 for both sources and the "wod" forms pass index 1.
 * The multiply generator selects the variant: tcg_gen_muls2_i64 for the
 * plain forms, tcg_gen_mulu2_i64 for the "du" forms and
 * tcg_gen_mulus2_i64 for the mixed "du_d" forms.  LSX forms use
 * gen_vmul_q, LASX forms use gen_xvmul_q.
 */
TRANS(vmulwev_q_d, LSX, gen_vmul_q, 0, 0, tcg_gen_muls2_i64)
TRANS(vmulwod_q_d, LSX, gen_vmul_q, 1, 1, tcg_gen_muls2_i64)
TRANS(vmulwev_q_du, LSX, gen_vmul_q, 0, 0, tcg_gen_mulu2_i64)
TRANS(vmulwod_q_du, LSX, gen_vmul_q, 1, 1, tcg_gen_mulu2_i64)
TRANS(vmulwev_q_du_d, LSX, gen_vmul_q, 0, 0, tcg_gen_mulus2_i64)
TRANS(vmulwod_q_du_d, LSX, gen_vmul_q, 1, 1, tcg_gen_mulus2_i64)
TRANS(xvmulwev_q_d, LASX, gen_xvmul_q, 0, 0, tcg_gen_muls2_i64)
TRANS(xvmulwod_q_d, LASX, gen_xvmul_q, 1, 1, tcg_gen_muls2_i64)
TRANS(xvmulwev_q_du, LASX, gen_xvmul_q, 0, 0, tcg_gen_mulu2_i64)
TRANS(xvmulwod_q_du, LASX, gen_xvmul_q, 1, 1, tcg_gen_mulu2_i64)
TRANS(xvmulwev_q_du_d, LASX, gen_xvmul_q, 0, 0, tcg_gen_mulus2_i64)
TRANS(xvmulwod_q_du_d, LASX, gen_xvmul_q, 1, 1, tcg_gen_mulus2_i64)
2189
2190static void gen_vmulwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2191{
2192    TCGv_vec t1, t2;
2193    int halfbits = 4 << vece;
2194
2195    t1 = tcg_temp_new_vec_matching(a);
2196    t2 = tcg_temp_new_vec_matching(b);
2197    tcg_gen_sari_vec(vece, t1, a, halfbits);
2198    tcg_gen_sari_vec(vece, t2, b, halfbits);
2199    tcg_gen_mul_vec(vece, t, t1, t2);
2200}
2201
2202static void gen_vmulwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2203{
2204    TCGv_i32 t1, t2;
2205
2206    t1 = tcg_temp_new_i32();
2207    t2 = tcg_temp_new_i32();
2208    tcg_gen_sari_i32(t1, a, 16);
2209    tcg_gen_sari_i32(t2, b, 16);
2210    tcg_gen_mul_i32(t, t1, t2);
2211}
2212
2213static void gen_vmulwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2214{
2215    TCGv_i64 t1, t2;
2216
2217    t1 = tcg_temp_new_i64();
2218    t2 = tcg_temp_new_i64();
2219    tcg_gen_sari_i64(t1, a, 32);
2220    tcg_gen_sari_i64(t2, b, 32);
2221    tcg_gen_mul_i64(t, t1, t2);
2222}
2223
2224static void do_vmulwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2225                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2226{
2227    static const TCGOpcode vecop_list[] = {
2228        INDEX_op_sari_vec, INDEX_op_mul_vec, 0
2229        };
2230    static const GVecGen3 op[3] = {
2231        {
2232            .fniv = gen_vmulwod_s,
2233            .fno = gen_helper_vmulwod_h_b,
2234            .opt_opc = vecop_list,
2235            .vece = MO_16
2236        },
2237        {
2238            .fni4 = gen_vmulwod_w_h,
2239            .fniv = gen_vmulwod_s,
2240            .fno = gen_helper_vmulwod_w_h,
2241            .opt_opc = vecop_list,
2242            .vece = MO_32
2243        },
2244        {
2245            .fni8 = gen_vmulwod_d_w,
2246            .fniv = gen_vmulwod_s,
2247            .fno = gen_helper_vmulwod_d_w,
2248            .opt_opc = vecop_list,
2249            .vece = MO_64
2250        },
2251    };
2252
2253    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2254}
2255
/*
 * Translators for the signed [x]vmulwod.{h.b,w.h,d.w}.  The MO_* argument
 * is the size named second in the mnemonic (the narrower one); it is the
 * value do_vmulwod_s receives as its table index.
 */
TRANS(vmulwod_h_b, LSX, gvec_vvv, MO_8, do_vmulwod_s)
TRANS(vmulwod_w_h, LSX, gvec_vvv, MO_16, do_vmulwod_s)
TRANS(vmulwod_d_w, LSX, gvec_vvv, MO_32, do_vmulwod_s)
TRANS(xvmulwod_h_b, LASX, gvec_xxx, MO_8, do_vmulwod_s)
TRANS(xvmulwod_w_h, LASX, gvec_xxx, MO_16, do_vmulwod_s)
TRANS(xvmulwod_d_w, LASX, gvec_xxx, MO_32, do_vmulwod_s)
2262
2263static void gen_vmulwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2264{
2265    TCGv_vec t1, t2, mask;
2266
2267    t1 = tcg_temp_new_vec_matching(a);
2268    t2 = tcg_temp_new_vec_matching(b);
2269    mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
2270    tcg_gen_and_vec(vece, t1, a, mask);
2271    tcg_gen_and_vec(vece, t2, b, mask);
2272    tcg_gen_mul_vec(vece, t, t1, t2);
2273}
2274
2275static void gen_vmulwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2276{
2277    TCGv_i32 t1, t2;
2278
2279    t1 = tcg_temp_new_i32();
2280    t2 = tcg_temp_new_i32();
2281    tcg_gen_ext16u_i32(t1, a);
2282    tcg_gen_ext16u_i32(t2, b);
2283    tcg_gen_mul_i32(t, t1, t2);
2284}
2285
2286static void gen_vmulwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2287{
2288    TCGv_i64 t1, t2;
2289
2290    t1 = tcg_temp_new_i64();
2291    t2 = tcg_temp_new_i64();
2292    tcg_gen_ext32u_i64(t1, a);
2293    tcg_gen_ext32u_i64(t2, b);
2294    tcg_gen_mul_i64(t, t1, t2);
2295}
2296
2297static void do_vmulwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2298                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2299{
2300    static const TCGOpcode vecop_list[] = {
2301        INDEX_op_mul_vec, 0
2302        };
2303    static const GVecGen3 op[3] = {
2304        {
2305            .fniv = gen_vmulwev_u,
2306            .fno = gen_helper_vmulwev_h_bu,
2307            .opt_opc = vecop_list,
2308            .vece = MO_16
2309        },
2310        {
2311            .fni4 = gen_vmulwev_w_hu,
2312            .fniv = gen_vmulwev_u,
2313            .fno = gen_helper_vmulwev_w_hu,
2314            .opt_opc = vecop_list,
2315            .vece = MO_32
2316        },
2317        {
2318            .fni8 = gen_vmulwev_d_wu,
2319            .fniv = gen_vmulwev_u,
2320            .fno = gen_helper_vmulwev_d_wu,
2321            .opt_opc = vecop_list,
2322            .vece = MO_64
2323        },
2324    };
2325
2326    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2327}
2328
/*
 * Translators for the unsigned [x]vmulwev.{h.bu,w.hu,d.wu}.  The MO_*
 * argument is the size named second in the mnemonic (the narrower one);
 * it is the value do_vmulwev_u receives as its table index.
 */
TRANS(vmulwev_h_bu, LSX, gvec_vvv, MO_8, do_vmulwev_u)
TRANS(vmulwev_w_hu, LSX, gvec_vvv, MO_16, do_vmulwev_u)
TRANS(vmulwev_d_wu, LSX, gvec_vvv, MO_32, do_vmulwev_u)
TRANS(xvmulwev_h_bu, LASX, gvec_xxx, MO_8, do_vmulwev_u)
TRANS(xvmulwev_w_hu, LASX, gvec_xxx, MO_16, do_vmulwev_u)
TRANS(xvmulwev_d_wu, LASX, gvec_xxx, MO_32, do_vmulwev_u)
2335
2336static void gen_vmulwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2337{
2338    TCGv_vec t1, t2;
2339    int halfbits = 4 << vece;
2340
2341    t1 = tcg_temp_new_vec_matching(a);
2342    t2 = tcg_temp_new_vec_matching(b);
2343    tcg_gen_shri_vec(vece, t1, a, halfbits);
2344    tcg_gen_shri_vec(vece, t2, b, halfbits);
2345    tcg_gen_mul_vec(vece, t, t1, t2);
2346}
2347
2348static void gen_vmulwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2349{
2350    TCGv_i32 t1, t2;
2351
2352    t1 = tcg_temp_new_i32();
2353    t2 = tcg_temp_new_i32();
2354    tcg_gen_shri_i32(t1, a, 16);
2355    tcg_gen_shri_i32(t2, b, 16);
2356    tcg_gen_mul_i32(t, t1, t2);
2357}
2358
2359static void gen_vmulwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2360{
2361    TCGv_i64 t1, t2;
2362
2363    t1 = tcg_temp_new_i64();
2364    t2 = tcg_temp_new_i64();
2365    tcg_gen_shri_i64(t1, a, 32);
2366    tcg_gen_shri_i64(t2, b, 32);
2367    tcg_gen_mul_i64(t, t1, t2);
2368}
2369
2370static void do_vmulwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2371                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2372{
2373    static const TCGOpcode vecop_list[] = {
2374        INDEX_op_shri_vec, INDEX_op_mul_vec, 0
2375        };
2376    static const GVecGen3 op[3] = {
2377        {
2378            .fniv = gen_vmulwod_u,
2379            .fno = gen_helper_vmulwod_h_bu,
2380            .opt_opc = vecop_list,
2381            .vece = MO_16
2382        },
2383        {
2384            .fni4 = gen_vmulwod_w_hu,
2385            .fniv = gen_vmulwod_u,
2386            .fno = gen_helper_vmulwod_w_hu,
2387            .opt_opc = vecop_list,
2388            .vece = MO_32
2389        },
2390        {
2391            .fni8 = gen_vmulwod_d_wu,
2392            .fniv = gen_vmulwod_u,
2393            .fno = gen_helper_vmulwod_d_wu,
2394            .opt_opc = vecop_list,
2395            .vece = MO_64
2396        },
2397    };
2398
2399    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2400}
2401
/*
 * Translators for the unsigned [x]vmulwod.{h.bu,w.hu,d.wu}.  The MO_*
 * argument is the size named second in the mnemonic (the narrower one);
 * it is the value do_vmulwod_u receives as its table index.
 */
TRANS(vmulwod_h_bu, LSX, gvec_vvv, MO_8, do_vmulwod_u)
TRANS(vmulwod_w_hu, LSX, gvec_vvv, MO_16, do_vmulwod_u)
TRANS(vmulwod_d_wu, LSX, gvec_vvv, MO_32, do_vmulwod_u)
TRANS(xvmulwod_h_bu, LASX, gvec_xxx, MO_8, do_vmulwod_u)
TRANS(xvmulwod_w_hu, LASX, gvec_xxx, MO_16, do_vmulwod_u)
TRANS(xvmulwod_d_wu, LASX, gvec_xxx, MO_32, do_vmulwod_u)
2408
2409static void gen_vmulwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2410{
2411    TCGv_vec t1, t2, mask;
2412    int halfbits = 4 << vece;
2413
2414    t1 = tcg_temp_new_vec_matching(a);
2415    t2 = tcg_temp_new_vec_matching(b);
2416    mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
2417    tcg_gen_and_vec(vece, t1, a, mask);
2418    tcg_gen_shli_vec(vece, t2, b, halfbits);
2419    tcg_gen_sari_vec(vece, t2, t2, halfbits);
2420    tcg_gen_mul_vec(vece, t, t1, t2);
2421}
2422
2423static void gen_vmulwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2424{
2425    TCGv_i32 t1, t2;
2426
2427    t1 = tcg_temp_new_i32();
2428    t2 = tcg_temp_new_i32();
2429    tcg_gen_ext16u_i32(t1, a);
2430    tcg_gen_ext16s_i32(t2, b);
2431    tcg_gen_mul_i32(t, t1, t2);
2432}
2433
2434static void gen_vmulwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2435{
2436    TCGv_i64 t1, t2;
2437
2438    t1 = tcg_temp_new_i64();
2439    t2 = tcg_temp_new_i64();
2440    tcg_gen_ext32u_i64(t1, a);
2441    tcg_gen_ext32s_i64(t2, b);
2442    tcg_gen_mul_i64(t, t1, t2);
2443}
2444
2445static void do_vmulwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2446                           uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2447{
2448    static const TCGOpcode vecop_list[] = {
2449        INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0
2450        };
2451    static const GVecGen3 op[3] = {
2452        {
2453            .fniv = gen_vmulwev_u_s,
2454            .fno = gen_helper_vmulwev_h_bu_b,
2455            .opt_opc = vecop_list,
2456            .vece = MO_16
2457        },
2458        {
2459            .fni4 = gen_vmulwev_w_hu_h,
2460            .fniv = gen_vmulwev_u_s,
2461            .fno = gen_helper_vmulwev_w_hu_h,
2462            .opt_opc = vecop_list,
2463            .vece = MO_32
2464        },
2465        {
2466            .fni8 = gen_vmulwev_d_wu_w,
2467            .fniv = gen_vmulwev_u_s,
2468            .fno = gen_helper_vmulwev_d_wu_w,
2469            .opt_opc = vecop_list,
2470            .vece = MO_64
2471        },
2472    };
2473
2474    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2475}
2476
/*
 * Translators for the mixed-sign [x]vmulwev.{h.bu.b,w.hu.h,d.wu.w}.  The
 * MO_* argument is the narrower size named in the mnemonic; it is the
 * value do_vmulwev_u_s receives as its table index.
 */
TRANS(vmulwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vmulwev_u_s)
TRANS(vmulwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vmulwev_u_s)
TRANS(vmulwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vmulwev_u_s)
TRANS(xvmulwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vmulwev_u_s)
TRANS(xvmulwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vmulwev_u_s)
TRANS(xvmulwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vmulwev_u_s)
2483
2484static void gen_vmulwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2485{
2486    TCGv_vec t1, t2;
2487    int halfbits = 4 << vece;
2488
2489    t1 = tcg_temp_new_vec_matching(a);
2490    t2 = tcg_temp_new_vec_matching(b);
2491    tcg_gen_shri_vec(vece, t1, a, halfbits);
2492    tcg_gen_sari_vec(vece, t2, b, halfbits);
2493    tcg_gen_mul_vec(vece, t, t1, t2);
2494}
2495
2496static void gen_vmulwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2497{
2498    TCGv_i32 t1, t2;
2499
2500    t1 = tcg_temp_new_i32();
2501    t2 = tcg_temp_new_i32();
2502    tcg_gen_shri_i32(t1, a, 16);
2503    tcg_gen_sari_i32(t2, b, 16);
2504    tcg_gen_mul_i32(t, t1, t2);
2505}
2506static void gen_vmulwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2507{
2508    TCGv_i64 t1, t2;
2509
2510    t1 = tcg_temp_new_i64();
2511    t2 = tcg_temp_new_i64();
2512    tcg_gen_shri_i64(t1, a, 32);
2513    tcg_gen_sari_i64(t2, b, 32);
2514    tcg_gen_mul_i64(t, t1, t2);
2515}
2516
2517static void do_vmulwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2518                           uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2519{
2520    static const TCGOpcode vecop_list[] = {
2521        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0
2522        };
2523    static const GVecGen3 op[3] = {
2524        {
2525            .fniv = gen_vmulwod_u_s,
2526            .fno = gen_helper_vmulwod_h_bu_b,
2527            .opt_opc = vecop_list,
2528            .vece = MO_16
2529        },
2530        {
2531            .fni4 = gen_vmulwod_w_hu_h,
2532            .fniv = gen_vmulwod_u_s,
2533            .fno = gen_helper_vmulwod_w_hu_h,
2534            .opt_opc = vecop_list,
2535            .vece = MO_32
2536        },
2537        {
2538            .fni8 = gen_vmulwod_d_wu_w,
2539            .fniv = gen_vmulwod_u_s,
2540            .fno = gen_helper_vmulwod_d_wu_w,
2541            .opt_opc = vecop_list,
2542            .vece = MO_64
2543        },
2544    };
2545
2546    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2547}
2548
/*
 * Translators for the mixed-sign [x]vmulwod.{h.bu.b,w.hu.h,d.wu.w}.  The
 * MO_* argument is the narrower size named in the mnemonic; it is the
 * value do_vmulwod_u_s receives as its table index.
 */
TRANS(vmulwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vmulwod_u_s)
TRANS(vmulwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vmulwod_u_s)
TRANS(vmulwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vmulwod_u_s)
TRANS(xvmulwod_h_bu_b, LASX, gvec_xxx, MO_8, do_vmulwod_u_s)
TRANS(xvmulwod_w_hu_h, LASX, gvec_xxx, MO_16, do_vmulwod_u_s)
TRANS(xvmulwod_d_wu_w, LASX, gvec_xxx, MO_32, do_vmulwod_u_s)
2555
2556static void gen_vmadd(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2557{
2558    TCGv_vec t1;
2559
2560    t1 = tcg_temp_new_vec_matching(t);
2561    tcg_gen_mul_vec(vece, t1, a, b);
2562    tcg_gen_add_vec(vece, t, t, t1);
2563}
2564
2565static void gen_vmadd_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2566{
2567    TCGv_i32 t1;
2568
2569    t1 = tcg_temp_new_i32();
2570    tcg_gen_mul_i32(t1, a, b);
2571    tcg_gen_add_i32(t, t, t1);
2572}
2573
2574static void gen_vmadd_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2575{
2576    TCGv_i64 t1;
2577
2578    t1 = tcg_temp_new_i64();
2579    tcg_gen_mul_i64(t1, a, b);
2580    tcg_gen_add_i64(t, t, t1);
2581}
2582
2583static void do_vmadd(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2584                     uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2585{
2586    static const TCGOpcode vecop_list[] = {
2587        INDEX_op_mul_vec, INDEX_op_add_vec, 0
2588        };
2589    static const GVecGen3 op[4] = {
2590        {
2591            .fniv = gen_vmadd,
2592            .fno = gen_helper_vmadd_b,
2593            .load_dest = true,
2594            .opt_opc = vecop_list,
2595            .vece = MO_8
2596        },
2597        {
2598            .fniv = gen_vmadd,
2599            .fno = gen_helper_vmadd_h,
2600            .load_dest = true,
2601            .opt_opc = vecop_list,
2602            .vece = MO_16
2603        },
2604        {
2605            .fni4 = gen_vmadd_w,
2606            .fniv = gen_vmadd,
2607            .fno = gen_helper_vmadd_w,
2608            .load_dest = true,
2609            .opt_opc = vecop_list,
2610            .vece = MO_32
2611        },
2612        {
2613            .fni8 = gen_vmadd_d,
2614            .fniv = gen_vmadd,
2615            .fno = gen_helper_vmadd_d,
2616            .load_dest = true,
2617            .opt_opc = vecop_list,
2618            .vece = MO_64
2619        },
2620    };
2621
2622    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2623}
2624
/*
 * Translators for [x]vmadd.{b,h,w,d}: every element size binds do_vmadd,
 * through gvec_vvv for LSX and gvec_xxx for LASX.
 */
TRANS(vmadd_b, LSX, gvec_vvv, MO_8, do_vmadd)
TRANS(vmadd_h, LSX, gvec_vvv, MO_16, do_vmadd)
TRANS(vmadd_w, LSX, gvec_vvv, MO_32, do_vmadd)
TRANS(vmadd_d, LSX, gvec_vvv, MO_64, do_vmadd)
TRANS(xvmadd_b, LASX, gvec_xxx, MO_8, do_vmadd)
TRANS(xvmadd_h, LASX, gvec_xxx, MO_16, do_vmadd)
TRANS(xvmadd_w, LASX, gvec_xxx, MO_32, do_vmadd)
TRANS(xvmadd_d, LASX, gvec_xxx, MO_64, do_vmadd)
2633
2634static void gen_vmsub(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2635{
2636    TCGv_vec t1;
2637
2638    t1 = tcg_temp_new_vec_matching(t);
2639    tcg_gen_mul_vec(vece, t1, a, b);
2640    tcg_gen_sub_vec(vece, t, t, t1);
2641}
2642
2643static void gen_vmsub_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2644{
2645    TCGv_i32 t1;
2646
2647    t1 = tcg_temp_new_i32();
2648    tcg_gen_mul_i32(t1, a, b);
2649    tcg_gen_sub_i32(t, t, t1);
2650}
2651
2652static void gen_vmsub_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2653{
2654    TCGv_i64 t1;
2655
2656    t1 = tcg_temp_new_i64();
2657    tcg_gen_mul_i64(t1, a, b);
2658    tcg_gen_sub_i64(t, t, t1);
2659}
2660
2661static void do_vmsub(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2662                     uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2663{
2664    static const TCGOpcode vecop_list[] = {
2665        INDEX_op_mul_vec, INDEX_op_sub_vec, 0
2666        };
2667    static const GVecGen3 op[4] = {
2668        {
2669            .fniv = gen_vmsub,
2670            .fno = gen_helper_vmsub_b,
2671            .load_dest = true,
2672            .opt_opc = vecop_list,
2673            .vece = MO_8
2674        },
2675        {
2676            .fniv = gen_vmsub,
2677            .fno = gen_helper_vmsub_h,
2678            .load_dest = true,
2679            .opt_opc = vecop_list,
2680            .vece = MO_16
2681        },
2682        {
2683            .fni4 = gen_vmsub_w,
2684            .fniv = gen_vmsub,
2685            .fno = gen_helper_vmsub_w,
2686            .load_dest = true,
2687            .opt_opc = vecop_list,
2688            .vece = MO_32
2689        },
2690        {
2691            .fni8 = gen_vmsub_d,
2692            .fniv = gen_vmsub,
2693            .fno = gen_helper_vmsub_d,
2694            .load_dest = true,
2695            .opt_opc = vecop_list,
2696            .vece = MO_64
2697        },
2698    };
2699
2700    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2701}
2702
/*
 * Translators for [x]vmsub.{b,h,w,d}: every element size binds do_vmsub,
 * through gvec_vvv for LSX and gvec_xxx for LASX.
 */
TRANS(vmsub_b, LSX, gvec_vvv, MO_8, do_vmsub)
TRANS(vmsub_h, LSX, gvec_vvv, MO_16, do_vmsub)
TRANS(vmsub_w, LSX, gvec_vvv, MO_32, do_vmsub)
TRANS(vmsub_d, LSX, gvec_vvv, MO_64, do_vmsub)
TRANS(xvmsub_b, LASX, gvec_xxx, MO_8, do_vmsub)
TRANS(xvmsub_h, LASX, gvec_xxx, MO_16, do_vmsub)
TRANS(xvmsub_w, LASX, gvec_xxx, MO_32, do_vmsub)
TRANS(xvmsub_d, LASX, gvec_xxx, MO_64, do_vmsub)
2711
2712static void gen_vmaddwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2713{
2714    TCGv_vec t1, t2, t3;
2715    int halfbits = 4 << vece;
2716
2717    t1 = tcg_temp_new_vec_matching(a);
2718    t2 = tcg_temp_new_vec_matching(b);
2719    t3 = tcg_temp_new_vec_matching(t);
2720    tcg_gen_shli_vec(vece, t1, a, halfbits);
2721    tcg_gen_sari_vec(vece, t1, t1, halfbits);
2722    tcg_gen_shli_vec(vece, t2, b, halfbits);
2723    tcg_gen_sari_vec(vece, t2, t2, halfbits);
2724    tcg_gen_mul_vec(vece, t3, t1, t2);
2725    tcg_gen_add_vec(vece, t, t, t3);
2726}
2727
2728static void gen_vmaddwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2729{
2730    TCGv_i32 t1;
2731
2732    t1 = tcg_temp_new_i32();
2733    gen_vmulwev_w_h(t1, a, b);
2734    tcg_gen_add_i32(t, t, t1);
2735}
2736
2737static void gen_vmaddwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2738{
2739    TCGv_i64 t1;
2740
2741    t1 = tcg_temp_new_i64();
2742    gen_vmulwev_d_w(t1, a, b);
2743    tcg_gen_add_i64(t, t, t1);
2744}
2745
2746static void do_vmaddwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2747                          uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2748{
2749    static const TCGOpcode vecop_list[] = {
2750        INDEX_op_shli_vec, INDEX_op_sari_vec,
2751        INDEX_op_mul_vec, INDEX_op_add_vec, 0
2752        };
2753    static const GVecGen3 op[3] = {
2754        {
2755            .fniv = gen_vmaddwev_s,
2756            .fno = gen_helper_vmaddwev_h_b,
2757            .load_dest = true,
2758            .opt_opc = vecop_list,
2759            .vece = MO_16
2760        },
2761        {
2762            .fni4 = gen_vmaddwev_w_h,
2763            .fniv = gen_vmaddwev_s,
2764            .fno = gen_helper_vmaddwev_w_h,
2765            .load_dest = true,
2766            .opt_opc = vecop_list,
2767            .vece = MO_32
2768        },
2769        {
2770            .fni8 = gen_vmaddwev_d_w,
2771            .fniv = gen_vmaddwev_s,
2772            .fno = gen_helper_vmaddwev_d_w,
2773            .load_dest = true,
2774            .opt_opc = vecop_list,
2775            .vece = MO_64
2776        },
2777    };
2778
2779    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2780}
2781
/*
 * Translators for the signed [x]vmaddwev.{h.b,w.h,d.w}.  The MO_* argument
 * is the size named second in the mnemonic (the narrower one); it is the
 * value do_vmaddwev_s receives as its table index.
 */
TRANS(vmaddwev_h_b, LSX, gvec_vvv, MO_8, do_vmaddwev_s)
TRANS(vmaddwev_w_h, LSX, gvec_vvv, MO_16, do_vmaddwev_s)
TRANS(vmaddwev_d_w, LSX, gvec_vvv, MO_32, do_vmaddwev_s)
TRANS(xvmaddwev_h_b, LASX, gvec_xxx, MO_8, do_vmaddwev_s)
TRANS(xvmaddwev_w_h, LASX, gvec_xxx, MO_16, do_vmaddwev_s)
TRANS(xvmaddwev_d_w, LASX, gvec_xxx, MO_32, do_vmaddwev_s)
2788
2789static bool gen_vmadd_q_vl(DisasContext * ctx,
2790                           arg_vvv *a, uint32_t oprsz, int idx1, int idx2,
2791                           void (*func)(TCGv_i64, TCGv_i64,
2792                                        TCGv_i64, TCGv_i64))
2793{
2794    TCGv_i64 rh, rl, arg1, arg2, th, tl;
2795    int i;
2796
2797    if (!check_vec(ctx, oprsz)) {
2798        return true;
2799    }
2800
2801    rh = tcg_temp_new_i64();
2802    rl = tcg_temp_new_i64();
2803    arg1 = tcg_temp_new_i64();
2804    arg2 = tcg_temp_new_i64();
2805    th = tcg_temp_new_i64();
2806    tl = tcg_temp_new_i64();
2807
2808    for (i = 0; i < oprsz / 16; i++) {
2809        get_vreg64(arg1, a->vj, 2 * i + idx1);
2810        get_vreg64(arg2, a->vk, 2 * i + idx2);
2811        get_vreg64(rh, a->vd, 2 * i + 1);
2812        get_vreg64(rl, a->vd, 2 * i);
2813
2814        func(tl, th, arg1, arg2);
2815        tcg_gen_add2_i64(rl, rh, rl, rh, tl, th);
2816
2817        set_vreg64(rh, a->vd, 2 * i + 1);
2818        set_vreg64(rl, a->vd, 2 * i);
2819    }
2820
2821    return true;
2822}
2823
2824static bool gen_vmadd_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2,
2825                        void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
2826{
2827    return gen_vmadd_q_vl(ctx, a, 16, idx1, idx2, func);
2828}
2829
2830static bool gen_xvmadd_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2,
2831                         void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
2832{
2833    return gen_vmadd_q_vl(ctx, a, 32, idx1, idx2, func);
2834}
2835
/*
 * 64x64->128 multiply-accumulate translators.  The two integer arguments
 * are idx1/idx2 of gen_vmadd_q()/gen_xvmadd_q(): 0, 0 for the "wev" forms
 * and 1, 1 for the "wod" forms.  The last argument is the double-width
 * multiply generator: muls2 for q.d, mulu2 for q.du, mulus2 for q.du.d.
 */
TRANS(vmaddwev_q_d, LSX, gen_vmadd_q, 0, 0, tcg_gen_muls2_i64)
TRANS(vmaddwod_q_d, LSX, gen_vmadd_q, 1, 1, tcg_gen_muls2_i64)
TRANS(vmaddwev_q_du, LSX, gen_vmadd_q, 0, 0, tcg_gen_mulu2_i64)
TRANS(vmaddwod_q_du, LSX, gen_vmadd_q, 1, 1, tcg_gen_mulu2_i64)
TRANS(vmaddwev_q_du_d, LSX, gen_vmadd_q, 0, 0, tcg_gen_mulus2_i64)
TRANS(vmaddwod_q_du_d, LSX, gen_vmadd_q, 1, 1, tcg_gen_mulus2_i64)
TRANS(xvmaddwev_q_d, LASX, gen_xvmadd_q, 0, 0, tcg_gen_muls2_i64)
TRANS(xvmaddwod_q_d, LASX, gen_xvmadd_q, 1, 1, tcg_gen_muls2_i64)
TRANS(xvmaddwev_q_du, LASX, gen_xvmadd_q, 0, 0, tcg_gen_mulu2_i64)
TRANS(xvmaddwod_q_du, LASX, gen_xvmadd_q, 1, 1, tcg_gen_mulu2_i64)
TRANS(xvmaddwev_q_du_d, LASX, gen_xvmadd_q, 0, 0, tcg_gen_mulus2_i64)
TRANS(xvmaddwod_q_du_d, LASX, gen_xvmadd_q, 1, 1, tcg_gen_mulus2_i64)
2848
2849static void gen_vmaddwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2850{
2851    TCGv_vec t1, t2, t3;
2852    int halfbits = 4 << vece;
2853
2854    t1 = tcg_temp_new_vec_matching(a);
2855    t2 = tcg_temp_new_vec_matching(b);
2856    t3 = tcg_temp_new_vec_matching(t);
2857    tcg_gen_sari_vec(vece, t1, a, halfbits);
2858    tcg_gen_sari_vec(vece, t2, b, halfbits);
2859    tcg_gen_mul_vec(vece, t3, t1, t2);
2860    tcg_gen_add_vec(vece, t, t, t3);
2861}
2862
2863static void gen_vmaddwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2864{
2865    TCGv_i32 t1;
2866
2867    t1 = tcg_temp_new_i32();
2868    gen_vmulwod_w_h(t1, a, b);
2869    tcg_gen_add_i32(t, t, t1);
2870}
2871
2872static void gen_vmaddwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2873{
2874    TCGv_i64 t1;
2875
2876    t1 = tcg_temp_new_i64();
2877    gen_vmulwod_d_w(t1, a, b);
2878    tcg_gen_add_i64(t, t, t1);
2879}
2880
2881static void do_vmaddwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2882                          uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2883{
2884    static const TCGOpcode vecop_list[] = {
2885        INDEX_op_sari_vec, INDEX_op_mul_vec, INDEX_op_add_vec, 0
2886        };
2887    static const GVecGen3 op[3] = {
2888        {
2889            .fniv = gen_vmaddwod_s,
2890            .fno = gen_helper_vmaddwod_h_b,
2891            .load_dest = true,
2892            .opt_opc = vecop_list,
2893            .vece = MO_16
2894        },
2895        {
2896            .fni4 = gen_vmaddwod_w_h,
2897            .fniv = gen_vmaddwod_s,
2898            .fno = gen_helper_vmaddwod_w_h,
2899            .load_dest = true,
2900            .opt_opc = vecop_list,
2901            .vece = MO_32
2902        },
2903        {
2904            .fni8 = gen_vmaddwod_d_w,
2905            .fniv = gen_vmaddwod_s,
2906            .fno = gen_helper_vmaddwod_d_w,
2907            .load_dest = true,
2908            .opt_opc = vecop_list,
2909            .vece = MO_64
2910        },
2911    };
2912
2913    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2914}
2915
/*
 * vmaddwod.{h.b,w.h,d.w} and their xv* counterparts, all expanded by
 * do_vmaddwod_s.  MO_8/MO_16/MO_32 is the source element size.
 */
TRANS(vmaddwod_h_b, LSX, gvec_vvv, MO_8, do_vmaddwod_s)
TRANS(vmaddwod_w_h, LSX, gvec_vvv, MO_16, do_vmaddwod_s)
TRANS(vmaddwod_d_w, LSX, gvec_vvv, MO_32, do_vmaddwod_s)
TRANS(xvmaddwod_h_b, LASX, gvec_xxx, MO_8, do_vmaddwod_s)
TRANS(xvmaddwod_w_h, LASX, gvec_xxx, MO_16, do_vmaddwod_s)
TRANS(xvmaddwod_d_w, LASX, gvec_xxx, MO_32, do_vmaddwod_s)
2922
2923static void gen_vmaddwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2924{
2925    TCGv_vec t1, t2, mask;
2926
2927    t1 = tcg_temp_new_vec_matching(t);
2928    t2 = tcg_temp_new_vec_matching(b);
2929    mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
2930    tcg_gen_and_vec(vece, t1, a, mask);
2931    tcg_gen_and_vec(vece, t2, b, mask);
2932    tcg_gen_mul_vec(vece, t1, t1, t2);
2933    tcg_gen_add_vec(vece, t, t, t1);
2934}
2935
2936static void gen_vmaddwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2937{
2938    TCGv_i32 t1;
2939
2940    t1 = tcg_temp_new_i32();
2941    gen_vmulwev_w_hu(t1, a, b);
2942    tcg_gen_add_i32(t, t, t1);
2943}
2944
2945static void gen_vmaddwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2946{
2947    TCGv_i64 t1;
2948
2949    t1 = tcg_temp_new_i64();
2950    gen_vmulwev_d_wu(t1, a, b);
2951    tcg_gen_add_i64(t, t, t1);
2952}
2953
2954static void do_vmaddwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2955                          uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2956{
2957    static const TCGOpcode vecop_list[] = {
2958        INDEX_op_mul_vec, INDEX_op_add_vec, 0
2959        };
2960    static const GVecGen3 op[3] = {
2961        {
2962            .fniv = gen_vmaddwev_u,
2963            .fno = gen_helper_vmaddwev_h_bu,
2964            .load_dest = true,
2965            .opt_opc = vecop_list,
2966            .vece = MO_16
2967        },
2968        {
2969            .fni4 = gen_vmaddwev_w_hu,
2970            .fniv = gen_vmaddwev_u,
2971            .fno = gen_helper_vmaddwev_w_hu,
2972            .load_dest = true,
2973            .opt_opc = vecop_list,
2974            .vece = MO_32
2975        },
2976        {
2977            .fni8 = gen_vmaddwev_d_wu,
2978            .fniv = gen_vmaddwev_u,
2979            .fno = gen_helper_vmaddwev_d_wu,
2980            .load_dest = true,
2981            .opt_opc = vecop_list,
2982            .vece = MO_64
2983        },
2984    };
2985
2986    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2987}
2988
/*
 * vmaddwev.{h.bu,w.hu,d.wu} and their xv* counterparts, all expanded by
 * do_vmaddwev_u.  MO_8/MO_16/MO_32 is the source element size.
 */
TRANS(vmaddwev_h_bu, LSX, gvec_vvv, MO_8, do_vmaddwev_u)
TRANS(vmaddwev_w_hu, LSX, gvec_vvv, MO_16, do_vmaddwev_u)
TRANS(vmaddwev_d_wu, LSX, gvec_vvv, MO_32, do_vmaddwev_u)
TRANS(xvmaddwev_h_bu, LASX, gvec_xxx, MO_8, do_vmaddwev_u)
TRANS(xvmaddwev_w_hu, LASX, gvec_xxx, MO_16, do_vmaddwev_u)
TRANS(xvmaddwev_d_wu, LASX, gvec_xxx, MO_32, do_vmaddwev_u)
2995
2996static void gen_vmaddwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2997{
2998    TCGv_vec t1, t2, t3;
2999    int halfbits = 4 << vece;
3000
3001    t1 = tcg_temp_new_vec_matching(a);
3002    t2 = tcg_temp_new_vec_matching(b);
3003    t3 = tcg_temp_new_vec_matching(t);
3004    tcg_gen_shri_vec(vece, t1, a, halfbits);
3005    tcg_gen_shri_vec(vece, t2, b, halfbits);
3006    tcg_gen_mul_vec(vece, t3, t1, t2);
3007    tcg_gen_add_vec(vece, t, t, t3);
3008}
3009
3010static void gen_vmaddwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
3011{
3012    TCGv_i32 t1;
3013
3014    t1 = tcg_temp_new_i32();
3015    gen_vmulwod_w_hu(t1, a, b);
3016    tcg_gen_add_i32(t, t, t1);
3017}
3018
3019static void gen_vmaddwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
3020{
3021    TCGv_i64 t1;
3022
3023    t1 = tcg_temp_new_i64();
3024    gen_vmulwod_d_wu(t1, a, b);
3025    tcg_gen_add_i64(t, t, t1);
3026}
3027
3028static void do_vmaddwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3029                          uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
3030{
3031    static const TCGOpcode vecop_list[] = {
3032        INDEX_op_shri_vec, INDEX_op_mul_vec, INDEX_op_add_vec, 0
3033        };
3034    static const GVecGen3 op[3] = {
3035        {
3036            .fniv = gen_vmaddwod_u,
3037            .fno = gen_helper_vmaddwod_h_bu,
3038            .load_dest = true,
3039            .opt_opc = vecop_list,
3040            .vece = MO_16
3041        },
3042        {
3043            .fni4 = gen_vmaddwod_w_hu,
3044            .fniv = gen_vmaddwod_u,
3045            .fno = gen_helper_vmaddwod_w_hu,
3046            .load_dest = true,
3047            .opt_opc = vecop_list,
3048            .vece = MO_32
3049        },
3050        {
3051            .fni8 = gen_vmaddwod_d_wu,
3052            .fniv = gen_vmaddwod_u,
3053            .fno = gen_helper_vmaddwod_d_wu,
3054            .load_dest = true,
3055            .opt_opc = vecop_list,
3056            .vece = MO_64
3057        },
3058    };
3059
3060    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
3061}
3062
/*
 * vmaddwod.{h.bu,w.hu,d.wu} and their xv* counterparts, all expanded by
 * do_vmaddwod_u.  MO_8/MO_16/MO_32 is the source element size.
 */
TRANS(vmaddwod_h_bu, LSX, gvec_vvv, MO_8, do_vmaddwod_u)
TRANS(vmaddwod_w_hu, LSX, gvec_vvv, MO_16, do_vmaddwod_u)
TRANS(vmaddwod_d_wu, LSX, gvec_vvv, MO_32, do_vmaddwod_u)
TRANS(xvmaddwod_h_bu, LASX, gvec_xxx, MO_8, do_vmaddwod_u)
TRANS(xvmaddwod_w_hu, LASX, gvec_xxx, MO_16, do_vmaddwod_u)
TRANS(xvmaddwod_d_wu, LASX, gvec_xxx, MO_32, do_vmaddwod_u)
3069
3070static void gen_vmaddwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
3071{
3072    TCGv_vec t1, t2, mask;
3073    int halfbits = 4 << vece;
3074
3075    t1 = tcg_temp_new_vec_matching(a);
3076    t2 = tcg_temp_new_vec_matching(b);
3077    mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
3078    tcg_gen_and_vec(vece, t1, a, mask);
3079    tcg_gen_shli_vec(vece, t2, b, halfbits);
3080    tcg_gen_sari_vec(vece, t2, t2, halfbits);
3081    tcg_gen_mul_vec(vece, t1, t1, t2);
3082    tcg_gen_add_vec(vece, t, t, t1);
3083}
3084
3085static void gen_vmaddwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
3086{
3087    TCGv_i32 t1;
3088
3089    t1 = tcg_temp_new_i32();
3090    gen_vmulwev_w_hu_h(t1, a, b);
3091    tcg_gen_add_i32(t, t, t1);
3092}
3093
3094static void gen_vmaddwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
3095{
3096    TCGv_i64 t1;
3097
3098    t1 = tcg_temp_new_i64();
3099    gen_vmulwev_d_wu_w(t1, a, b);
3100    tcg_gen_add_i64(t, t, t1);
3101}
3102
3103static void do_vmaddwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3104                            uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
3105{
3106    static const TCGOpcode vecop_list[] = {
3107        INDEX_op_shli_vec, INDEX_op_sari_vec,
3108        INDEX_op_mul_vec, INDEX_op_add_vec, 0
3109        };
3110    static const GVecGen3 op[3] = {
3111        {
3112            .fniv = gen_vmaddwev_u_s,
3113            .fno = gen_helper_vmaddwev_h_bu_b,
3114            .load_dest = true,
3115            .opt_opc = vecop_list,
3116            .vece = MO_16
3117        },
3118        {
3119            .fni4 = gen_vmaddwev_w_hu_h,
3120            .fniv = gen_vmaddwev_u_s,
3121            .fno = gen_helper_vmaddwev_w_hu_h,
3122            .load_dest = true,
3123            .opt_opc = vecop_list,
3124            .vece = MO_32
3125        },
3126        {
3127            .fni8 = gen_vmaddwev_d_wu_w,
3128            .fniv = gen_vmaddwev_u_s,
3129            .fno = gen_helper_vmaddwev_d_wu_w,
3130            .load_dest = true,
3131            .opt_opc = vecop_list,
3132            .vece = MO_64
3133        },
3134    };
3135
3136    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
3137}
3138
/*
 * vmaddwev.{h.bu.b,w.hu.h,d.wu.w} and their xv* counterparts, all expanded
 * by do_vmaddwev_u_s.  MO_8/MO_16/MO_32 is the source element size.
 */
TRANS(vmaddwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vmaddwev_u_s)
TRANS(vmaddwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vmaddwev_u_s)
TRANS(vmaddwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vmaddwev_u_s)
TRANS(xvmaddwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vmaddwev_u_s)
TRANS(xvmaddwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vmaddwev_u_s)
TRANS(xvmaddwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vmaddwev_u_s)
3145
3146static void gen_vmaddwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
3147{
3148    TCGv_vec t1, t2, t3;
3149    int halfbits = 4 << vece;
3150
3151    t1 = tcg_temp_new_vec_matching(a);
3152    t2 = tcg_temp_new_vec_matching(b);
3153    t3 = tcg_temp_new_vec_matching(t);
3154    tcg_gen_shri_vec(vece, t1, a, halfbits);
3155    tcg_gen_sari_vec(vece, t2, b, halfbits);
3156    tcg_gen_mul_vec(vece, t3, t1, t2);
3157    tcg_gen_add_vec(vece, t, t, t3);
3158}
3159
3160static void gen_vmaddwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
3161{
3162    TCGv_i32 t1;
3163
3164    t1 = tcg_temp_new_i32();
3165    gen_vmulwod_w_hu_h(t1, a, b);
3166    tcg_gen_add_i32(t, t, t1);
3167}
3168
3169static void gen_vmaddwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
3170{
3171    TCGv_i64 t1;
3172
3173    t1 = tcg_temp_new_i64();
3174    gen_vmulwod_d_wu_w(t1, a, b);
3175    tcg_gen_add_i64(t, t, t1);
3176}
3177
3178static void do_vmaddwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3179                            uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
3180{
3181    static const TCGOpcode vecop_list[] = {
3182        INDEX_op_shri_vec, INDEX_op_sari_vec,
3183        INDEX_op_mul_vec, INDEX_op_add_vec, 0
3184        };
3185    static const GVecGen3 op[3] = {
3186        {
3187            .fniv = gen_vmaddwod_u_s,
3188            .fno = gen_helper_vmaddwod_h_bu_b,
3189            .load_dest = true,
3190            .opt_opc = vecop_list,
3191            .vece = MO_16
3192        },
3193        {
3194            .fni4 = gen_vmaddwod_w_hu_h,
3195            .fniv = gen_vmaddwod_u_s,
3196            .fno = gen_helper_vmaddwod_w_hu_h,
3197            .load_dest = true,
3198            .opt_opc = vecop_list,
3199            .vece = MO_32
3200        },
3201        {
3202            .fni8 = gen_vmaddwod_d_wu_w,
3203            .fniv = gen_vmaddwod_u_s,
3204            .fno = gen_helper_vmaddwod_d_wu_w,
3205            .load_dest = true,
3206            .opt_opc = vecop_list,
3207            .vece = MO_64
3208        },
3209    };
3210
3211    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
3212}
3213
/*
 * vmaddwod.{h.bu.b,w.hu.h,d.wu.w} and their xv* counterparts, all expanded
 * by do_vmaddwod_u_s.  MO_8/MO_16/MO_32 is the source element size.
 */
TRANS(vmaddwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vmaddwod_u_s)
TRANS(vmaddwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vmaddwod_u_s)
TRANS(vmaddwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vmaddwod_u_s)
TRANS(xvmaddwod_h_bu_b, LASX, gvec_xxx, MO_8, do_vmaddwod_u_s)
TRANS(xvmaddwod_w_hu_h, LASX, gvec_xxx, MO_16, do_vmaddwod_u_s)
TRANS(xvmaddwod_d_wu_w, LASX, gvec_xxx, MO_32, do_vmaddwod_u_s)
3220
/*
 * Vector divide and modulo, signed (b/h/w/d) and unsigned (bu/hu/wu/du).
 * These have no inline expansion here: each one names an out-of-line
 * helper, and the LASX (xv*) forms reuse the same helper as the LSX form,
 * going through gen_xxx instead of gen_vvv.
 */
TRANS(vdiv_b, LSX, gen_vvv, gen_helper_vdiv_b)
TRANS(vdiv_h, LSX, gen_vvv, gen_helper_vdiv_h)
TRANS(vdiv_w, LSX, gen_vvv, gen_helper_vdiv_w)
TRANS(vdiv_d, LSX, gen_vvv, gen_helper_vdiv_d)
TRANS(vdiv_bu, LSX, gen_vvv, gen_helper_vdiv_bu)
TRANS(vdiv_hu, LSX, gen_vvv, gen_helper_vdiv_hu)
TRANS(vdiv_wu, LSX, gen_vvv, gen_helper_vdiv_wu)
TRANS(vdiv_du, LSX, gen_vvv, gen_helper_vdiv_du)
TRANS(vmod_b, LSX, gen_vvv, gen_helper_vmod_b)
TRANS(vmod_h, LSX, gen_vvv, gen_helper_vmod_h)
TRANS(vmod_w, LSX, gen_vvv, gen_helper_vmod_w)
TRANS(vmod_d, LSX, gen_vvv, gen_helper_vmod_d)
TRANS(vmod_bu, LSX, gen_vvv, gen_helper_vmod_bu)
TRANS(vmod_hu, LSX, gen_vvv, gen_helper_vmod_hu)
TRANS(vmod_wu, LSX, gen_vvv, gen_helper_vmod_wu)
TRANS(vmod_du, LSX, gen_vvv, gen_helper_vmod_du)
TRANS(xvdiv_b, LASX, gen_xxx, gen_helper_vdiv_b)
TRANS(xvdiv_h, LASX, gen_xxx, gen_helper_vdiv_h)
TRANS(xvdiv_w, LASX, gen_xxx, gen_helper_vdiv_w)
TRANS(xvdiv_d, LASX, gen_xxx, gen_helper_vdiv_d)
TRANS(xvdiv_bu, LASX, gen_xxx, gen_helper_vdiv_bu)
TRANS(xvdiv_hu, LASX, gen_xxx, gen_helper_vdiv_hu)
TRANS(xvdiv_wu, LASX, gen_xxx, gen_helper_vdiv_wu)
TRANS(xvdiv_du, LASX, gen_xxx, gen_helper_vdiv_du)
TRANS(xvmod_b, LASX, gen_xxx, gen_helper_vmod_b)
TRANS(xvmod_h, LASX, gen_xxx, gen_helper_vmod_h)
TRANS(xvmod_w, LASX, gen_xxx, gen_helper_vmod_w)
TRANS(xvmod_d, LASX, gen_xxx, gen_helper_vmod_d)
TRANS(xvmod_bu, LASX, gen_xxx, gen_helper_vmod_bu)
TRANS(xvmod_hu, LASX, gen_xxx, gen_helper_vmod_hu)
TRANS(xvmod_wu, LASX, gen_xxx, gen_helper_vmod_wu)
TRANS(xvmod_du, LASX, gen_xxx, gen_helper_vmod_du)
3253
3254static void gen_vsat_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec max)
3255{
3256    TCGv_vec min;
3257
3258    min = tcg_temp_new_vec_matching(t);
3259    tcg_gen_not_vec(vece, min, max);
3260    tcg_gen_smax_vec(vece, t, a, min);
3261    tcg_gen_smin_vec(vece, t, t, max);
3262}
3263
3264static void do_vsat_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3265                      int64_t imm, uint32_t oprsz, uint32_t maxsz)
3266{
3267    static const TCGOpcode vecop_list[] = {
3268        INDEX_op_smax_vec, INDEX_op_smin_vec, 0
3269        };
3270    static const GVecGen2s op[4] = {
3271        {
3272            .fniv = gen_vsat_s,
3273            .fno = gen_helper_vsat_b,
3274            .opt_opc = vecop_list,
3275            .vece = MO_8
3276        },
3277        {
3278            .fniv = gen_vsat_s,
3279            .fno = gen_helper_vsat_h,
3280            .opt_opc = vecop_list,
3281            .vece = MO_16
3282        },
3283        {
3284            .fniv = gen_vsat_s,
3285            .fno = gen_helper_vsat_w,
3286            .opt_opc = vecop_list,
3287            .vece = MO_32
3288        },
3289        {
3290            .fniv = gen_vsat_s,
3291            .fno = gen_helper_vsat_d,
3292            .opt_opc = vecop_list,
3293            .vece = MO_64
3294        },
3295    };
3296
3297    tcg_gen_gvec_2s(vd_ofs, vj_ofs, oprsz, maxsz,
3298                    tcg_constant_i64((1ll<< imm) -1), &op[vece]);
3299}
3300
/*
 * Signed saturate-immediate translators; the MO_* argument is the element
 * size and do_vsat_s is the expander for both LSX and LASX forms.
 */
TRANS(vsat_b, LSX, gvec_vv_i, MO_8, do_vsat_s)
TRANS(vsat_h, LSX, gvec_vv_i, MO_16, do_vsat_s)
TRANS(vsat_w, LSX, gvec_vv_i, MO_32, do_vsat_s)
TRANS(vsat_d, LSX, gvec_vv_i, MO_64, do_vsat_s)
TRANS(xvsat_b, LASX, gvec_xx_i, MO_8, do_vsat_s)
TRANS(xvsat_h, LASX, gvec_xx_i, MO_16, do_vsat_s)
TRANS(xvsat_w, LASX, gvec_xx_i, MO_32, do_vsat_s)
TRANS(xvsat_d, LASX, gvec_xx_i, MO_64, do_vsat_s)
3309
3310static void gen_vsat_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec max)
3311{
3312    tcg_gen_umin_vec(vece, t, a, max);
3313}
3314
3315static void do_vsat_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3316                       int64_t imm, uint32_t oprsz, uint32_t maxsz)
3317{
3318    uint64_t max;
3319    static const TCGOpcode vecop_list[] = {
3320        INDEX_op_umin_vec, 0
3321        };
3322    static const GVecGen2s op[4] = {
3323        {
3324            .fniv = gen_vsat_u,
3325            .fno = gen_helper_vsat_bu,
3326            .opt_opc = vecop_list,
3327            .vece = MO_8
3328        },
3329        {
3330            .fniv = gen_vsat_u,
3331            .fno = gen_helper_vsat_hu,
3332            .opt_opc = vecop_list,
3333            .vece = MO_16
3334        },
3335        {
3336            .fniv = gen_vsat_u,
3337            .fno = gen_helper_vsat_wu,
3338            .opt_opc = vecop_list,
3339            .vece = MO_32
3340        },
3341        {
3342            .fniv = gen_vsat_u,
3343            .fno = gen_helper_vsat_du,
3344            .opt_opc = vecop_list,
3345            .vece = MO_64
3346        },
3347    };
3348
3349    max = (imm == 0x3f) ? UINT64_MAX : (1ull << (imm + 1)) - 1;
3350    tcg_gen_gvec_2s(vd_ofs, vj_ofs, oprsz, maxsz,
3351                    tcg_constant_i64(max), &op[vece]);
3352}
3353
/*
 * Unsigned saturate-immediate translators; the MO_* argument is the
 * element size and do_vsat_u is the expander for both LSX and LASX forms.
 */
TRANS(vsat_bu, LSX, gvec_vv_i, MO_8, do_vsat_u)
TRANS(vsat_hu, LSX, gvec_vv_i, MO_16, do_vsat_u)
TRANS(vsat_wu, LSX, gvec_vv_i, MO_32, do_vsat_u)
TRANS(vsat_du, LSX, gvec_vv_i, MO_64, do_vsat_u)
TRANS(xvsat_bu, LASX, gvec_xx_i, MO_8, do_vsat_u)
TRANS(xvsat_hu, LASX, gvec_xx_i, MO_16, do_vsat_u)
TRANS(xvsat_wu, LASX, gvec_xx_i, MO_32, do_vsat_u)
TRANS(xvsat_du, LASX, gvec_xx_i, MO_64, do_vsat_u)
3362
/*
 * vexth.* translators.  Each names an out-of-line helper; the LASX
 * (xvexth.*) forms reuse the LSX helper through gen_xx instead of gen_vv.
 */
TRANS(vexth_h_b, LSX, gen_vv, gen_helper_vexth_h_b)
TRANS(vexth_w_h, LSX, gen_vv, gen_helper_vexth_w_h)
TRANS(vexth_d_w, LSX, gen_vv, gen_helper_vexth_d_w)
TRANS(vexth_q_d, LSX, gen_vv, gen_helper_vexth_q_d)
TRANS(vexth_hu_bu, LSX, gen_vv, gen_helper_vexth_hu_bu)
TRANS(vexth_wu_hu, LSX, gen_vv, gen_helper_vexth_wu_hu)
TRANS(vexth_du_wu, LSX, gen_vv, gen_helper_vexth_du_wu)
TRANS(vexth_qu_du, LSX, gen_vv, gen_helper_vexth_qu_du)
TRANS(xvexth_h_b, LASX, gen_xx, gen_helper_vexth_h_b)
TRANS(xvexth_w_h, LASX, gen_xx, gen_helper_vexth_w_h)
TRANS(xvexth_d_w, LASX, gen_xx, gen_helper_vexth_d_w)
TRANS(xvexth_q_d, LASX, gen_xx, gen_helper_vexth_q_d)
TRANS(xvexth_hu_bu, LASX, gen_xx, gen_helper_vexth_hu_bu)
TRANS(xvexth_wu_hu, LASX, gen_xx, gen_helper_vexth_wu_hu)
TRANS(xvexth_du_wu, LASX, gen_xx, gen_helper_vexth_du_wu)
TRANS(xvexth_qu_du, LASX, gen_xx, gen_helper_vexth_qu_du)
3379
/*
 * vext2xv.* translators.  These exist only as LASX instructions and each
 * one names its own out-of-line helper.
 */
TRANS(vext2xv_h_b, LASX, gen_xx, gen_helper_vext2xv_h_b)
TRANS(vext2xv_w_b, LASX, gen_xx, gen_helper_vext2xv_w_b)
TRANS(vext2xv_d_b, LASX, gen_xx, gen_helper_vext2xv_d_b)
TRANS(vext2xv_w_h, LASX, gen_xx, gen_helper_vext2xv_w_h)
TRANS(vext2xv_d_h, LASX, gen_xx, gen_helper_vext2xv_d_h)
TRANS(vext2xv_d_w, LASX, gen_xx, gen_helper_vext2xv_d_w)
TRANS(vext2xv_hu_bu, LASX, gen_xx, gen_helper_vext2xv_hu_bu)
TRANS(vext2xv_wu_bu, LASX, gen_xx, gen_helper_vext2xv_wu_bu)
TRANS(vext2xv_du_bu, LASX, gen_xx, gen_helper_vext2xv_du_bu)
TRANS(vext2xv_wu_hu, LASX, gen_xx, gen_helper_vext2xv_wu_hu)
TRANS(vext2xv_du_hu, LASX, gen_xx, gen_helper_vext2xv_du_hu)
TRANS(vext2xv_du_wu, LASX, gen_xx, gen_helper_vext2xv_du_wu)
3392
3393static void gen_vsigncov(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
3394{
3395    TCGv_vec t1, zero;
3396
3397    t1 = tcg_temp_new_vec_matching(t);
3398    zero = tcg_constant_vec_matching(t, vece, 0);
3399
3400    tcg_gen_neg_vec(vece, t1, b);
3401    tcg_gen_cmpsel_vec(TCG_COND_LT, vece, t, a, zero, t1, b);
3402    tcg_gen_cmpsel_vec(TCG_COND_EQ, vece, t, a, zero, zero, t);
3403}
3404
3405static void do_vsigncov(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3406                        uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
3407{
3408    static const TCGOpcode vecop_list[] = {
3409        INDEX_op_neg_vec, INDEX_op_cmpsel_vec, 0
3410        };
3411    static const GVecGen3 op[4] = {
3412        {
3413            .fniv = gen_vsigncov,
3414            .fno = gen_helper_vsigncov_b,
3415            .opt_opc = vecop_list,
3416            .vece = MO_8
3417        },
3418        {
3419            .fniv = gen_vsigncov,
3420            .fno = gen_helper_vsigncov_h,
3421            .opt_opc = vecop_list,
3422            .vece = MO_16
3423        },
3424        {
3425            .fniv = gen_vsigncov,
3426            .fno = gen_helper_vsigncov_w,
3427            .opt_opc = vecop_list,
3428            .vece = MO_32
3429        },
3430        {
3431            .fniv = gen_vsigncov,
3432            .fno = gen_helper_vsigncov_d,
3433            .opt_opc = vecop_list,
3434            .vece = MO_64
3435        },
3436    };
3437
3438    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
3439}
3440
/*
 * vsigncov.{b,h,w,d} and xvsigncov.{b,h,w,d}; the MO_* argument is the
 * element size and do_vsigncov is the expander for all of them.
 */
TRANS(vsigncov_b, LSX, gvec_vvv, MO_8, do_vsigncov)
TRANS(vsigncov_h, LSX, gvec_vvv, MO_16, do_vsigncov)
TRANS(vsigncov_w, LSX, gvec_vvv, MO_32, do_vsigncov)
TRANS(vsigncov_d, LSX, gvec_vvv, MO_64, do_vsigncov)
TRANS(xvsigncov_b, LASX, gvec_xxx, MO_8, do_vsigncov)
TRANS(xvsigncov_h, LASX, gvec_xxx, MO_16, do_vsigncov)
TRANS(xvsigncov_w, LASX, gvec_xxx, MO_32, do_vsigncov)
TRANS(xvsigncov_d, LASX, gvec_xxx, MO_64, do_vsigncov)
3449
/*
 * vmskltz.{b,h,w,d}, vmskgez.b and vmsknz.b translators.  Each names an
 * out-of-line helper; the LASX (xv*) forms reuse the LSX helper through
 * gen_xx instead of gen_vv.
 */
TRANS(vmskltz_b, LSX, gen_vv, gen_helper_vmskltz_b)
TRANS(vmskltz_h, LSX, gen_vv, gen_helper_vmskltz_h)
TRANS(vmskltz_w, LSX, gen_vv, gen_helper_vmskltz_w)
TRANS(vmskltz_d, LSX, gen_vv, gen_helper_vmskltz_d)
TRANS(vmskgez_b, LSX, gen_vv, gen_helper_vmskgez_b)
TRANS(vmsknz_b, LSX, gen_vv, gen_helper_vmsknz_b)
TRANS(xvmskltz_b, LASX, gen_xx, gen_helper_vmskltz_b)
TRANS(xvmskltz_h, LASX, gen_xx, gen_helper_vmskltz_h)
TRANS(xvmskltz_w, LASX, gen_xx, gen_helper_vmskltz_w)
TRANS(xvmskltz_d, LASX, gen_xx, gen_helper_vmskltz_d)
TRANS(xvmskgez_b, LASX, gen_xx, gen_helper_vmskgez_b)
TRANS(xvmsknz_b, LASX, gen_xx, gen_helper_vmsknz_b)
3462
3463#define EXPAND_BYTE(bit)  ((uint64_t)(bit ? 0xff : 0))
3464
3465static uint64_t vldi_get_value(DisasContext *ctx, uint32_t imm)
3466{
3467    int mode;
3468    uint64_t data, t;
3469
3470    /*
3471     * imm bit [11:8] is mode, mode value is 0-12.
3472     * other values are invalid.
3473     */
3474    mode = (imm >> 8) & 0xf;
3475    t =  imm & 0xff;
3476    switch (mode) {
3477    case 0:
3478        /* data: {2{24'0, imm[7:0]}} */
3479        data =  (t << 32) | t ;
3480        break;
3481    case 1:
3482        /* data: {2{16'0, imm[7:0], 8'0}} */
3483        data = (t << 24) | (t << 8);
3484        break;
3485    case 2:
3486        /* data: {2{8'0, imm[7:0], 16'0}} */
3487        data = (t << 48) | (t << 16);
3488        break;
3489    case 3:
3490        /* data: {2{imm[7:0], 24'0}} */
3491        data = (t << 56) | (t << 24);
3492        break;
3493    case 4:
3494        /* data: {4{8'0, imm[7:0]}} */
3495        data = (t << 48) | (t << 32) | (t << 16) | t;
3496        break;
3497    case 5:
3498        /* data: {4{imm[7:0], 8'0}} */
3499        data = (t << 56) |(t << 40) | (t << 24) | (t << 8);
3500        break;
3501    case 6:
3502        /* data: {2{16'0, imm[7:0], 8'1}} */
3503        data = (t << 40) | ((uint64_t)0xff << 32) | (t << 8) | 0xff;
3504        break;
3505    case 7:
3506        /* data: {2{8'0, imm[7:0], 16'1}} */
3507        data = (t << 48) | ((uint64_t)0xffff << 32) | (t << 16) | 0xffff;
3508        break;
3509    case 8:
3510        /* data: {8{imm[7:0]}} */
3511        data =(t << 56) | (t << 48) | (t << 40) | (t << 32) |
3512              (t << 24) | (t << 16) | (t << 8) | t;
3513        break;
3514    case 9:
3515        /* data: {{8{imm[7]}, ..., 8{imm[0]}}} */
3516        {
3517            uint64_t b0,b1,b2,b3,b4,b5,b6,b7;
3518            b0 = t& 0x1;
3519            b1 = (t & 0x2) >> 1;
3520            b2 = (t & 0x4) >> 2;
3521            b3 = (t & 0x8) >> 3;
3522            b4 = (t & 0x10) >> 4;
3523            b5 = (t & 0x20) >> 5;
3524            b6 = (t & 0x40) >> 6;
3525            b7 = (t & 0x80) >> 7;
3526            data = (EXPAND_BYTE(b7) << 56) |
3527                   (EXPAND_BYTE(b6) << 48) |
3528                   (EXPAND_BYTE(b5) << 40) |
3529                   (EXPAND_BYTE(b4) << 32) |
3530                   (EXPAND_BYTE(b3) << 24) |
3531                   (EXPAND_BYTE(b2) << 16) |
3532                   (EXPAND_BYTE(b1) <<  8) |
3533                   EXPAND_BYTE(b0);
3534        }
3535        break;
3536    case 10:
3537        /* data: {2{imm[7], ~imm[6], {5{imm[6]}}, imm[5:0], 19'0}} */
3538        {
3539            uint64_t b6, b7;
3540            uint64_t t0, t1;
3541            b6 = (imm & 0x40) >> 6;
3542            b7 = (imm & 0x80) >> 7;
3543            t0 = (imm & 0x3f);
3544            t1 = (b7 << 6) | ((1-b6) << 5) | (uint64_t)(b6 ? 0x1f : 0);
3545            data  = (t1 << 57) | (t0 << 51) | (t1 << 25) | (t0 << 19);
3546        }
3547        break;
3548    case 11:
3549        /* data: {32'0, imm[7], ~{imm[6]}, 5{imm[6]}, imm[5:0], 19'0} */
3550        {
3551            uint64_t b6,b7;
3552            uint64_t t0, t1;
3553            b6 = (imm & 0x40) >> 6;
3554            b7 = (imm & 0x80) >> 7;
3555            t0 = (imm & 0x3f);
3556            t1 = (b7 << 6) | ((1-b6) << 5) | (b6 ? 0x1f : 0);
3557            data = (t1 << 25) | (t0 << 19);
3558        }
3559        break;
3560    case 12:
3561        /* data: {imm[7], ~imm[6], 8{imm[6]}, imm[5:0], 48'0} */
3562        {
3563            uint64_t b6,b7;
3564            uint64_t t0, t1;
3565            b6 = (imm & 0x40) >> 6;
3566            b7 = (imm & 0x80) >> 7;
3567            t0 = (imm & 0x3f);
3568            t1 = (b7 << 9) | ((1-b6) << 8) | (b6 ? 0xff : 0);
3569            data = (t1 << 54) | (t0 << 48);
3570        }
3571        break;
3572    default:
3573        generate_exception(ctx, EXCCODE_INE);
3574        g_assert_not_reached();
3575    }
3576    return data;
3577}
3578
3579static bool gen_vldi(DisasContext *ctx, arg_vldi *a, uint32_t oprsz)
3580{
3581    int sel, vece;
3582    uint64_t value;
3583
3584    if (!check_vec(ctx, oprsz)) {
3585        return true;
3586    }
3587
3588    sel = (a->imm >> 12) & 0x1;
3589
3590    if (sel) {
3591        value = vldi_get_value(ctx, a->imm);
3592        vece = MO_64;
3593    } else {
3594        value = ((int32_t)(a->imm << 22)) >> 22;
3595        vece = (a->imm >> 10) & 0x3;
3596    }
3597
3598    tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd), oprsz, ctx->vl/8,
3599                         tcg_constant_i64(value));
3600    return true;
3601}
3602
/* vldi / xvldi: the trailing argument is gen_vldi()'s oprsz in bytes. */
TRANS(vldi, LSX, gen_vldi, 16)
TRANS(xvldi, LASX, gen_vldi, 32)
3605
3606static bool gen_vandn_v(DisasContext *ctx, arg_vvv *a, uint32_t oprsz)
3607{
3608    uint32_t vd_ofs, vj_ofs, vk_ofs;
3609
3610    if (!check_vec(ctx, oprsz)) {
3611        return true;
3612    }
3613
3614    vd_ofs = vec_full_offset(a->vd);
3615    vj_ofs = vec_full_offset(a->vj);
3616    vk_ofs = vec_full_offset(a->vk);
3617
3618    tcg_gen_gvec_andc(MO_64, vd_ofs, vk_ofs, vj_ofs, oprsz, ctx->vl / 8);
3619    return true;
3620}
3621
3622static void gen_vnori(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
3623{
3624    TCGv_vec t1;
3625
3626    t1 = tcg_constant_vec_matching(t, vece, imm);
3627    tcg_gen_nor_vec(vece, t, a, t1);
3628}
3629
3630static void gen_vnori_b(TCGv_i64 t, TCGv_i64 a, int64_t imm)
3631{
3632    tcg_gen_movi_i64(t, dup_const(MO_8, imm));
3633    tcg_gen_nor_i64(t, a, t);
3634}
3635
3636static void do_vnori_b(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3637                       int64_t imm, uint32_t oprsz, uint32_t maxsz)
3638{
3639    static const TCGOpcode vecop_list[] = {
3640        INDEX_op_nor_vec, 0
3641        };
3642    static const GVecGen2i op = {
3643       .fni8 = gen_vnori_b,
3644       .fniv = gen_vnori,
3645       .fnoi = gen_helper_vnori_b,
3646       .opt_opc = vecop_list,
3647       .vece = MO_8
3648    };
3649
3650    tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op);
3651}
3652
/*
 * Bitwise logic translators.  The register-register forms use the generic
 * tcg_gen_gvec_* expanders at MO_64, except vandn.v/xvandn.v which go
 * through gen_vandn_v (with oprsz 16 or 32).  The immediate forms work on
 * bytes (MO_8); vnori.b uses the local do_vnori_b expander.
 */
TRANS(vand_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_and)
TRANS(vor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_or)
TRANS(vxor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_xor)
TRANS(vnor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_nor)
TRANS(vandn_v, LSX, gen_vandn_v, 16)
TRANS(vorn_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_orc)
TRANS(vandi_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_andi)
TRANS(vori_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_ori)
TRANS(vxori_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_xori)
TRANS(vnori_b, LSX, gvec_vv_i, MO_8, do_vnori_b)
TRANS(xvand_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_and)
TRANS(xvor_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_or)
TRANS(xvxor_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_xor)
TRANS(xvnor_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_nor)
TRANS(xvandn_v, LASX, gen_vandn_v, 32)
TRANS(xvorn_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_orc)
TRANS(xvandi_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_andi)
TRANS(xvori_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_ori)
TRANS(xvxori_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_xori)
TRANS(xvnori_b, LASX, gvec_xx_i, MO_8, do_vnori_b)
3673
/*
 * Shift-left translators: the per-element variable-count forms use
 * tcg_gen_gvec_shlv, the immediate forms tcg_gen_gvec_shli.
 */
TRANS(vsll_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_shlv)
TRANS(vsll_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_shlv)
TRANS(vsll_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_shlv)
TRANS(vsll_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_shlv)
TRANS(vslli_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_shli)
TRANS(vslli_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_shli)
TRANS(vslli_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_shli)
TRANS(vslli_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_shli)
TRANS(xvsll_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_shlv)
TRANS(xvsll_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_shlv)
TRANS(xvsll_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_shlv)
TRANS(xvsll_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_shlv)
TRANS(xvslli_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_shli)
TRANS(xvslli_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_shli)
TRANS(xvslli_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_shli)
TRANS(xvslli_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_shli)
3690
3691TRANS(vsrl_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_shrv)
3692TRANS(vsrl_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_shrv)
3693TRANS(vsrl_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_shrv)
3694TRANS(vsrl_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_shrv)
3695TRANS(vsrli_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_shri)
3696TRANS(vsrli_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_shri)
3697TRANS(vsrli_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_shri)
3698TRANS(vsrli_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_shri)
3699TRANS(xvsrl_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_shrv)
3700TRANS(xvsrl_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_shrv)
3701TRANS(xvsrl_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_shrv)
3702TRANS(xvsrl_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_shrv)
3703TRANS(xvsrli_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_shri)
3704TRANS(xvsrli_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_shri)
3705TRANS(xvsrli_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_shri)
3706TRANS(xvsrli_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_shri)
3707
3708TRANS(vsra_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sarv)
3709TRANS(vsra_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sarv)
3710TRANS(vsra_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_sarv)
3711TRANS(vsra_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_sarv)
3712TRANS(vsrai_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_sari)
3713TRANS(vsrai_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_sari)
3714TRANS(vsrai_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_sari)
3715TRANS(vsrai_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_sari)
3716TRANS(xvsra_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sarv)
3717TRANS(xvsra_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sarv)
3718TRANS(xvsra_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sarv)
3719TRANS(xvsra_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sarv)
3720TRANS(xvsrai_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_sari)
3721TRANS(xvsrai_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_sari)
3722TRANS(xvsrai_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_sari)
3723TRANS(xvsrai_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_sari)
3724
3725TRANS(vrotr_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_rotrv)
3726TRANS(vrotr_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_rotrv)
3727TRANS(vrotr_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_rotrv)
3728TRANS(vrotr_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_rotrv)
3729TRANS(vrotri_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_rotri)
3730TRANS(vrotri_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_rotri)
3731TRANS(vrotri_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_rotri)
3732TRANS(vrotri_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_rotri)
3733TRANS(xvrotr_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_rotrv)
3734TRANS(xvrotr_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_rotrv)
3735TRANS(xvrotr_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_rotrv)
3736TRANS(xvrotr_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_rotrv)
3737TRANS(xvrotri_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_rotri)
3738TRANS(xvrotri_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_rotri)
3739TRANS(xvrotri_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_rotri)
3740TRANS(xvrotri_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_rotri)
3741
3742TRANS(vsllwil_h_b, LSX, gen_vv_i, gen_helper_vsllwil_h_b)
3743TRANS(vsllwil_w_h, LSX, gen_vv_i, gen_helper_vsllwil_w_h)
3744TRANS(vsllwil_d_w, LSX, gen_vv_i, gen_helper_vsllwil_d_w)
3745TRANS(vextl_q_d, LSX, gen_vv, gen_helper_vextl_q_d)
3746TRANS(vsllwil_hu_bu, LSX, gen_vv_i, gen_helper_vsllwil_hu_bu)
3747TRANS(vsllwil_wu_hu, LSX, gen_vv_i, gen_helper_vsllwil_wu_hu)
3748TRANS(vsllwil_du_wu, LSX, gen_vv_i, gen_helper_vsllwil_du_wu)
3749TRANS(vextl_qu_du, LSX, gen_vv, gen_helper_vextl_qu_du)
3750TRANS(xvsllwil_h_b, LASX, gen_xx_i, gen_helper_vsllwil_h_b)
3751TRANS(xvsllwil_w_h, LASX, gen_xx_i, gen_helper_vsllwil_w_h)
3752TRANS(xvsllwil_d_w, LASX, gen_xx_i, gen_helper_vsllwil_d_w)
3753TRANS(xvextl_q_d, LASX, gen_xx, gen_helper_vextl_q_d)
3754TRANS(xvsllwil_hu_bu, LASX, gen_xx_i, gen_helper_vsllwil_hu_bu)
3755TRANS(xvsllwil_wu_hu, LASX, gen_xx_i, gen_helper_vsllwil_wu_hu)
3756TRANS(xvsllwil_du_wu, LASX, gen_xx_i, gen_helper_vsllwil_du_wu)
3757TRANS(xvextl_qu_du, LASX, gen_xx, gen_helper_vextl_qu_du)
3758
3759TRANS(vsrlr_b, LSX, gen_vvv, gen_helper_vsrlr_b)
3760TRANS(vsrlr_h, LSX, gen_vvv, gen_helper_vsrlr_h)
3761TRANS(vsrlr_w, LSX, gen_vvv, gen_helper_vsrlr_w)
3762TRANS(vsrlr_d, LSX, gen_vvv, gen_helper_vsrlr_d)
3763TRANS(vsrlri_b, LSX, gen_vv_i, gen_helper_vsrlri_b)
3764TRANS(vsrlri_h, LSX, gen_vv_i, gen_helper_vsrlri_h)
3765TRANS(vsrlri_w, LSX, gen_vv_i, gen_helper_vsrlri_w)
3766TRANS(vsrlri_d, LSX, gen_vv_i, gen_helper_vsrlri_d)
3767TRANS(xvsrlr_b, LASX, gen_xxx, gen_helper_vsrlr_b)
3768TRANS(xvsrlr_h, LASX, gen_xxx, gen_helper_vsrlr_h)
3769TRANS(xvsrlr_w, LASX, gen_xxx, gen_helper_vsrlr_w)
3770TRANS(xvsrlr_d, LASX, gen_xxx, gen_helper_vsrlr_d)
3771TRANS(xvsrlri_b, LASX, gen_xx_i, gen_helper_vsrlri_b)
3772TRANS(xvsrlri_h, LASX, gen_xx_i, gen_helper_vsrlri_h)
3773TRANS(xvsrlri_w, LASX, gen_xx_i, gen_helper_vsrlri_w)
3774TRANS(xvsrlri_d, LASX, gen_xx_i, gen_helper_vsrlri_d)
3775
3776TRANS(vsrar_b, LSX, gen_vvv, gen_helper_vsrar_b)
3777TRANS(vsrar_h, LSX, gen_vvv, gen_helper_vsrar_h)
3778TRANS(vsrar_w, LSX, gen_vvv, gen_helper_vsrar_w)
3779TRANS(vsrar_d, LSX, gen_vvv, gen_helper_vsrar_d)
3780TRANS(vsrari_b, LSX, gen_vv_i, gen_helper_vsrari_b)
3781TRANS(vsrari_h, LSX, gen_vv_i, gen_helper_vsrari_h)
3782TRANS(vsrari_w, LSX, gen_vv_i, gen_helper_vsrari_w)
3783TRANS(vsrari_d, LSX, gen_vv_i, gen_helper_vsrari_d)
3784TRANS(xvsrar_b, LASX, gen_xxx, gen_helper_vsrar_b)
3785TRANS(xvsrar_h, LASX, gen_xxx, gen_helper_vsrar_h)
3786TRANS(xvsrar_w, LASX, gen_xxx, gen_helper_vsrar_w)
3787TRANS(xvsrar_d, LASX, gen_xxx, gen_helper_vsrar_d)
3788TRANS(xvsrari_b, LASX, gen_xx_i, gen_helper_vsrari_b)
3789TRANS(xvsrari_h, LASX, gen_xx_i, gen_helper_vsrari_h)
3790TRANS(xvsrari_w, LASX, gen_xx_i, gen_helper_vsrari_w)
3791TRANS(xvsrari_d, LASX, gen_xx_i, gen_helper_vsrari_d)
3792
3793TRANS(vsrln_b_h, LSX, gen_vvv, gen_helper_vsrln_b_h)
3794TRANS(vsrln_h_w, LSX, gen_vvv, gen_helper_vsrln_h_w)
3795TRANS(vsrln_w_d, LSX, gen_vvv, gen_helper_vsrln_w_d)
3796TRANS(vsran_b_h, LSX, gen_vvv, gen_helper_vsran_b_h)
3797TRANS(vsran_h_w, LSX, gen_vvv, gen_helper_vsran_h_w)
3798TRANS(vsran_w_d, LSX, gen_vvv, gen_helper_vsran_w_d)
3799TRANS(xvsrln_b_h, LASX, gen_xxx, gen_helper_vsrln_b_h)
3800TRANS(xvsrln_h_w, LASX, gen_xxx, gen_helper_vsrln_h_w)
3801TRANS(xvsrln_w_d, LASX, gen_xxx, gen_helper_vsrln_w_d)
3802TRANS(xvsran_b_h, LASX, gen_xxx, gen_helper_vsran_b_h)
3803TRANS(xvsran_h_w, LASX, gen_xxx, gen_helper_vsran_h_w)
3804TRANS(xvsran_w_d, LASX, gen_xxx, gen_helper_vsran_w_d)
3805
3806TRANS(vsrlni_b_h, LSX, gen_vv_i, gen_helper_vsrlni_b_h)
3807TRANS(vsrlni_h_w, LSX, gen_vv_i, gen_helper_vsrlni_h_w)
3808TRANS(vsrlni_w_d, LSX, gen_vv_i, gen_helper_vsrlni_w_d)
3809TRANS(vsrlni_d_q, LSX, gen_vv_i, gen_helper_vsrlni_d_q)
3810TRANS(vsrani_b_h, LSX, gen_vv_i, gen_helper_vsrani_b_h)
3811TRANS(vsrani_h_w, LSX, gen_vv_i, gen_helper_vsrani_h_w)
3812TRANS(vsrani_w_d, LSX, gen_vv_i, gen_helper_vsrani_w_d)
3813TRANS(vsrani_d_q, LSX, gen_vv_i, gen_helper_vsrani_d_q)
3814TRANS(xvsrlni_b_h, LASX, gen_xx_i, gen_helper_vsrlni_b_h)
3815TRANS(xvsrlni_h_w, LASX, gen_xx_i, gen_helper_vsrlni_h_w)
3816TRANS(xvsrlni_w_d, LASX, gen_xx_i, gen_helper_vsrlni_w_d)
3817TRANS(xvsrlni_d_q, LASX, gen_xx_i, gen_helper_vsrlni_d_q)
3818TRANS(xvsrani_b_h, LASX, gen_xx_i, gen_helper_vsrani_b_h)
3819TRANS(xvsrani_h_w, LASX, gen_xx_i, gen_helper_vsrani_h_w)
3820TRANS(xvsrani_w_d, LASX, gen_xx_i, gen_helper_vsrani_w_d)
3821TRANS(xvsrani_d_q, LASX, gen_xx_i, gen_helper_vsrani_d_q)
3822
3823TRANS(vsrlrn_b_h, LSX, gen_vvv, gen_helper_vsrlrn_b_h)
3824TRANS(vsrlrn_h_w, LSX, gen_vvv, gen_helper_vsrlrn_h_w)
3825TRANS(vsrlrn_w_d, LSX, gen_vvv, gen_helper_vsrlrn_w_d)
3826TRANS(vsrarn_b_h, LSX, gen_vvv, gen_helper_vsrarn_b_h)
3827TRANS(vsrarn_h_w, LSX, gen_vvv, gen_helper_vsrarn_h_w)
3828TRANS(vsrarn_w_d, LSX, gen_vvv, gen_helper_vsrarn_w_d)
3829TRANS(xvsrlrn_b_h, LASX, gen_xxx, gen_helper_vsrlrn_b_h)
3830TRANS(xvsrlrn_h_w, LASX, gen_xxx, gen_helper_vsrlrn_h_w)
3831TRANS(xvsrlrn_w_d, LASX, gen_xxx, gen_helper_vsrlrn_w_d)
3832TRANS(xvsrarn_b_h, LASX, gen_xxx, gen_helper_vsrarn_b_h)
3833TRANS(xvsrarn_h_w, LASX, gen_xxx, gen_helper_vsrarn_h_w)
3834TRANS(xvsrarn_w_d, LASX, gen_xxx, gen_helper_vsrarn_w_d)
3835
3836TRANS(vsrlrni_b_h, LSX, gen_vv_i, gen_helper_vsrlrni_b_h)
3837TRANS(vsrlrni_h_w, LSX, gen_vv_i, gen_helper_vsrlrni_h_w)
3838TRANS(vsrlrni_w_d, LSX, gen_vv_i, gen_helper_vsrlrni_w_d)
3839TRANS(vsrlrni_d_q, LSX, gen_vv_i, gen_helper_vsrlrni_d_q)
3840TRANS(vsrarni_b_h, LSX, gen_vv_i, gen_helper_vsrarni_b_h)
3841TRANS(vsrarni_h_w, LSX, gen_vv_i, gen_helper_vsrarni_h_w)
3842TRANS(vsrarni_w_d, LSX, gen_vv_i, gen_helper_vsrarni_w_d)
3843TRANS(vsrarni_d_q, LSX, gen_vv_i, gen_helper_vsrarni_d_q)
3844TRANS(xvsrlrni_b_h, LASX, gen_xx_i, gen_helper_vsrlrni_b_h)
3845TRANS(xvsrlrni_h_w, LASX, gen_xx_i, gen_helper_vsrlrni_h_w)
3846TRANS(xvsrlrni_w_d, LASX, gen_xx_i, gen_helper_vsrlrni_w_d)
3847TRANS(xvsrlrni_d_q, LASX, gen_xx_i, gen_helper_vsrlrni_d_q)
3848TRANS(xvsrarni_b_h, LASX, gen_xx_i, gen_helper_vsrarni_b_h)
3849TRANS(xvsrarni_h_w, LASX, gen_xx_i, gen_helper_vsrarni_h_w)
3850TRANS(xvsrarni_w_d, LASX, gen_xx_i, gen_helper_vsrarni_w_d)
3851TRANS(xvsrarni_d_q, LASX, gen_xx_i, gen_helper_vsrarni_d_q)
3852
3853TRANS(vssrln_b_h, LSX, gen_vvv, gen_helper_vssrln_b_h)
3854TRANS(vssrln_h_w, LSX, gen_vvv, gen_helper_vssrln_h_w)
3855TRANS(vssrln_w_d, LSX, gen_vvv, gen_helper_vssrln_w_d)
3856TRANS(vssran_b_h, LSX, gen_vvv, gen_helper_vssran_b_h)
3857TRANS(vssran_h_w, LSX, gen_vvv, gen_helper_vssran_h_w)
3858TRANS(vssran_w_d, LSX, gen_vvv, gen_helper_vssran_w_d)
3859TRANS(vssrln_bu_h, LSX, gen_vvv, gen_helper_vssrln_bu_h)
3860TRANS(vssrln_hu_w, LSX, gen_vvv, gen_helper_vssrln_hu_w)
3861TRANS(vssrln_wu_d, LSX, gen_vvv, gen_helper_vssrln_wu_d)
3862TRANS(vssran_bu_h, LSX, gen_vvv, gen_helper_vssran_bu_h)
3863TRANS(vssran_hu_w, LSX, gen_vvv, gen_helper_vssran_hu_w)
3864TRANS(vssran_wu_d, LSX, gen_vvv, gen_helper_vssran_wu_d)
3865TRANS(xvssrln_b_h, LASX, gen_xxx, gen_helper_vssrln_b_h)
3866TRANS(xvssrln_h_w, LASX, gen_xxx, gen_helper_vssrln_h_w)
3867TRANS(xvssrln_w_d, LASX, gen_xxx, gen_helper_vssrln_w_d)
3868TRANS(xvssran_b_h, LASX, gen_xxx, gen_helper_vssran_b_h)
3869TRANS(xvssran_h_w, LASX, gen_xxx, gen_helper_vssran_h_w)
3870TRANS(xvssran_w_d, LASX, gen_xxx, gen_helper_vssran_w_d)
3871TRANS(xvssrln_bu_h, LASX, gen_xxx, gen_helper_vssrln_bu_h)
3872TRANS(xvssrln_hu_w, LASX, gen_xxx, gen_helper_vssrln_hu_w)
3873TRANS(xvssrln_wu_d, LASX, gen_xxx, gen_helper_vssrln_wu_d)
3874TRANS(xvssran_bu_h, LASX, gen_xxx, gen_helper_vssran_bu_h)
3875TRANS(xvssran_hu_w, LASX, gen_xxx, gen_helper_vssran_hu_w)
3876TRANS(xvssran_wu_d, LASX, gen_xxx, gen_helper_vssran_wu_d)
3877
3878TRANS(vssrlni_b_h, LSX, gen_vv_i, gen_helper_vssrlni_b_h)
3879TRANS(vssrlni_h_w, LSX, gen_vv_i, gen_helper_vssrlni_h_w)
3880TRANS(vssrlni_w_d, LSX, gen_vv_i, gen_helper_vssrlni_w_d)
3881TRANS(vssrlni_d_q, LSX, gen_vv_i, gen_helper_vssrlni_d_q)
3882TRANS(vssrani_b_h, LSX, gen_vv_i, gen_helper_vssrani_b_h)
3883TRANS(vssrani_h_w, LSX, gen_vv_i, gen_helper_vssrani_h_w)
3884TRANS(vssrani_w_d, LSX, gen_vv_i, gen_helper_vssrani_w_d)
3885TRANS(vssrani_d_q, LSX, gen_vv_i, gen_helper_vssrani_d_q)
3886TRANS(vssrlni_bu_h, LSX, gen_vv_i, gen_helper_vssrlni_bu_h)
3887TRANS(vssrlni_hu_w, LSX, gen_vv_i, gen_helper_vssrlni_hu_w)
3888TRANS(vssrlni_wu_d, LSX, gen_vv_i, gen_helper_vssrlni_wu_d)
3889TRANS(vssrlni_du_q, LSX, gen_vv_i, gen_helper_vssrlni_du_q)
3890TRANS(vssrani_bu_h, LSX, gen_vv_i, gen_helper_vssrani_bu_h)
3891TRANS(vssrani_hu_w, LSX, gen_vv_i, gen_helper_vssrani_hu_w)
3892TRANS(vssrani_wu_d, LSX, gen_vv_i, gen_helper_vssrani_wu_d)
3893TRANS(vssrani_du_q, LSX, gen_vv_i, gen_helper_vssrani_du_q)
3894TRANS(xvssrlni_b_h, LASX, gen_xx_i, gen_helper_vssrlni_b_h)
3895TRANS(xvssrlni_h_w, LASX, gen_xx_i, gen_helper_vssrlni_h_w)
3896TRANS(xvssrlni_w_d, LASX, gen_xx_i, gen_helper_vssrlni_w_d)
3897TRANS(xvssrlni_d_q, LASX, gen_xx_i, gen_helper_vssrlni_d_q)
3898TRANS(xvssrani_b_h, LASX, gen_xx_i, gen_helper_vssrani_b_h)
3899TRANS(xvssrani_h_w, LASX, gen_xx_i, gen_helper_vssrani_h_w)
3900TRANS(xvssrani_w_d, LASX, gen_xx_i, gen_helper_vssrani_w_d)
3901TRANS(xvssrani_d_q, LASX, gen_xx_i, gen_helper_vssrani_d_q)
3902TRANS(xvssrlni_bu_h, LASX, gen_xx_i, gen_helper_vssrlni_bu_h)
3903TRANS(xvssrlni_hu_w, LASX, gen_xx_i, gen_helper_vssrlni_hu_w)
3904TRANS(xvssrlni_wu_d, LASX, gen_xx_i, gen_helper_vssrlni_wu_d)
3905TRANS(xvssrlni_du_q, LASX, gen_xx_i, gen_helper_vssrlni_du_q)
3906TRANS(xvssrani_bu_h, LASX, gen_xx_i, gen_helper_vssrani_bu_h)
3907TRANS(xvssrani_hu_w, LASX, gen_xx_i, gen_helper_vssrani_hu_w)
3908TRANS(xvssrani_wu_d, LASX, gen_xx_i, gen_helper_vssrani_wu_d)
3909TRANS(xvssrani_du_q, LASX, gen_xx_i, gen_helper_vssrani_du_q)
3910
3911TRANS(vssrlrn_b_h, LSX, gen_vvv, gen_helper_vssrlrn_b_h)
3912TRANS(vssrlrn_h_w, LSX, gen_vvv, gen_helper_vssrlrn_h_w)
3913TRANS(vssrlrn_w_d, LSX, gen_vvv, gen_helper_vssrlrn_w_d)
3914TRANS(vssrarn_b_h, LSX, gen_vvv, gen_helper_vssrarn_b_h)
3915TRANS(vssrarn_h_w, LSX, gen_vvv, gen_helper_vssrarn_h_w)
3916TRANS(vssrarn_w_d, LSX, gen_vvv, gen_helper_vssrarn_w_d)
3917TRANS(vssrlrn_bu_h, LSX, gen_vvv, gen_helper_vssrlrn_bu_h)
3918TRANS(vssrlrn_hu_w, LSX, gen_vvv, gen_helper_vssrlrn_hu_w)
3919TRANS(vssrlrn_wu_d, LSX, gen_vvv, gen_helper_vssrlrn_wu_d)
3920TRANS(vssrarn_bu_h, LSX, gen_vvv, gen_helper_vssrarn_bu_h)
3921TRANS(vssrarn_hu_w, LSX, gen_vvv, gen_helper_vssrarn_hu_w)
3922TRANS(vssrarn_wu_d, LSX, gen_vvv, gen_helper_vssrarn_wu_d)
3923TRANS(xvssrlrn_b_h, LASX, gen_xxx, gen_helper_vssrlrn_b_h)
3924TRANS(xvssrlrn_h_w, LASX, gen_xxx, gen_helper_vssrlrn_h_w)
3925TRANS(xvssrlrn_w_d, LASX, gen_xxx, gen_helper_vssrlrn_w_d)
3926TRANS(xvssrarn_b_h, LASX, gen_xxx, gen_helper_vssrarn_b_h)
3927TRANS(xvssrarn_h_w, LASX, gen_xxx, gen_helper_vssrarn_h_w)
3928TRANS(xvssrarn_w_d, LASX, gen_xxx, gen_helper_vssrarn_w_d)
3929TRANS(xvssrlrn_bu_h, LASX, gen_xxx, gen_helper_vssrlrn_bu_h)
3930TRANS(xvssrlrn_hu_w, LASX, gen_xxx, gen_helper_vssrlrn_hu_w)
3931TRANS(xvssrlrn_wu_d, LASX, gen_xxx, gen_helper_vssrlrn_wu_d)
3932TRANS(xvssrarn_bu_h, LASX, gen_xxx, gen_helper_vssrarn_bu_h)
3933TRANS(xvssrarn_hu_w, LASX, gen_xxx, gen_helper_vssrarn_hu_w)
3934TRANS(xvssrarn_wu_d, LASX, gen_xxx, gen_helper_vssrarn_wu_d)
3935
3936TRANS(vssrlrni_b_h, LSX, gen_vv_i, gen_helper_vssrlrni_b_h)
3937TRANS(vssrlrni_h_w, LSX, gen_vv_i, gen_helper_vssrlrni_h_w)
3938TRANS(vssrlrni_w_d, LSX, gen_vv_i, gen_helper_vssrlrni_w_d)
3939TRANS(vssrlrni_d_q, LSX, gen_vv_i, gen_helper_vssrlrni_d_q)
3940TRANS(vssrarni_b_h, LSX, gen_vv_i, gen_helper_vssrarni_b_h)
3941TRANS(vssrarni_h_w, LSX, gen_vv_i, gen_helper_vssrarni_h_w)
3942TRANS(vssrarni_w_d, LSX, gen_vv_i, gen_helper_vssrarni_w_d)
3943TRANS(vssrarni_d_q, LSX, gen_vv_i, gen_helper_vssrarni_d_q)
3944TRANS(vssrlrni_bu_h, LSX, gen_vv_i, gen_helper_vssrlrni_bu_h)
3945TRANS(vssrlrni_hu_w, LSX, gen_vv_i, gen_helper_vssrlrni_hu_w)
3946TRANS(vssrlrni_wu_d, LSX, gen_vv_i, gen_helper_vssrlrni_wu_d)
3947TRANS(vssrlrni_du_q, LSX, gen_vv_i, gen_helper_vssrlrni_du_q)
3948TRANS(vssrarni_bu_h, LSX, gen_vv_i, gen_helper_vssrarni_bu_h)
3949TRANS(vssrarni_hu_w, LSX, gen_vv_i, gen_helper_vssrarni_hu_w)
3950TRANS(vssrarni_wu_d, LSX, gen_vv_i, gen_helper_vssrarni_wu_d)
3951TRANS(vssrarni_du_q, LSX, gen_vv_i, gen_helper_vssrarni_du_q)
3952TRANS(xvssrlrni_b_h, LASX, gen_xx_i, gen_helper_vssrlrni_b_h)
3953TRANS(xvssrlrni_h_w, LASX, gen_xx_i, gen_helper_vssrlrni_h_w)
3954TRANS(xvssrlrni_w_d, LASX, gen_xx_i, gen_helper_vssrlrni_w_d)
3955TRANS(xvssrlrni_d_q, LASX, gen_xx_i, gen_helper_vssrlrni_d_q)
3956TRANS(xvssrarni_b_h, LASX, gen_xx_i, gen_helper_vssrarni_b_h)
3957TRANS(xvssrarni_h_w, LASX, gen_xx_i, gen_helper_vssrarni_h_w)
3958TRANS(xvssrarni_w_d, LASX, gen_xx_i, gen_helper_vssrarni_w_d)
3959TRANS(xvssrarni_d_q, LASX, gen_xx_i, gen_helper_vssrarni_d_q)
3960TRANS(xvssrlrni_bu_h, LASX, gen_xx_i, gen_helper_vssrlrni_bu_h)
3961TRANS(xvssrlrni_hu_w, LASX, gen_xx_i, gen_helper_vssrlrni_hu_w)
3962TRANS(xvssrlrni_wu_d, LASX, gen_xx_i, gen_helper_vssrlrni_wu_d)
3963TRANS(xvssrlrni_du_q, LASX, gen_xx_i, gen_helper_vssrlrni_du_q)
3964TRANS(xvssrarni_bu_h, LASX, gen_xx_i, gen_helper_vssrarni_bu_h)
3965TRANS(xvssrarni_hu_w, LASX, gen_xx_i, gen_helper_vssrarni_hu_w)
3966TRANS(xvssrarni_wu_d, LASX, gen_xx_i, gen_helper_vssrarni_wu_d)
3967TRANS(xvssrarni_du_q, LASX, gen_xx_i, gen_helper_vssrarni_du_q)
3968
3969TRANS(vclo_b, LSX, gen_vv, gen_helper_vclo_b)
3970TRANS(vclo_h, LSX, gen_vv, gen_helper_vclo_h)
3971TRANS(vclo_w, LSX, gen_vv, gen_helper_vclo_w)
3972TRANS(vclo_d, LSX, gen_vv, gen_helper_vclo_d)
3973TRANS(vclz_b, LSX, gen_vv, gen_helper_vclz_b)
3974TRANS(vclz_h, LSX, gen_vv, gen_helper_vclz_h)
3975TRANS(vclz_w, LSX, gen_vv, gen_helper_vclz_w)
3976TRANS(vclz_d, LSX, gen_vv, gen_helper_vclz_d)
3977TRANS(xvclo_b, LASX, gen_xx, gen_helper_vclo_b)
3978TRANS(xvclo_h, LASX, gen_xx, gen_helper_vclo_h)
3979TRANS(xvclo_w, LASX, gen_xx, gen_helper_vclo_w)
3980TRANS(xvclo_d, LASX, gen_xx, gen_helper_vclo_d)
3981TRANS(xvclz_b, LASX, gen_xx, gen_helper_vclz_b)
3982TRANS(xvclz_h, LASX, gen_xx, gen_helper_vclz_h)
3983TRANS(xvclz_w, LASX, gen_xx, gen_helper_vclz_w)
3984TRANS(xvclz_d, LASX, gen_xx, gen_helper_vclz_d)
3985
3986TRANS(vpcnt_b, LSX, gen_vv, gen_helper_vpcnt_b)
3987TRANS(vpcnt_h, LSX, gen_vv, gen_helper_vpcnt_h)
3988TRANS(vpcnt_w, LSX, gen_vv, gen_helper_vpcnt_w)
3989TRANS(vpcnt_d, LSX, gen_vv, gen_helper_vpcnt_d)
3990TRANS(xvpcnt_b, LASX, gen_xx, gen_helper_vpcnt_b)
3991TRANS(xvpcnt_h, LASX, gen_xx, gen_helper_vpcnt_h)
3992TRANS(xvpcnt_w, LASX, gen_xx, gen_helper_vpcnt_w)
3993TRANS(xvpcnt_d, LASX, gen_xx, gen_helper_vpcnt_d)
3994
3995static void do_vbit(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b,
3996                    void (*func)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
3997{
3998    TCGv_vec mask, lsh, t1, one;
3999
4000    lsh = tcg_temp_new_vec_matching(t);
4001    t1 = tcg_temp_new_vec_matching(t);
4002    mask = tcg_constant_vec_matching(t, vece, (8 << vece) - 1);
4003    one = tcg_constant_vec_matching(t, vece, 1);
4004
4005    tcg_gen_and_vec(vece, lsh, b, mask);
4006    tcg_gen_shlv_vec(vece, t1, one, lsh);
4007    func(vece, t, a, t1);
4008}
4009
4010static void gen_vbitclr(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
4011{
4012    do_vbit(vece, t, a, b, tcg_gen_andc_vec);
4013}
4014
4015static void gen_vbitset(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
4016{
4017    do_vbit(vece, t, a, b, tcg_gen_or_vec);
4018}
4019
4020static void gen_vbitrev(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
4021{
4022    do_vbit(vece, t, a, b, tcg_gen_xor_vec);
4023}
4024
4025static void do_vbitclr(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
4026                      uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
4027{
4028    static const TCGOpcode vecop_list[] = {
4029        INDEX_op_shlv_vec, INDEX_op_andc_vec, 0
4030        };
4031    static const GVecGen3 op[4] = {
4032        {
4033            .fniv = gen_vbitclr,
4034            .fno = gen_helper_vbitclr_b,
4035            .opt_opc = vecop_list,
4036            .vece = MO_8
4037        },
4038        {
4039            .fniv = gen_vbitclr,
4040            .fno = gen_helper_vbitclr_h,
4041            .opt_opc = vecop_list,
4042            .vece = MO_16
4043        },
4044        {
4045            .fniv = gen_vbitclr,
4046            .fno = gen_helper_vbitclr_w,
4047            .opt_opc = vecop_list,
4048            .vece = MO_32
4049        },
4050        {
4051            .fniv = gen_vbitclr,
4052            .fno = gen_helper_vbitclr_d,
4053            .opt_opc = vecop_list,
4054            .vece = MO_64
4055        },
4056    };
4057
4058    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
4059}
4060
4061TRANS(vbitclr_b, LSX, gvec_vvv, MO_8, do_vbitclr)
4062TRANS(vbitclr_h, LSX, gvec_vvv, MO_16, do_vbitclr)
4063TRANS(vbitclr_w, LSX, gvec_vvv, MO_32, do_vbitclr)
4064TRANS(vbitclr_d, LSX, gvec_vvv, MO_64, do_vbitclr)
4065TRANS(xvbitclr_b, LASX, gvec_xxx, MO_8, do_vbitclr)
4066TRANS(xvbitclr_h, LASX, gvec_xxx, MO_16, do_vbitclr)
4067TRANS(xvbitclr_w, LASX, gvec_xxx, MO_32, do_vbitclr)
4068TRANS(xvbitclr_d, LASX, gvec_xxx, MO_64, do_vbitclr)
4069
4070static void do_vbiti(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm,
4071                     void (*func)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
4072{
4073    int lsh;
4074    TCGv_vec t1, one;
4075
4076    lsh = imm & ((8 << vece) -1);
4077    t1 = tcg_temp_new_vec_matching(t);
4078    one = tcg_constant_vec_matching(t, vece, 1);
4079
4080    tcg_gen_shli_vec(vece, t1, one, lsh);
4081    func(vece, t, a, t1);
4082}
4083
4084static void gen_vbitclri(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
4085{
4086    do_vbiti(vece, t, a, imm, tcg_gen_andc_vec);
4087}
4088
4089static void gen_vbitseti(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
4090{
4091    do_vbiti(vece, t, a, imm, tcg_gen_or_vec);
4092}
4093
4094static void gen_vbitrevi(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
4095{
4096    do_vbiti(vece, t, a, imm, tcg_gen_xor_vec);
4097}
4098
4099static void do_vbitclri(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
4100                        int64_t imm, uint32_t oprsz, uint32_t maxsz)
4101{
4102    static const TCGOpcode vecop_list[] = {
4103        INDEX_op_shli_vec, INDEX_op_andc_vec, 0
4104        };
4105    static const GVecGen2i op[4] = {
4106        {
4107            .fniv = gen_vbitclri,
4108            .fnoi = gen_helper_vbitclri_b,
4109            .opt_opc = vecop_list,
4110            .vece = MO_8
4111        },
4112        {
4113            .fniv = gen_vbitclri,
4114            .fnoi = gen_helper_vbitclri_h,
4115            .opt_opc = vecop_list,
4116            .vece = MO_16
4117        },
4118        {
4119            .fniv = gen_vbitclri,
4120            .fnoi = gen_helper_vbitclri_w,
4121            .opt_opc = vecop_list,
4122            .vece = MO_32
4123        },
4124        {
4125            .fniv = gen_vbitclri,
4126            .fnoi = gen_helper_vbitclri_d,
4127            .opt_opc = vecop_list,
4128            .vece = MO_64
4129        },
4130    };
4131
4132    tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
4133}
4134
4135TRANS(vbitclri_b, LSX, gvec_vv_i, MO_8, do_vbitclri)
4136TRANS(vbitclri_h, LSX, gvec_vv_i, MO_16, do_vbitclri)
4137TRANS(vbitclri_w, LSX, gvec_vv_i, MO_32, do_vbitclri)
4138TRANS(vbitclri_d, LSX, gvec_vv_i, MO_64, do_vbitclri)
4139TRANS(xvbitclri_b, LASX, gvec_xx_i, MO_8, do_vbitclri)
4140TRANS(xvbitclri_h, LASX, gvec_xx_i, MO_16, do_vbitclri)
4141TRANS(xvbitclri_w, LASX, gvec_xx_i, MO_32, do_vbitclri)
4142TRANS(xvbitclri_d, LASX, gvec_xx_i, MO_64, do_vbitclri)
4143
4144static void do_vbitset(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
4145                       uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
4146{
4147    static const TCGOpcode vecop_list[] = {
4148        INDEX_op_shlv_vec, 0
4149        };
4150    static const GVecGen3 op[4] = {
4151        {
4152            .fniv = gen_vbitset,
4153            .fno = gen_helper_vbitset_b,
4154            .opt_opc = vecop_list,
4155            .vece = MO_8
4156        },
4157        {
4158            .fniv = gen_vbitset,
4159            .fno = gen_helper_vbitset_h,
4160            .opt_opc = vecop_list,
4161            .vece = MO_16
4162        },
4163        {
4164            .fniv = gen_vbitset,
4165            .fno = gen_helper_vbitset_w,
4166            .opt_opc = vecop_list,
4167            .vece = MO_32
4168        },
4169        {
4170            .fniv = gen_vbitset,
4171            .fno = gen_helper_vbitset_d,
4172            .opt_opc = vecop_list,
4173            .vece = MO_64
4174        },
4175    };
4176
4177    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
4178}
4179
4180TRANS(vbitset_b, LSX, gvec_vvv, MO_8, do_vbitset)
4181TRANS(vbitset_h, LSX, gvec_vvv, MO_16, do_vbitset)
4182TRANS(vbitset_w, LSX, gvec_vvv, MO_32, do_vbitset)
4183TRANS(vbitset_d, LSX, gvec_vvv, MO_64, do_vbitset)
4184TRANS(xvbitset_b, LASX, gvec_xxx, MO_8, do_vbitset)
4185TRANS(xvbitset_h, LASX, gvec_xxx, MO_16, do_vbitset)
4186TRANS(xvbitset_w, LASX, gvec_xxx, MO_32, do_vbitset)
4187TRANS(xvbitset_d, LASX, gvec_xxx, MO_64, do_vbitset)
4188
4189static void do_vbitseti(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
4190                        int64_t imm, uint32_t oprsz, uint32_t maxsz)
4191{
4192    static const TCGOpcode vecop_list[] = {
4193        INDEX_op_shli_vec, 0
4194        };
4195    static const GVecGen2i op[4] = {
4196        {
4197            .fniv = gen_vbitseti,
4198            .fnoi = gen_helper_vbitseti_b,
4199            .opt_opc = vecop_list,
4200            .vece = MO_8
4201        },
4202        {
4203            .fniv = gen_vbitseti,
4204            .fnoi = gen_helper_vbitseti_h,
4205            .opt_opc = vecop_list,
4206            .vece = MO_16
4207        },
4208        {
4209            .fniv = gen_vbitseti,
4210            .fnoi = gen_helper_vbitseti_w,
4211            .opt_opc = vecop_list,
4212            .vece = MO_32
4213        },
4214        {
4215            .fniv = gen_vbitseti,
4216            .fnoi = gen_helper_vbitseti_d,
4217            .opt_opc = vecop_list,
4218            .vece = MO_64
4219        },
4220    };
4221
4222    tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
4223}
4224
4225TRANS(vbitseti_b, LSX, gvec_vv_i, MO_8, do_vbitseti)
4226TRANS(vbitseti_h, LSX, gvec_vv_i, MO_16, do_vbitseti)
4227TRANS(vbitseti_w, LSX, gvec_vv_i, MO_32, do_vbitseti)
4228TRANS(vbitseti_d, LSX, gvec_vv_i, MO_64, do_vbitseti)
4229TRANS(xvbitseti_b, LASX, gvec_xx_i, MO_8, do_vbitseti)
4230TRANS(xvbitseti_h, LASX, gvec_xx_i, MO_16, do_vbitseti)
4231TRANS(xvbitseti_w, LASX, gvec_xx_i, MO_32, do_vbitseti)
4232TRANS(xvbitseti_d, LASX, gvec_xx_i, MO_64, do_vbitseti)
4233
4234static void do_vbitrev(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
4235                       uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
4236{
4237    static const TCGOpcode vecop_list[] = {
4238        INDEX_op_shlv_vec, 0
4239        };
4240    static const GVecGen3 op[4] = {
4241        {
4242            .fniv = gen_vbitrev,
4243            .fno = gen_helper_vbitrev_b,
4244            .opt_opc = vecop_list,
4245            .vece = MO_8
4246        },
4247        {
4248            .fniv = gen_vbitrev,
4249            .fno = gen_helper_vbitrev_h,
4250            .opt_opc = vecop_list,
4251            .vece = MO_16
4252        },
4253        {
4254            .fniv = gen_vbitrev,
4255            .fno = gen_helper_vbitrev_w,
4256            .opt_opc = vecop_list,
4257            .vece = MO_32
4258        },
4259        {
4260            .fniv = gen_vbitrev,
4261            .fno = gen_helper_vbitrev_d,
4262            .opt_opc = vecop_list,
4263            .vece = MO_64
4264        },
4265    };
4266
4267    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
4268}
4269
4270TRANS(vbitrev_b, LSX, gvec_vvv, MO_8, do_vbitrev)
4271TRANS(vbitrev_h, LSX, gvec_vvv, MO_16, do_vbitrev)
4272TRANS(vbitrev_w, LSX, gvec_vvv, MO_32, do_vbitrev)
4273TRANS(vbitrev_d, LSX, gvec_vvv, MO_64, do_vbitrev)
4274TRANS(xvbitrev_b, LASX, gvec_xxx, MO_8, do_vbitrev)
4275TRANS(xvbitrev_h, LASX, gvec_xxx, MO_16, do_vbitrev)
4276TRANS(xvbitrev_w, LASX, gvec_xxx, MO_32, do_vbitrev)
4277TRANS(xvbitrev_d, LASX, gvec_xxx, MO_64, do_vbitrev)
4278
4279static void do_vbitrevi(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
4280                        int64_t imm, uint32_t oprsz, uint32_t maxsz)
4281{
4282    static const TCGOpcode vecop_list[] = {
4283        INDEX_op_shli_vec, 0
4284        };
4285    static const GVecGen2i op[4] = {
4286        {
4287            .fniv = gen_vbitrevi,
4288            .fnoi = gen_helper_vbitrevi_b,
4289            .opt_opc = vecop_list,
4290            .vece = MO_8
4291        },
4292        {
4293            .fniv = gen_vbitrevi,
4294            .fnoi = gen_helper_vbitrevi_h,
4295            .opt_opc = vecop_list,
4296            .vece = MO_16
4297        },
4298        {
4299            .fniv = gen_vbitrevi,
4300            .fnoi = gen_helper_vbitrevi_w,
4301            .opt_opc = vecop_list,
4302            .vece = MO_32
4303        },
4304        {
4305            .fniv = gen_vbitrevi,
4306            .fnoi = gen_helper_vbitrevi_d,
4307            .opt_opc = vecop_list,
4308            .vece = MO_64
4309        },
4310    };
4311
4312    tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
4313}
4314
4315TRANS(vbitrevi_b, LSX, gvec_vv_i, MO_8, do_vbitrevi)
4316TRANS(vbitrevi_h, LSX, gvec_vv_i, MO_16, do_vbitrevi)
4317TRANS(vbitrevi_w, LSX, gvec_vv_i, MO_32, do_vbitrevi)
4318TRANS(vbitrevi_d, LSX, gvec_vv_i, MO_64, do_vbitrevi)
4319TRANS(xvbitrevi_b, LASX, gvec_xx_i, MO_8, do_vbitrevi)
4320TRANS(xvbitrevi_h, LASX, gvec_xx_i, MO_16, do_vbitrevi)
4321TRANS(xvbitrevi_w, LASX, gvec_xx_i, MO_32, do_vbitrevi)
4322TRANS(xvbitrevi_d, LASX, gvec_xx_i, MO_64, do_vbitrevi)
4323
4324TRANS(vfrstp_b, LSX, gen_vvv, gen_helper_vfrstp_b)
4325TRANS(vfrstp_h, LSX, gen_vvv, gen_helper_vfrstp_h)
4326TRANS(vfrstpi_b, LSX, gen_vv_i, gen_helper_vfrstpi_b)
4327TRANS(vfrstpi_h, LSX, gen_vv_i, gen_helper_vfrstpi_h)
4328TRANS(xvfrstp_b, LASX, gen_xxx, gen_helper_vfrstp_b)
4329TRANS(xvfrstp_h, LASX, gen_xxx, gen_helper_vfrstp_h)
4330TRANS(xvfrstpi_b, LASX, gen_xx_i, gen_helper_vfrstpi_b)
4331TRANS(xvfrstpi_h, LASX, gen_xx_i, gen_helper_vfrstpi_h)
4332
4333TRANS(vfadd_s, LSX, gen_vvv_ptr, gen_helper_vfadd_s)
4334TRANS(vfadd_d, LSX, gen_vvv_ptr, gen_helper_vfadd_d)
4335TRANS(vfsub_s, LSX, gen_vvv_ptr, gen_helper_vfsub_s)
4336TRANS(vfsub_d, LSX, gen_vvv_ptr, gen_helper_vfsub_d)
4337TRANS(vfmul_s, LSX, gen_vvv_ptr, gen_helper_vfmul_s)
4338TRANS(vfmul_d, LSX, gen_vvv_ptr, gen_helper_vfmul_d)
4339TRANS(vfdiv_s, LSX, gen_vvv_ptr, gen_helper_vfdiv_s)
4340TRANS(vfdiv_d, LSX, gen_vvv_ptr, gen_helper_vfdiv_d)
4341TRANS(xvfadd_s, LASX, gen_xxx_ptr, gen_helper_vfadd_s)
4342TRANS(xvfadd_d, LASX, gen_xxx_ptr, gen_helper_vfadd_d)
4343TRANS(xvfsub_s, LASX, gen_xxx_ptr, gen_helper_vfsub_s)
4344TRANS(xvfsub_d, LASX, gen_xxx_ptr, gen_helper_vfsub_d)
4345TRANS(xvfmul_s, LASX, gen_xxx_ptr, gen_helper_vfmul_s)
4346TRANS(xvfmul_d, LASX, gen_xxx_ptr, gen_helper_vfmul_d)
4347TRANS(xvfdiv_s, LASX, gen_xxx_ptr, gen_helper_vfdiv_s)
4348TRANS(xvfdiv_d, LASX, gen_xxx_ptr, gen_helper_vfdiv_d)
4349
4350TRANS(vfmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfmadd_s)
4351TRANS(vfmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfmadd_d)
4352TRANS(vfmsub_s, LSX, gen_vvvv_ptr, gen_helper_vfmsub_s)
4353TRANS(vfmsub_d, LSX, gen_vvvv_ptr, gen_helper_vfmsub_d)
4354TRANS(vfnmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_s)
4355TRANS(vfnmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_d)
4356TRANS(vfnmsub_s, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_s)
4357TRANS(vfnmsub_d, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_d)
4358TRANS(xvfmadd_s, LASX, gen_xxxx_ptr, gen_helper_vfmadd_s)
4359TRANS(xvfmadd_d, LASX, gen_xxxx_ptr, gen_helper_vfmadd_d)
4360TRANS(xvfmsub_s, LASX, gen_xxxx_ptr, gen_helper_vfmsub_s)
4361TRANS(xvfmsub_d, LASX, gen_xxxx_ptr, gen_helper_vfmsub_d)
4362TRANS(xvfnmadd_s, LASX, gen_xxxx_ptr, gen_helper_vfnmadd_s)
4363TRANS(xvfnmadd_d, LASX, gen_xxxx_ptr, gen_helper_vfnmadd_d)
4364TRANS(xvfnmsub_s, LASX, gen_xxxx_ptr, gen_helper_vfnmsub_s)
4365TRANS(xvfnmsub_d, LASX, gen_xxxx_ptr, gen_helper_vfnmsub_d)
4366
4367TRANS(vfmax_s, LSX, gen_vvv_ptr, gen_helper_vfmax_s)
4368TRANS(vfmax_d, LSX, gen_vvv_ptr, gen_helper_vfmax_d)
4369TRANS(vfmin_s, LSX, gen_vvv_ptr, gen_helper_vfmin_s)
4370TRANS(vfmin_d, LSX, gen_vvv_ptr, gen_helper_vfmin_d)
4371TRANS(xvfmax_s, LASX, gen_xxx_ptr, gen_helper_vfmax_s)
4372TRANS(xvfmax_d, LASX, gen_xxx_ptr, gen_helper_vfmax_d)
4373TRANS(xvfmin_s, LASX, gen_xxx_ptr, gen_helper_vfmin_s)
4374TRANS(xvfmin_d, LASX, gen_xxx_ptr, gen_helper_vfmin_d)
4375
/*
 * vfmaxa/vfmina (presumably max/min selected by magnitude -- confirm in
 * the helpers): LSX entries, then LASX entries sharing the same helpers.
 */
TRANS(vfmaxa_s, LSX, gen_vvv_ptr, gen_helper_vfmaxa_s)
TRANS(vfmaxa_d, LSX, gen_vvv_ptr, gen_helper_vfmaxa_d)
TRANS(vfmina_s, LSX, gen_vvv_ptr, gen_helper_vfmina_s)
TRANS(vfmina_d, LSX, gen_vvv_ptr, gen_helper_vfmina_d)
TRANS(xvfmaxa_s, LASX, gen_xxx_ptr, gen_helper_vfmaxa_s)
TRANS(xvfmaxa_d, LASX, gen_xxx_ptr, gen_helper_vfmaxa_d)
TRANS(xvfmina_s, LASX, gen_xxx_ptr, gen_helper_vfmina_s)
TRANS(xvfmina_d, LASX, gen_xxx_ptr, gen_helper_vfmina_d)
4384
/*
 * vflogb: routed through gen_vv_ptr (LSX) / gen_xx_ptr (LASX); both widths
 * use the same gen_helper_vflogb_* helper.
 */
TRANS(vflogb_s, LSX, gen_vv_ptr, gen_helper_vflogb_s)
TRANS(vflogb_d, LSX, gen_vv_ptr, gen_helper_vflogb_d)
TRANS(xvflogb_s, LASX, gen_xx_ptr, gen_helper_vflogb_s)
TRANS(xvflogb_d, LASX, gen_xx_ptr, gen_helper_vflogb_d)
4389
/*
 * vfclass: routed through gen_vv_ptr (LSX) / gen_xx_ptr (LASX); both widths
 * use the same gen_helper_vfclass_* helper.
 */
TRANS(vfclass_s, LSX, gen_vv_ptr, gen_helper_vfclass_s)
TRANS(vfclass_d, LSX, gen_vv_ptr, gen_helper_vfclass_d)
TRANS(xvfclass_s, LASX, gen_xx_ptr, gen_helper_vfclass_s)
TRANS(xvfclass_d, LASX, gen_xx_ptr, gen_helper_vfclass_d)
4394
/*
 * vfsqrt / vfrecip / vfrsqrt: LSX entries via gen_vv_ptr, then the LASX
 * entries via gen_xx_ptr with the same helpers.
 */
TRANS(vfsqrt_s, LSX, gen_vv_ptr, gen_helper_vfsqrt_s)
TRANS(vfsqrt_d, LSX, gen_vv_ptr, gen_helper_vfsqrt_d)
TRANS(vfrecip_s, LSX, gen_vv_ptr, gen_helper_vfrecip_s)
TRANS(vfrecip_d, LSX, gen_vv_ptr, gen_helper_vfrecip_d)
TRANS(vfrsqrt_s, LSX, gen_vv_ptr, gen_helper_vfrsqrt_s)
TRANS(vfrsqrt_d, LSX, gen_vv_ptr, gen_helper_vfrsqrt_d)
TRANS(xvfsqrt_s, LASX, gen_xx_ptr, gen_helper_vfsqrt_s)
TRANS(xvfsqrt_d, LASX, gen_xx_ptr, gen_helper_vfsqrt_d)
TRANS(xvfrecip_s, LASX, gen_xx_ptr, gen_helper_vfrecip_s)
TRANS(xvfrecip_d, LASX, gen_xx_ptr, gen_helper_vfrecip_d)
TRANS(xvfrsqrt_s, LASX, gen_xx_ptr, gen_helper_vfrsqrt_s)
TRANS(xvfrsqrt_d, LASX, gen_xx_ptr, gen_helper_vfrsqrt_d)
4407
/*
 * Floating-point format conversion entries.  The vfcvtl / vfcvth entries go
 * through gen_vv_ptr / gen_xx_ptr, while vfcvt_h_s and vfcvt_s_d go through
 * gen_vvv_ptr / gen_xxx_ptr.
 * NOTE(review): presumably the latter narrow two source registers into one
 * destination -- confirm in the helpers.
 */
TRANS(vfcvtl_s_h, LSX, gen_vv_ptr, gen_helper_vfcvtl_s_h)
TRANS(vfcvth_s_h, LSX, gen_vv_ptr, gen_helper_vfcvth_s_h)
TRANS(vfcvtl_d_s, LSX, gen_vv_ptr, gen_helper_vfcvtl_d_s)
TRANS(vfcvth_d_s, LSX, gen_vv_ptr, gen_helper_vfcvth_d_s)
TRANS(vfcvt_h_s, LSX, gen_vvv_ptr, gen_helper_vfcvt_h_s)
TRANS(vfcvt_s_d, LSX, gen_vvv_ptr, gen_helper_vfcvt_s_d)
TRANS(xvfcvtl_s_h, LASX, gen_xx_ptr, gen_helper_vfcvtl_s_h)
TRANS(xvfcvth_s_h, LASX, gen_xx_ptr, gen_helper_vfcvth_s_h)
TRANS(xvfcvtl_d_s, LASX, gen_xx_ptr, gen_helper_vfcvtl_d_s)
TRANS(xvfcvth_d_s, LASX, gen_xx_ptr, gen_helper_vfcvth_d_s)
TRANS(xvfcvt_h_s, LASX, gen_xxx_ptr, gen_helper_vfcvt_h_s)
TRANS(xvfcvt_s_d, LASX, gen_xxx_ptr, gen_helper_vfcvt_s_d)
4420
/*
 * vfrint* entries.  The instruction names carry an optional rne/rz/rp/rm
 * suffix (presumably the rounding mode -- confirm in the helpers); each
 * variant has its own helper, shared between the LSX and LASX entries.
 */
TRANS(vfrintrne_s, LSX, gen_vv_ptr, gen_helper_vfrintrne_s)
TRANS(vfrintrne_d, LSX, gen_vv_ptr, gen_helper_vfrintrne_d)
TRANS(vfrintrz_s, LSX, gen_vv_ptr, gen_helper_vfrintrz_s)
TRANS(vfrintrz_d, LSX, gen_vv_ptr, gen_helper_vfrintrz_d)
TRANS(vfrintrp_s, LSX, gen_vv_ptr, gen_helper_vfrintrp_s)
TRANS(vfrintrp_d, LSX, gen_vv_ptr, gen_helper_vfrintrp_d)
TRANS(vfrintrm_s, LSX, gen_vv_ptr, gen_helper_vfrintrm_s)
TRANS(vfrintrm_d, LSX, gen_vv_ptr, gen_helper_vfrintrm_d)
TRANS(vfrint_s, LSX, gen_vv_ptr, gen_helper_vfrint_s)
TRANS(vfrint_d, LSX, gen_vv_ptr, gen_helper_vfrint_d)
TRANS(xvfrintrne_s, LASX, gen_xx_ptr, gen_helper_vfrintrne_s)
TRANS(xvfrintrne_d, LASX, gen_xx_ptr, gen_helper_vfrintrne_d)
TRANS(xvfrintrz_s, LASX, gen_xx_ptr, gen_helper_vfrintrz_s)
TRANS(xvfrintrz_d, LASX, gen_xx_ptr, gen_helper_vfrintrz_d)
TRANS(xvfrintrp_s, LASX, gen_xx_ptr, gen_helper_vfrintrp_s)
TRANS(xvfrintrp_d, LASX, gen_xx_ptr, gen_helper_vfrintrp_d)
TRANS(xvfrintrm_s, LASX, gen_xx_ptr, gen_helper_vfrintrm_s)
TRANS(xvfrintrm_d, LASX, gen_xx_ptr, gen_helper_vfrintrm_d)
TRANS(xvfrint_s, LASX, gen_xx_ptr, gen_helper_vfrint_s)
TRANS(xvfrint_d, LASX, gen_xx_ptr, gen_helper_vfrint_d)
4441
/*
 * vftint* entries (floating-point to integer, judging by the names).
 * Three shapes appear below:
 *   - same-width forms (*_w_s, *_l_d, *_wu_s, *_lu_d) via gen_vv_ptr;
 *   - *_w_d forms via gen_vvv_ptr (three-register generator);
 *   - *l_l_s / *h_l_s forms via gen_vv_ptr.
 * The LASX block repeats the same list with gen_xx_ptr / gen_xxx_ptr and
 * the same helpers.
 */
TRANS(vftintrne_w_s, LSX, gen_vv_ptr, gen_helper_vftintrne_w_s)
TRANS(vftintrne_l_d, LSX, gen_vv_ptr, gen_helper_vftintrne_l_d)
TRANS(vftintrz_w_s, LSX, gen_vv_ptr, gen_helper_vftintrz_w_s)
TRANS(vftintrz_l_d, LSX, gen_vv_ptr, gen_helper_vftintrz_l_d)
TRANS(vftintrp_w_s, LSX, gen_vv_ptr, gen_helper_vftintrp_w_s)
TRANS(vftintrp_l_d, LSX, gen_vv_ptr, gen_helper_vftintrp_l_d)
TRANS(vftintrm_w_s, LSX, gen_vv_ptr, gen_helper_vftintrm_w_s)
TRANS(vftintrm_l_d, LSX, gen_vv_ptr, gen_helper_vftintrm_l_d)
TRANS(vftint_w_s, LSX, gen_vv_ptr, gen_helper_vftint_w_s)
TRANS(vftint_l_d, LSX, gen_vv_ptr, gen_helper_vftint_l_d)
TRANS(vftintrz_wu_s, LSX, gen_vv_ptr, gen_helper_vftintrz_wu_s)
TRANS(vftintrz_lu_d, LSX, gen_vv_ptr, gen_helper_vftintrz_lu_d)
TRANS(vftint_wu_s, LSX, gen_vv_ptr, gen_helper_vftint_wu_s)
TRANS(vftint_lu_d, LSX, gen_vv_ptr, gen_helper_vftint_lu_d)
TRANS(vftintrne_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrne_w_d)
TRANS(vftintrz_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrz_w_d)
TRANS(vftintrp_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrp_w_d)
TRANS(vftintrm_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrm_w_d)
TRANS(vftint_w_d, LSX, gen_vvv_ptr, gen_helper_vftint_w_d)
TRANS(vftintrnel_l_s, LSX, gen_vv_ptr, gen_helper_vftintrnel_l_s)
TRANS(vftintrneh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrneh_l_s)
TRANS(vftintrzl_l_s, LSX, gen_vv_ptr, gen_helper_vftintrzl_l_s)
TRANS(vftintrzh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrzh_l_s)
TRANS(vftintrpl_l_s, LSX, gen_vv_ptr, gen_helper_vftintrpl_l_s)
TRANS(vftintrph_l_s, LSX, gen_vv_ptr, gen_helper_vftintrph_l_s)
TRANS(vftintrml_l_s, LSX, gen_vv_ptr, gen_helper_vftintrml_l_s)
TRANS(vftintrmh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrmh_l_s)
TRANS(vftintl_l_s, LSX, gen_vv_ptr, gen_helper_vftintl_l_s)
TRANS(vftinth_l_s, LSX, gen_vv_ptr, gen_helper_vftinth_l_s)
TRANS(xvftintrne_w_s, LASX, gen_xx_ptr, gen_helper_vftintrne_w_s)
TRANS(xvftintrne_l_d, LASX, gen_xx_ptr, gen_helper_vftintrne_l_d)
TRANS(xvftintrz_w_s, LASX, gen_xx_ptr, gen_helper_vftintrz_w_s)
TRANS(xvftintrz_l_d, LASX, gen_xx_ptr, gen_helper_vftintrz_l_d)
TRANS(xvftintrp_w_s, LASX, gen_xx_ptr, gen_helper_vftintrp_w_s)
TRANS(xvftintrp_l_d, LASX, gen_xx_ptr, gen_helper_vftintrp_l_d)
TRANS(xvftintrm_w_s, LASX, gen_xx_ptr, gen_helper_vftintrm_w_s)
TRANS(xvftintrm_l_d, LASX, gen_xx_ptr, gen_helper_vftintrm_l_d)
TRANS(xvftint_w_s, LASX, gen_xx_ptr, gen_helper_vftint_w_s)
TRANS(xvftint_l_d, LASX, gen_xx_ptr, gen_helper_vftint_l_d)
TRANS(xvftintrz_wu_s, LASX, gen_xx_ptr, gen_helper_vftintrz_wu_s)
TRANS(xvftintrz_lu_d, LASX, gen_xx_ptr, gen_helper_vftintrz_lu_d)
TRANS(xvftint_wu_s, LASX, gen_xx_ptr, gen_helper_vftint_wu_s)
TRANS(xvftint_lu_d, LASX, gen_xx_ptr, gen_helper_vftint_lu_d)
TRANS(xvftintrne_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrne_w_d)
TRANS(xvftintrz_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrz_w_d)
TRANS(xvftintrp_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrp_w_d)
TRANS(xvftintrm_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrm_w_d)
TRANS(xvftint_w_d, LASX, gen_xxx_ptr, gen_helper_vftint_w_d)
TRANS(xvftintrnel_l_s, LASX, gen_xx_ptr, gen_helper_vftintrnel_l_s)
TRANS(xvftintrneh_l_s, LASX, gen_xx_ptr, gen_helper_vftintrneh_l_s)
TRANS(xvftintrzl_l_s, LASX, gen_xx_ptr, gen_helper_vftintrzl_l_s)
TRANS(xvftintrzh_l_s, LASX, gen_xx_ptr, gen_helper_vftintrzh_l_s)
TRANS(xvftintrpl_l_s, LASX, gen_xx_ptr, gen_helper_vftintrpl_l_s)
TRANS(xvftintrph_l_s, LASX, gen_xx_ptr, gen_helper_vftintrph_l_s)
TRANS(xvftintrml_l_s, LASX, gen_xx_ptr, gen_helper_vftintrml_l_s)
TRANS(xvftintrmh_l_s, LASX, gen_xx_ptr, gen_helper_vftintrmh_l_s)
TRANS(xvftintl_l_s, LASX, gen_xx_ptr, gen_helper_vftintl_l_s)
TRANS(xvftinth_l_s, LASX, gen_xx_ptr, gen_helper_vftinth_l_s)
4500
/*
 * vffint* entries (integer to floating-point, judging by the names).
 * Only vffint_s_l / xvffint_s_l use the three-register generators
 * (gen_vvv_ptr / gen_xxx_ptr); every other entry uses gen_vv_ptr /
 * gen_xx_ptr.  LASX entries share the LSX helpers.
 */
TRANS(vffint_s_w, LSX, gen_vv_ptr, gen_helper_vffint_s_w)
TRANS(vffint_d_l, LSX, gen_vv_ptr, gen_helper_vffint_d_l)
TRANS(vffint_s_wu, LSX, gen_vv_ptr, gen_helper_vffint_s_wu)
TRANS(vffint_d_lu, LSX, gen_vv_ptr, gen_helper_vffint_d_lu)
TRANS(vffintl_d_w, LSX, gen_vv_ptr, gen_helper_vffintl_d_w)
TRANS(vffinth_d_w, LSX, gen_vv_ptr, gen_helper_vffinth_d_w)
TRANS(vffint_s_l, LSX, gen_vvv_ptr, gen_helper_vffint_s_l)
TRANS(xvffint_s_w, LASX, gen_xx_ptr, gen_helper_vffint_s_w)
TRANS(xvffint_d_l, LASX, gen_xx_ptr, gen_helper_vffint_d_l)
TRANS(xvffint_s_wu, LASX, gen_xx_ptr, gen_helper_vffint_s_wu)
TRANS(xvffint_d_lu, LASX, gen_xx_ptr, gen_helper_vffint_d_lu)
TRANS(xvffintl_d_w, LASX, gen_xx_ptr, gen_helper_vffintl_d_w)
TRANS(xvffinth_d_w, LASX, gen_xx_ptr, gen_helper_vffinth_d_w)
TRANS(xvffint_s_l, LASX, gen_xxx_ptr, gen_helper_vffint_s_l)
4515
4516static bool do_cmp_vl(DisasContext *ctx, arg_vvv *a,
4517                      uint32_t oprsz, MemOp mop, TCGCond cond)
4518{
4519    uint32_t vd_ofs, vj_ofs, vk_ofs;
4520
4521    if (!check_vec(ctx, oprsz)) {
4522        return true;
4523    }
4524
4525    vd_ofs = vec_full_offset(a->vd);
4526    vj_ofs = vec_full_offset(a->vj);
4527    vk_ofs = vec_full_offset(a->vk);
4528
4529    tcg_gen_gvec_cmp(cond, mop, vd_ofs, vj_ofs, vk_ofs, oprsz, ctx->vl / 8);
4530    return true;
4531}
4532
4533static bool do_cmp(DisasContext *ctx, arg_vvv *a,
4534                   MemOp mop, TCGCond cond)
4535{
4536    return do_cmp_vl(ctx, a, 16, mop, cond);
4537}
4538
4539static bool do_xcmp(DisasContext *ctx, arg_vvv *a,
4540                    MemOp mop, TCGCond cond)
4541{
4542    return do_cmp_vl(ctx, a, 32, mop, cond);
4543}
4544
4545static bool do_cmpi_vl(DisasContext *ctx, arg_vv_i *a,
4546                       uint32_t oprsz, MemOp mop, TCGCond cond)
4547{
4548    uint32_t vd_ofs, vj_ofs;
4549
4550    if (!check_vec(ctx, oprsz)) {
4551        return true;
4552    }
4553
4554    vd_ofs = vec_full_offset(a->vd);
4555    vj_ofs = vec_full_offset(a->vj);
4556
4557    tcg_gen_gvec_cmpi(cond, mop, vd_ofs, vj_ofs, a->imm, oprsz, ctx->vl / 8);
4558    return true;
4559}
4560
4561static bool do_cmpi(DisasContext *ctx, arg_vv_i *a,
4562                    MemOp mop, TCGCond cond)
4563{
4564    return do_cmpi_vl(ctx, a, 16, mop, cond);
4565}
4566
4567static bool do_xcmpi(DisasContext *ctx, arg_vv_i *a,
4568                     MemOp mop, TCGCond cond)
4569{
4570    return do_cmpi_vl(ctx, a, 32, mop, cond);
4571}
4572
/*
 * vseq / vseqi: equality compares (TCG_COND_EQ).  Register forms use
 * do_cmp (LSX) or do_xcmp (LASX); immediate forms use do_cmpi / do_xcmpi.
 * The MO_* argument is the element size.
 */
TRANS(vseq_b, LSX, do_cmp, MO_8, TCG_COND_EQ)
TRANS(vseq_h, LSX, do_cmp, MO_16, TCG_COND_EQ)
TRANS(vseq_w, LSX, do_cmp, MO_32, TCG_COND_EQ)
TRANS(vseq_d, LSX, do_cmp, MO_64, TCG_COND_EQ)
TRANS(vseqi_b, LSX, do_cmpi, MO_8, TCG_COND_EQ)
TRANS(vseqi_h, LSX, do_cmpi, MO_16, TCG_COND_EQ)
TRANS(vseqi_w, LSX, do_cmpi, MO_32, TCG_COND_EQ)
TRANS(vseqi_d, LSX, do_cmpi, MO_64, TCG_COND_EQ)
TRANS(xvseq_b, LASX, do_xcmp, MO_8, TCG_COND_EQ)
TRANS(xvseq_h, LASX, do_xcmp, MO_16, TCG_COND_EQ)
TRANS(xvseq_w, LASX, do_xcmp, MO_32, TCG_COND_EQ)
TRANS(xvseq_d, LASX, do_xcmp, MO_64, TCG_COND_EQ)
TRANS(xvseqi_b, LASX, do_xcmpi, MO_8, TCG_COND_EQ)
TRANS(xvseqi_h, LASX, do_xcmpi, MO_16, TCG_COND_EQ)
TRANS(xvseqi_w, LASX, do_xcmpi, MO_32, TCG_COND_EQ)
TRANS(xvseqi_d, LASX, do_xcmpi, MO_64, TCG_COND_EQ)
4589
/*
 * vsle / vslei: less-than-or-equal compares.  Entries without a "u" suffix
 * pass the signed condition TCG_COND_LE; the *u entries pass the unsigned
 * condition TCG_COND_LEU.  LSX entries come first, then the LASX entries.
 */
TRANS(vsle_b, LSX, do_cmp, MO_8, TCG_COND_LE)
TRANS(vsle_h, LSX, do_cmp, MO_16, TCG_COND_LE)
TRANS(vsle_w, LSX, do_cmp, MO_32, TCG_COND_LE)
TRANS(vsle_d, LSX, do_cmp, MO_64, TCG_COND_LE)
TRANS(vslei_b, LSX, do_cmpi, MO_8, TCG_COND_LE)
TRANS(vslei_h, LSX, do_cmpi, MO_16, TCG_COND_LE)
TRANS(vslei_w, LSX, do_cmpi, MO_32, TCG_COND_LE)
TRANS(vslei_d, LSX, do_cmpi, MO_64, TCG_COND_LE)
TRANS(vsle_bu, LSX, do_cmp, MO_8, TCG_COND_LEU)
TRANS(vsle_hu, LSX, do_cmp, MO_16, TCG_COND_LEU)
TRANS(vsle_wu, LSX, do_cmp, MO_32, TCG_COND_LEU)
TRANS(vsle_du, LSX, do_cmp, MO_64, TCG_COND_LEU)
TRANS(vslei_bu, LSX, do_cmpi, MO_8, TCG_COND_LEU)
TRANS(vslei_hu, LSX, do_cmpi, MO_16, TCG_COND_LEU)
TRANS(vslei_wu, LSX, do_cmpi, MO_32, TCG_COND_LEU)
TRANS(vslei_du, LSX, do_cmpi, MO_64, TCG_COND_LEU)
TRANS(xvsle_b, LASX, do_xcmp, MO_8, TCG_COND_LE)
TRANS(xvsle_h, LASX, do_xcmp, MO_16, TCG_COND_LE)
TRANS(xvsle_w, LASX, do_xcmp, MO_32, TCG_COND_LE)
TRANS(xvsle_d, LASX, do_xcmp, MO_64, TCG_COND_LE)
TRANS(xvslei_b, LASX, do_xcmpi, MO_8, TCG_COND_LE)
TRANS(xvslei_h, LASX, do_xcmpi, MO_16, TCG_COND_LE)
TRANS(xvslei_w, LASX, do_xcmpi, MO_32, TCG_COND_LE)
TRANS(xvslei_d, LASX, do_xcmpi, MO_64, TCG_COND_LE)
TRANS(xvsle_bu, LASX, do_xcmp, MO_8, TCG_COND_LEU)
TRANS(xvsle_hu, LASX, do_xcmp, MO_16, TCG_COND_LEU)
TRANS(xvsle_wu, LASX, do_xcmp, MO_32, TCG_COND_LEU)
TRANS(xvsle_du, LASX, do_xcmp, MO_64, TCG_COND_LEU)
TRANS(xvslei_bu, LASX, do_xcmpi, MO_8, TCG_COND_LEU)
TRANS(xvslei_hu, LASX, do_xcmpi, MO_16, TCG_COND_LEU)
TRANS(xvslei_wu, LASX, do_xcmpi, MO_32, TCG_COND_LEU)
TRANS(xvslei_du, LASX, do_xcmpi, MO_64, TCG_COND_LEU)
4622
/*
 * vslt / vslti: less-than compares.  Entries without a "u" suffix pass the
 * signed condition TCG_COND_LT; the *u entries pass the unsigned condition
 * TCG_COND_LTU.  LSX entries come first, then the LASX entries.
 */
TRANS(vslt_b, LSX, do_cmp, MO_8, TCG_COND_LT)
TRANS(vslt_h, LSX, do_cmp, MO_16, TCG_COND_LT)
TRANS(vslt_w, LSX, do_cmp, MO_32, TCG_COND_LT)
TRANS(vslt_d, LSX, do_cmp, MO_64, TCG_COND_LT)
TRANS(vslti_b, LSX, do_cmpi, MO_8, TCG_COND_LT)
TRANS(vslti_h, LSX, do_cmpi, MO_16, TCG_COND_LT)
TRANS(vslti_w, LSX, do_cmpi, MO_32, TCG_COND_LT)
TRANS(vslti_d, LSX, do_cmpi, MO_64, TCG_COND_LT)
TRANS(vslt_bu, LSX, do_cmp, MO_8, TCG_COND_LTU)
TRANS(vslt_hu, LSX, do_cmp, MO_16, TCG_COND_LTU)
TRANS(vslt_wu, LSX, do_cmp, MO_32, TCG_COND_LTU)
TRANS(vslt_du, LSX, do_cmp, MO_64, TCG_COND_LTU)
TRANS(vslti_bu, LSX, do_cmpi, MO_8, TCG_COND_LTU)
TRANS(vslti_hu, LSX, do_cmpi, MO_16, TCG_COND_LTU)
TRANS(vslti_wu, LSX, do_cmpi, MO_32, TCG_COND_LTU)
TRANS(vslti_du, LSX, do_cmpi, MO_64, TCG_COND_LTU)
TRANS(xvslt_b, LASX, do_xcmp, MO_8, TCG_COND_LT)
TRANS(xvslt_h, LASX, do_xcmp, MO_16, TCG_COND_LT)
TRANS(xvslt_w, LASX, do_xcmp, MO_32, TCG_COND_LT)
TRANS(xvslt_d, LASX, do_xcmp, MO_64, TCG_COND_LT)
TRANS(xvslti_b, LASX, do_xcmpi, MO_8, TCG_COND_LT)
TRANS(xvslti_h, LASX, do_xcmpi, MO_16, TCG_COND_LT)
TRANS(xvslti_w, LASX, do_xcmpi, MO_32, TCG_COND_LT)
TRANS(xvslti_d, LASX, do_xcmpi, MO_64, TCG_COND_LT)
TRANS(xvslt_bu, LASX, do_xcmp, MO_8, TCG_COND_LTU)
TRANS(xvslt_hu, LASX, do_xcmp, MO_16, TCG_COND_LTU)
TRANS(xvslt_wu, LASX, do_xcmp, MO_32, TCG_COND_LTU)
TRANS(xvslt_du, LASX, do_xcmp, MO_64, TCG_COND_LTU)
TRANS(xvslti_bu, LASX, do_xcmpi, MO_8, TCG_COND_LTU)
TRANS(xvslti_hu, LASX, do_xcmpi, MO_16, TCG_COND_LTU)
TRANS(xvslti_wu, LASX, do_xcmpi, MO_32, TCG_COND_LTU)
TRANS(xvslti_du, LASX, do_xcmpi, MO_64, TCG_COND_LTU)
4655
4656static bool do_vfcmp_cond_s(DisasContext *ctx, arg_vvv_fcond *a, uint32_t sz)
4657{
4658    uint32_t flags;
4659    void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32);
4660    TCGv_i32 vd = tcg_constant_i32(a->vd);
4661    TCGv_i32 vj = tcg_constant_i32(a->vj);
4662    TCGv_i32 vk = tcg_constant_i32(a->vk);
4663    TCGv_i32 oprsz = tcg_constant_i32(sz);
4664
4665    if (!check_vec(ctx, sz)) {
4666        return true;
4667    }
4668
4669    fn = (a->fcond & 1 ? gen_helper_vfcmp_s_s : gen_helper_vfcmp_c_s);
4670    flags = get_fcmp_flags(a->fcond >> 1);
4671    fn(tcg_env, oprsz, vd, vj, vk, tcg_constant_i32(flags));
4672
4673    return true;
4674}
4675
4676static bool do_vfcmp_cond_d(DisasContext *ctx, arg_vvv_fcond *a, uint32_t sz)
4677{
4678    uint32_t flags;
4679    void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32);
4680    TCGv_i32 vd = tcg_constant_i32(a->vd);
4681    TCGv_i32 vj = tcg_constant_i32(a->vj);
4682    TCGv_i32 vk = tcg_constant_i32(a->vk);
4683    TCGv_i32 oprsz = tcg_constant_i32(sz);
4684
4685    if (!check_vec(ctx, sz)) {
4686        return true;
4687    }
4688
4689    fn = (a->fcond & 1 ? gen_helper_vfcmp_s_d : gen_helper_vfcmp_c_d);
4690    flags = get_fcmp_flags(a->fcond >> 1);
4691    fn(tcg_env, oprsz, vd, vj, vk, tcg_constant_i32(flags));
4692
4693    return true;
4694}
4695
/*
 * Floating-point compare entries.  The trailing 16 / 32 is the sz argument
 * of do_vfcmp_cond_s / do_vfcmp_cond_d (operation size in bytes).
 */
TRANS(vfcmp_cond_s, LSX, do_vfcmp_cond_s, 16)
TRANS(vfcmp_cond_d, LSX, do_vfcmp_cond_d, 16)
TRANS(xvfcmp_cond_s, LASX, do_vfcmp_cond_s, 32)
TRANS(xvfcmp_cond_d, LASX, do_vfcmp_cond_d, 32)
4700
4701static bool do_vbitsel_v(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz)
4702{
4703    if (!check_vec(ctx, oprsz)) {
4704        return true;
4705    }
4706
4707    tcg_gen_gvec_bitsel(MO_64, vec_full_offset(a->vd), vec_full_offset(a->va),
4708                        vec_full_offset(a->vk), vec_full_offset(a->vj),
4709                        oprsz, ctx->vl / 8);
4710    return true;
4711}
4712
/* The trailing 16 / 32 is the oprsz argument of do_vbitsel_v (bytes). */
TRANS(vbitsel_v, LSX, do_vbitsel_v, 16)
TRANS(xvbitsel_v, LASX, do_vbitsel_v, 32)
4715
4716static void gen_vbitseli(unsigned vece, TCGv_vec a, TCGv_vec b, int64_t imm)
4717{
4718    tcg_gen_bitsel_vec(vece, a, a, tcg_constant_vec_matching(a, vece, imm), b);
4719}
4720
4721static bool do_vbitseli_b(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz)
4722{
4723    static const GVecGen2i op = {
4724       .fniv = gen_vbitseli,
4725       .fnoi = gen_helper_vbitseli_b,
4726       .vece = MO_8,
4727       .load_dest = true
4728    };
4729
4730    if (!check_vec(ctx, oprsz)) {
4731        return true;
4732    }
4733
4734    tcg_gen_gvec_2i(vec_full_offset(a->vd), vec_full_offset(a->vj),
4735                    oprsz, ctx->vl / 8, a->imm , &op);
4736    return true;
4737}
4738
/* The trailing 16 / 32 is the oprsz argument of do_vbitseli_b (bytes). */
TRANS(vbitseli_b, LSX, do_vbitseli_b, 16)
TRANS(xvbitseli_b, LASX, do_vbitseli_b, 32)
4741
4742#define VSET(NAME, COND)                                                       \
4743static bool trans_## NAME (DisasContext *ctx, arg_cv *a)                       \
4744{                                                                              \
4745    TCGv_i64 t1, al, ah;                                                       \
4746                                                                               \
4747    al = tcg_temp_new_i64();                                                   \
4748    ah = tcg_temp_new_i64();                                                   \
4749    t1 = tcg_temp_new_i64();                                                   \
4750                                                                               \
4751    get_vreg64(ah, a->vj, 1);                                                  \
4752    get_vreg64(al, a->vj, 0);                                                  \
4753                                                                               \
4754    if (!avail_LSX(ctx)) {                                                     \
4755        return false;                                                          \
4756    }                                                                          \
4757                                                                               \
4758    if (!check_vec(ctx, 16)) {                                                 \
4759        return true;                                                           \
4760    }                                                                          \
4761                                                                               \
4762    tcg_gen_or_i64(t1, al, ah);                                                \
4763    tcg_gen_setcondi_i64(COND, t1, t1, 0);                                     \
4764    tcg_gen_st8_tl(t1, tcg_env, offsetof(CPULoongArchState, cf[a->cd & 0x7])); \
4765                                                                               \
4766    return true;                                                               \
4767}
4768
4769VSET(vseteqz_v, TCG_COND_EQ)
4770VSET(vsetnez_v, TCG_COND_NE)
4771
/*
 * Per-element vsetanyeqz / vsetallnez tests (LSX), all routed through
 * gen_cv with a per-element-size helper.  gen_cv is defined outside this
 * part of the file.
 */
TRANS(vsetanyeqz_b, LSX, gen_cv, gen_helper_vsetanyeqz_b)
TRANS(vsetanyeqz_h, LSX, gen_cv, gen_helper_vsetanyeqz_h)
TRANS(vsetanyeqz_w, LSX, gen_cv, gen_helper_vsetanyeqz_w)
TRANS(vsetanyeqz_d, LSX, gen_cv, gen_helper_vsetanyeqz_d)
TRANS(vsetallnez_b, LSX, gen_cv, gen_helper_vsetallnez_b)
TRANS(vsetallnez_h, LSX, gen_cv, gen_helper_vsetallnez_h)
TRANS(vsetallnez_w, LSX, gen_cv, gen_helper_vsetallnez_w)
TRANS(vsetallnez_d, LSX, gen_cv, gen_helper_vsetallnez_d)
4780
4781#define XVSET(NAME, COND)                                                      \
4782static bool trans_## NAME(DisasContext *ctx, arg_cv * a)                       \
4783{                                                                              \
4784    TCGv_i64 t1, t2, d[4];                                                     \
4785                                                                               \
4786    d[0] = tcg_temp_new_i64();                                                 \
4787    d[1] = tcg_temp_new_i64();                                                 \
4788    d[2] = tcg_temp_new_i64();                                                 \
4789    d[3] = tcg_temp_new_i64();                                                 \
4790    t1 = tcg_temp_new_i64();                                                   \
4791    t2 = tcg_temp_new_i64();                                                   \
4792                                                                               \
4793    get_vreg64(d[0], a->vj, 0);                                                \
4794    get_vreg64(d[1], a->vj, 1);                                                \
4795    get_vreg64(d[2], a->vj, 2);                                                \
4796    get_vreg64(d[3], a->vj, 3);                                                \
4797                                                                               \
4798    if (!avail_LASX(ctx)) {                                                    \
4799        return false;                                                          \
4800    }                                                                          \
4801                                                                               \
4802    if (!check_vec(ctx, 32)) {                                                 \
4803        return true;                                                           \
4804    }                                                                          \
4805                                                                               \
4806    tcg_gen_or_i64(t1, d[0], d[1]);                                            \
4807    tcg_gen_or_i64(t2, d[2], d[3]);                                            \
4808    tcg_gen_or_i64(t1, t2, t1);                                                \
4809    tcg_gen_setcondi_i64(COND, t1, t1, 0);                                     \
4810    tcg_gen_st8_tl(t1, tcg_env, offsetof(CPULoongArchState, cf[a->cd & 0x7])); \
4811                                                                               \
4812    return true;                                                               \
4813}
4814
4815XVSET(xvseteqz_v, TCG_COND_EQ)
4816XVSET(xvsetnez_v, TCG_COND_NE)
4817
/*
 * LASX per-element xvsetanyeqz / xvsetallnez tests.  They go through gen_cx
 * (defined outside this part of the file) and pass the same
 * gen_helper_vsetanyeqz_* / gen_helper_vsetallnez_* helpers as the LSX
 * entries.
 */
TRANS(xvsetanyeqz_b, LASX, gen_cx, gen_helper_vsetanyeqz_b)
TRANS(xvsetanyeqz_h, LASX, gen_cx, gen_helper_vsetanyeqz_h)
TRANS(xvsetanyeqz_w, LASX, gen_cx, gen_helper_vsetanyeqz_w)
TRANS(xvsetanyeqz_d, LASX, gen_cx, gen_helper_vsetanyeqz_d)
TRANS(xvsetallnez_b, LASX, gen_cx, gen_helper_vsetallnez_b)
TRANS(xvsetallnez_h, LASX, gen_cx, gen_helper_vsetallnez_h)
TRANS(xvsetallnez_w, LASX, gen_cx, gen_helper_vsetallnez_w)
TRANS(xvsetallnez_d, LASX, gen_cx, gen_helper_vsetallnez_d)
4826
4827static bool gen_g2v_vl(DisasContext *ctx, arg_vr_i *a, uint32_t oprsz, MemOp mop,
4828                       void (*func)(TCGv, TCGv_ptr, tcg_target_long))
4829{
4830    TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
4831
4832    if (!check_vec(ctx, oprsz)) {
4833        return true;
4834    }
4835
4836    func(src, tcg_env, vec_reg_offset(a->vd, a->imm, mop));
4837
4838    return true;
4839}
4840
4841static bool gen_g2v(DisasContext *ctx, arg_vr_i *a, MemOp mop,
4842                    void (*func)(TCGv, TCGv_ptr, tcg_target_long))
4843{
4844    return gen_g2v_vl(ctx, a, 16, mop, func);
4845}
4846
4847static bool gen_g2x(DisasContext *ctx, arg_vr_i *a, MemOp mop,
4848                    void (*func)(TCGv, TCGv_ptr, tcg_target_long))
4849{
4850    return gen_g2v_vl(ctx, a, 32, mop, func);
4851}
4852
/*
 * vinsgr2vr / xvinsgr2vr: the MO_* argument is the element size and the
 * last argument is the TCG store emitter of matching width.  Only the _w
 * and _d LASX forms are registered here.
 */
TRANS(vinsgr2vr_b, LSX, gen_g2v, MO_8, tcg_gen_st8_i64)
TRANS(vinsgr2vr_h, LSX, gen_g2v, MO_16, tcg_gen_st16_i64)
TRANS(vinsgr2vr_w, LSX, gen_g2v, MO_32, tcg_gen_st32_i64)
TRANS(vinsgr2vr_d, LSX, gen_g2v, MO_64, tcg_gen_st_i64)
TRANS(xvinsgr2vr_w, LASX, gen_g2x, MO_32, tcg_gen_st32_i64)
TRANS(xvinsgr2vr_d, LASX, gen_g2x, MO_64, tcg_gen_st_i64)
4859
4860static bool gen_v2g_vl(DisasContext *ctx, arg_rv_i *a, uint32_t oprsz, MemOp mop,
4861                       void (*func)(TCGv, TCGv_ptr, tcg_target_long))
4862{
4863    TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
4864
4865    if (!check_vec(ctx, oprsz)) {
4866        return true;
4867    }
4868
4869    func(dst, tcg_env, vec_reg_offset(a->vj, a->imm, mop));
4870
4871    return true;
4872}
4873
4874static bool gen_v2g(DisasContext *ctx, arg_rv_i *a, MemOp mop,
4875                    void (*func)(TCGv, TCGv_ptr, tcg_target_long))
4876{
4877    return gen_v2g_vl(ctx, a, 16, mop, func);
4878}
4879
4880static bool gen_x2g(DisasContext *ctx, arg_rv_i *a, MemOp mop,
4881                    void (*func)(TCGv, TCGv_ptr, tcg_target_long))
4882{
4883    return gen_v2g_vl(ctx, a, 32, mop, func);
4884}
4885
/*
 * vpickve2gr / xvpickve2gr: the signed forms pass sign-extending loads
 * (ld8s / ld16s / ld32s) and the *u forms the zero-extending ones
 * (ld8u / ld16u / ld32u).  The 64-bit forms use tcg_gen_ld_i64 in both
 * cases.  Only the _w and _d LASX forms are registered here.
 */
TRANS(vpickve2gr_b, LSX, gen_v2g, MO_8, tcg_gen_ld8s_i64)
TRANS(vpickve2gr_h, LSX, gen_v2g, MO_16, tcg_gen_ld16s_i64)
TRANS(vpickve2gr_w, LSX, gen_v2g, MO_32, tcg_gen_ld32s_i64)
TRANS(vpickve2gr_d, LSX, gen_v2g, MO_64, tcg_gen_ld_i64)
TRANS(vpickve2gr_bu, LSX, gen_v2g, MO_8, tcg_gen_ld8u_i64)
TRANS(vpickve2gr_hu, LSX, gen_v2g, MO_16, tcg_gen_ld16u_i64)
TRANS(vpickve2gr_wu, LSX, gen_v2g, MO_32, tcg_gen_ld32u_i64)
TRANS(vpickve2gr_du, LSX, gen_v2g, MO_64, tcg_gen_ld_i64)
TRANS(xvpickve2gr_w, LASX, gen_x2g, MO_32, tcg_gen_ld32s_i64)
TRANS(xvpickve2gr_d, LASX, gen_x2g, MO_64, tcg_gen_ld_i64)
TRANS(xvpickve2gr_wu, LASX, gen_x2g, MO_32, tcg_gen_ld32u_i64)
TRANS(xvpickve2gr_du, LASX, gen_x2g, MO_64, tcg_gen_ld_i64)
4898
4899static bool gvec_dup_vl(DisasContext *ctx, arg_vr *a,
4900                        uint32_t oprsz, MemOp mop)
4901{
4902    TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
4903
4904    if (!check_vec(ctx, oprsz)) {
4905        return true;
4906    }
4907
4908    tcg_gen_gvec_dup_i64(mop, vec_full_offset(a->vd),
4909                         oprsz, ctx->vl/8, src);
4910    return true;
4911}
4912
4913static bool gvec_dup(DisasContext *ctx, arg_vr *a, MemOp mop)
4914{
4915    return gvec_dup_vl(ctx, a, 16, mop);
4916}
4917
4918static bool gvec_dupx(DisasContext *ctx, arg_vr *a, MemOp mop)
4919{
4920    return gvec_dup_vl(ctx, a, 32, mop);
4921}
4922
/*
 * vreplgr2vr / xvreplgr2vr: replicate a general register into a vector;
 * the MO_* argument is the element size handed to gvec_dup / gvec_dupx.
 */
TRANS(vreplgr2vr_b, LSX, gvec_dup, MO_8)
TRANS(vreplgr2vr_h, LSX, gvec_dup, MO_16)
TRANS(vreplgr2vr_w, LSX, gvec_dup, MO_32)
TRANS(vreplgr2vr_d, LSX, gvec_dup, MO_64)
TRANS(xvreplgr2vr_b, LASX, gvec_dupx, MO_8)
TRANS(xvreplgr2vr_h, LASX, gvec_dupx, MO_16)
TRANS(xvreplgr2vr_w, LASX, gvec_dupx, MO_32)
TRANS(xvreplgr2vr_d, LASX, gvec_dupx, MO_64)
4931
4932static bool trans_vreplvei_b(DisasContext *ctx, arg_vv_i *a)
4933{
4934    if (!avail_LSX(ctx)) {
4935        return false;
4936    }
4937
4938    if (!check_vec(ctx, 16)) {
4939        return true;
4940    }
4941
4942    tcg_gen_gvec_dup_mem(MO_8,vec_full_offset(a->vd),
4943                         offsetof(CPULoongArchState,
4944                                  fpr[a->vj].vreg.B((a->imm))),
4945                         16, ctx->vl/8);
4946    return true;
4947}
4948
4949static bool trans_vreplvei_h(DisasContext *ctx, arg_vv_i *a)
4950{
4951    if (!avail_LSX(ctx)) {
4952        return false;
4953    }
4954
4955    if (!check_vec(ctx, 16)) {
4956        return true;
4957    }
4958
4959    tcg_gen_gvec_dup_mem(MO_16, vec_full_offset(a->vd),
4960                         offsetof(CPULoongArchState,
4961                                  fpr[a->vj].vreg.H((a->imm))),
4962                         16, ctx->vl/8);
4963    return true;
4964}
4965static bool trans_vreplvei_w(DisasContext *ctx, arg_vv_i *a)
4966{
4967    if (!avail_LSX(ctx)) {
4968        return false;
4969    }
4970
4971    if (!check_vec(ctx, 16)) {
4972        return true;
4973    }
4974
4975    tcg_gen_gvec_dup_mem(MO_32, vec_full_offset(a->vd),
4976                         offsetof(CPULoongArchState,
4977                                  fpr[a->vj].vreg.W((a->imm))),
4978                        16, ctx->vl/8);
4979    return true;
4980}
4981static bool trans_vreplvei_d(DisasContext *ctx, arg_vv_i *a)
4982{
4983    if (!avail_LSX(ctx)) {
4984        return false;
4985    }
4986
4987    if (!check_vec(ctx, 16)) {
4988        return true;
4989    }
4990
4991    tcg_gen_gvec_dup_mem(MO_64, vec_full_offset(a->vd),
4992                         offsetof(CPULoongArchState,
4993                                  fpr[a->vj].vreg.D((a->imm))),
4994                         16, ctx->vl/8);
4995    return true;
4996}
4997
4998static bool gen_vreplve_vl(DisasContext *ctx, arg_vvr *a,
4999                           uint32_t oprsz, int vece, int bit,
5000                           void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
5001{
5002    int i;
5003    TCGv_i64 t0 = tcg_temp_new_i64();
5004    TCGv_ptr t1 = tcg_temp_new_ptr();
5005    TCGv_i64 t2 = tcg_temp_new_i64();
5006
5007    if (!check_vec(ctx, oprsz)) {
5008        return true;
5009    }
5010
5011    tcg_gen_andi_i64(t0, gpr_src(ctx, a->rk, EXT_NONE), (LSX_LEN / bit) - 1);
5012    tcg_gen_shli_i64(t0, t0, vece);
5013    if (HOST_BIG_ENDIAN) {
5014        tcg_gen_xori_i64(t0, t0, vece << ((LSX_LEN / bit) - 1));
5015    }
5016
5017    tcg_gen_trunc_i64_ptr(t1, t0);
5018    tcg_gen_add_ptr(t1, t1, tcg_env);
5019
5020    for (i = 0; i < oprsz; i += 16) {
5021        func(t2, t1, vec_full_offset(a->vj) + i);
5022        tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd) + i, 16, 16, t2);
5023    }
5024
5025    return true;
5026}
5027
5028static bool gen_vreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit,
5029                        void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
5030{
5031    return gen_vreplve_vl(ctx, a, 16, vece, bit, func);
5032}
5033
5034static bool gen_xvreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit,
5035                         void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
5036{
5037    return gen_vreplve_vl(ctx, a, 32, vece, bit, func);
5038}
5039
/*
 * vreplve / xvreplve.  Arguments after the generator: element size as a
 * MemOp (vece), element width in bits, and the load emitter used to fetch
 * the selected element.
 */
TRANS(vreplve_b, LSX, gen_vreplve, MO_8,  8, tcg_gen_ld8u_i64)
TRANS(vreplve_h, LSX, gen_vreplve, MO_16, 16, tcg_gen_ld16u_i64)
TRANS(vreplve_w, LSX, gen_vreplve, MO_32, 32, tcg_gen_ld32u_i64)
TRANS(vreplve_d, LSX, gen_vreplve, MO_64, 64, tcg_gen_ld_i64)
TRANS(xvreplve_b, LASX, gen_xvreplve, MO_8,  8, tcg_gen_ld8u_i64)
TRANS(xvreplve_h, LASX, gen_xvreplve, MO_16, 16, tcg_gen_ld16u_i64)
TRANS(xvreplve_w, LASX, gen_xvreplve, MO_32, 32, tcg_gen_ld32u_i64)
TRANS(xvreplve_d, LASX, gen_xvreplve, MO_64, 64, tcg_gen_ld_i64)
5048
5049static bool gen_xvrepl128(DisasContext *ctx, arg_vv_i *a, MemOp mop)
5050{
5051    int i;
5052
5053    if (!check_vec(ctx, 32)) {
5054        return true;
5055    }
5056
5057    for (i = 0; i < 32; i += 16) {
5058        tcg_gen_gvec_dup_mem(mop, vec_full_offset(a->vd) + i,
5059                             vec_reg_offset(a->vj, a->imm, mop) + i, 16, 16);
5060
5061    }
5062    return true;
5063}
5064
/* xvrepl128vei: the MO_* argument is the element size for gen_xvrepl128. */
TRANS(xvrepl128vei_b, LASX, gen_xvrepl128, MO_8)
TRANS(xvrepl128vei_h, LASX, gen_xvrepl128, MO_16)
TRANS(xvrepl128vei_w, LASX, gen_xvrepl128, MO_32)
TRANS(xvrepl128vei_d, LASX, gen_xvrepl128, MO_64)
5069
5070static bool gen_xvreplve0(DisasContext *ctx, arg_vv *a, MemOp mop)
5071{
5072    if (!check_vec(ctx, 32)) {
5073        return true;
5074    }
5075
5076    tcg_gen_gvec_dup_mem(mop, vec_full_offset(a->vd),
5077                         vec_full_offset(a->vj), 32, 32);
5078    return true;
5079}
5080
/*
 * xvreplve0.{b,h,w,d,q}: LASX-only, handled by gen_xvreplve0 with the
 * element size given as a MemOp (up to MO_128 for the .q form).
 */
TRANS(xvreplve0_b, LASX, gen_xvreplve0, MO_8)
TRANS(xvreplve0_h, LASX, gen_xvreplve0, MO_16)
TRANS(xvreplve0_w, LASX, gen_xvreplve0, MO_32)
TRANS(xvreplve0_d, LASX, gen_xvreplve0, MO_64)
TRANS(xvreplve0_q, LASX, gen_xvreplve0, MO_128)

/* xvinsve0.{w,d}: LASX-only, implemented by out-of-line helpers. */
TRANS(xvinsve0_w, LASX, gen_xx_i, gen_helper_xvinsve0_w)
TRANS(xvinsve0_d, LASX, gen_xx_i, gen_helper_xvinsve0_d)

/* xvpickve.{w,d}: LASX-only, implemented by out-of-line helpers. */
TRANS(xvpickve_w, LASX, gen_xx_i, gen_helper_xvpickve_w)
TRANS(xvpickve_d, LASX, gen_xx_i, gen_helper_xvpickve_d)
5092
5093static bool do_vbsll_v(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz)
5094{
5095    int i, ofs;
5096
5097    if (!check_vec(ctx, oprsz)) {
5098        return true;
5099    }
5100
5101    for (i = 0; i < oprsz / 16; i++) {
5102        TCGv desthigh = tcg_temp_new_i64();
5103        TCGv destlow = tcg_temp_new_i64();
5104        TCGv high = tcg_temp_new_i64();
5105        TCGv low = tcg_temp_new_i64();
5106
5107        get_vreg64(low, a->vj, 2 * i);
5108
5109        ofs = ((a->imm) & 0xf) * 8;
5110        if (ofs < 64) {
5111            get_vreg64(high, a->vj, 2 * i + 1);
5112            tcg_gen_extract2_i64(desthigh, low, high, 64 - ofs);
5113            tcg_gen_shli_i64(destlow, low, ofs);
5114        } else {
5115            tcg_gen_shli_i64(desthigh, low, ofs - 64);
5116            destlow = tcg_constant_i64(0);
5117        }
5118        set_vreg64(desthigh, a->vd, 2 * i + 1);
5119        set_vreg64(destlow, a->vd, 2 * i);
5120    }
5121
5122    return true;
5123}
5124
5125static bool do_vbsrl_v(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz)
5126{
5127    int i, ofs;
5128
5129    if (!check_vec(ctx, 32)) {
5130        return true;
5131    }
5132
5133    for (i = 0; i < oprsz / 16; i++) {
5134        TCGv desthigh = tcg_temp_new_i64();
5135        TCGv destlow = tcg_temp_new_i64();
5136        TCGv high = tcg_temp_new_i64();
5137        TCGv low = tcg_temp_new_i64();
5138        get_vreg64(high, a->vj, 2 * i + 1);
5139
5140        ofs = ((a->imm) & 0xf) * 8;
5141        if (ofs < 64) {
5142            get_vreg64(low, a->vj, 2 * i);
5143            tcg_gen_extract2_i64(destlow, low, high, ofs);
5144            tcg_gen_shri_i64(desthigh, high, ofs);
5145        } else {
5146            tcg_gen_shri_i64(destlow, high, ofs - 64);
5147            desthigh = tcg_constant_i64(0);
5148        }
5149        set_vreg64(desthigh, a->vd, 2 * i + 1);
5150        set_vreg64(destlow, a->vd, 2 * i);
5151    }
5152
5153    return true;
5154}
5155
/*
 * Byte-shift translators: the trailing argument is the operation size in
 * bytes handed to do_vbsll_v / do_vbsrl_v (16 for LSX, 32 for LASX).
 */
TRANS(vbsll_v, LSX, do_vbsll_v, 16)
TRANS(vbsrl_v, LSX, do_vbsrl_v, 16)
TRANS(xvbsll_v, LASX, do_vbsll_v, 32)
TRANS(xvbsrl_v, LASX, do_vbsrl_v, 32)
5160
/*
 * vpackev / vpackod (pack even / odd elements), three-operand forms.
 * The LASX entries reuse the same gen_helper_vpack* helpers as the LSX
 * entries and differ only in the generator (gen_xxx instead of gen_vvv).
 * NOTE(review): gen_vvv/gen_xxx are defined outside this chunk; by analogy
 * with gen_vvvv_ptr/gen_xxxx_ptr they presumably select a 16- vs 32-byte
 * operation size - confirm.
 */
TRANS(vpackev_b, LSX, gen_vvv, gen_helper_vpackev_b)
TRANS(vpackev_h, LSX, gen_vvv, gen_helper_vpackev_h)
TRANS(vpackev_w, LSX, gen_vvv, gen_helper_vpackev_w)
TRANS(vpackev_d, LSX, gen_vvv, gen_helper_vpackev_d)
TRANS(vpackod_b, LSX, gen_vvv, gen_helper_vpackod_b)
TRANS(vpackod_h, LSX, gen_vvv, gen_helper_vpackod_h)
TRANS(vpackod_w, LSX, gen_vvv, gen_helper_vpackod_w)
TRANS(vpackod_d, LSX, gen_vvv, gen_helper_vpackod_d)
TRANS(xvpackev_b, LASX, gen_xxx, gen_helper_vpackev_b)
TRANS(xvpackev_h, LASX, gen_xxx, gen_helper_vpackev_h)
TRANS(xvpackev_w, LASX, gen_xxx, gen_helper_vpackev_w)
TRANS(xvpackev_d, LASX, gen_xxx, gen_helper_vpackev_d)
TRANS(xvpackod_b, LASX, gen_xxx, gen_helper_vpackod_b)
TRANS(xvpackod_h, LASX, gen_xxx, gen_helper_vpackod_h)
TRANS(xvpackod_w, LASX, gen_xxx, gen_helper_vpackod_w)
TRANS(xvpackod_d, LASX, gen_xxx, gen_helper_vpackod_d)
5177
/*
 * vpickev / vpickod (pick even / odd elements), three-operand forms.
 * The LASX entries reuse the LSX gen_helper_vpick* helpers through gen_xxx.
 */
TRANS(vpickev_b, LSX, gen_vvv, gen_helper_vpickev_b)
TRANS(vpickev_h, LSX, gen_vvv, gen_helper_vpickev_h)
TRANS(vpickev_w, LSX, gen_vvv, gen_helper_vpickev_w)
TRANS(vpickev_d, LSX, gen_vvv, gen_helper_vpickev_d)
TRANS(vpickod_b, LSX, gen_vvv, gen_helper_vpickod_b)
TRANS(vpickod_h, LSX, gen_vvv, gen_helper_vpickod_h)
TRANS(vpickod_w, LSX, gen_vvv, gen_helper_vpickod_w)
TRANS(vpickod_d, LSX, gen_vvv, gen_helper_vpickod_d)
TRANS(xvpickev_b, LASX, gen_xxx, gen_helper_vpickev_b)
TRANS(xvpickev_h, LASX, gen_xxx, gen_helper_vpickev_h)
TRANS(xvpickev_w, LASX, gen_xxx, gen_helper_vpickev_w)
TRANS(xvpickev_d, LASX, gen_xxx, gen_helper_vpickev_d)
TRANS(xvpickod_b, LASX, gen_xxx, gen_helper_vpickod_b)
TRANS(xvpickod_h, LASX, gen_xxx, gen_helper_vpickod_h)
TRANS(xvpickod_w, LASX, gen_xxx, gen_helper_vpickod_w)
TRANS(xvpickod_d, LASX, gen_xxx, gen_helper_vpickod_d)
5194
/*
 * vilvl / vilvh (interleave low / high), three-operand forms.
 * The LASX entries reuse the LSX gen_helper_vilv* helpers through gen_xxx.
 */
TRANS(vilvl_b, LSX, gen_vvv, gen_helper_vilvl_b)
TRANS(vilvl_h, LSX, gen_vvv, gen_helper_vilvl_h)
TRANS(vilvl_w, LSX, gen_vvv, gen_helper_vilvl_w)
TRANS(vilvl_d, LSX, gen_vvv, gen_helper_vilvl_d)
TRANS(vilvh_b, LSX, gen_vvv, gen_helper_vilvh_b)
TRANS(vilvh_h, LSX, gen_vvv, gen_helper_vilvh_h)
TRANS(vilvh_w, LSX, gen_vvv, gen_helper_vilvh_w)
TRANS(vilvh_d, LSX, gen_vvv, gen_helper_vilvh_d)
TRANS(xvilvl_b, LASX, gen_xxx, gen_helper_vilvl_b)
TRANS(xvilvl_h, LASX, gen_xxx, gen_helper_vilvl_h)
TRANS(xvilvl_w, LASX, gen_xxx, gen_helper_vilvl_w)
TRANS(xvilvl_d, LASX, gen_xxx, gen_helper_vilvl_d)
TRANS(xvilvh_b, LASX, gen_xxx, gen_helper_vilvh_b)
TRANS(xvilvh_h, LASX, gen_xxx, gen_helper_vilvh_h)
TRANS(xvilvh_w, LASX, gen_xxx, gen_helper_vilvh_w)
TRANS(xvilvh_d, LASX, gen_xxx, gen_helper_vilvh_d)
5211
/*
 * vshuf / vshuf4i (shuffle) translators.
 * Note that the byte form (vshuf_b / xvshuf_b) is a four-operand
 * instruction (gen_vvvv / gen_xxxx), whereas the h/w/d forms take three
 * operands.  The vshuf4i forms take an immediate (gen_vv_i / gen_xx_i).
 * The LASX entries reuse the LSX helpers.
 */
TRANS(vshuf_b, LSX, gen_vvvv, gen_helper_vshuf_b)
TRANS(vshuf_h, LSX, gen_vvv, gen_helper_vshuf_h)
TRANS(vshuf_w, LSX, gen_vvv, gen_helper_vshuf_w)
TRANS(vshuf_d, LSX, gen_vvv, gen_helper_vshuf_d)
TRANS(xvshuf_b, LASX, gen_xxxx, gen_helper_vshuf_b)
TRANS(xvshuf_h, LASX, gen_xxx, gen_helper_vshuf_h)
TRANS(xvshuf_w, LASX, gen_xxx, gen_helper_vshuf_w)
TRANS(xvshuf_d, LASX, gen_xxx, gen_helper_vshuf_d)
TRANS(vshuf4i_b, LSX, gen_vv_i, gen_helper_vshuf4i_b)
TRANS(vshuf4i_h, LSX, gen_vv_i, gen_helper_vshuf4i_h)
TRANS(vshuf4i_w, LSX, gen_vv_i, gen_helper_vshuf4i_w)
TRANS(vshuf4i_d, LSX, gen_vv_i, gen_helper_vshuf4i_d)
TRANS(xvshuf4i_b, LASX, gen_xx_i, gen_helper_vshuf4i_b)
TRANS(xvshuf4i_h, LASX, gen_xx_i, gen_helper_vshuf4i_h)
TRANS(xvshuf4i_w, LASX, gen_xx_i, gen_helper_vshuf4i_w)
TRANS(xvshuf4i_d, LASX, gen_xx_i, gen_helper_vshuf4i_d)
5228
/*
 * Permute translators.  Only vpermi_w has an LSX form here; xvperm_w,
 * xvpermi_d and xvpermi_q are LASX-only.  xvperm_w takes three vector
 * operands, the vpermi forms take an immediate.
 */
TRANS(xvperm_w, LASX, gen_xxx, gen_helper_vperm_w)
TRANS(vpermi_w, LSX, gen_vv_i, gen_helper_vpermi_w)
TRANS(xvpermi_w, LASX, gen_xx_i, gen_helper_vpermi_w)
TRANS(xvpermi_d, LASX, gen_xx_i, gen_helper_vpermi_d)
TRANS(xvpermi_q, LASX, gen_xx_i, gen_helper_vpermi_q)
5234
/*
 * vextrins (extract-and-insert with immediate) translators.
 * The LASX entries reuse the LSX gen_helper_vextrins_* helpers via gen_xx_i.
 */
TRANS(vextrins_b, LSX, gen_vv_i, gen_helper_vextrins_b)
TRANS(vextrins_h, LSX, gen_vv_i, gen_helper_vextrins_h)
TRANS(vextrins_w, LSX, gen_vv_i, gen_helper_vextrins_w)
TRANS(vextrins_d, LSX, gen_vv_i, gen_helper_vextrins_d)
TRANS(xvextrins_b, LASX, gen_xx_i, gen_helper_vextrins_b)
TRANS(xvextrins_h, LASX, gen_xx_i, gen_helper_vextrins_h)
TRANS(xvextrins_w, LASX, gen_xx_i, gen_helper_vextrins_w)
TRANS(xvextrins_d, LASX, gen_xx_i, gen_helper_vextrins_d)
5243
5244static bool trans_vld(DisasContext *ctx, arg_vr_i *a)
5245{
5246    TCGv addr;
5247    TCGv_i64 rl, rh;
5248    TCGv_i128 val;
5249
5250    if (!avail_LSX(ctx)) {
5251        return false;
5252    }
5253
5254    if (!check_vec(ctx, 16)) {
5255        return true;
5256    }
5257
5258    addr = gpr_src(ctx, a->rj, EXT_NONE);
5259    val = tcg_temp_new_i128();
5260    rl = tcg_temp_new_i64();
5261    rh = tcg_temp_new_i64();
5262
5263    addr = make_address_i(ctx, addr, a->imm);
5264
5265    tcg_gen_qemu_ld_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE);
5266    tcg_gen_extr_i128_i64(rl, rh, val);
5267    set_vreg64(rh, a->vd, 1);
5268    set_vreg64(rl, a->vd, 0);
5269
5270    return true;
5271}
5272
5273static bool trans_vst(DisasContext *ctx, arg_vr_i *a)
5274{
5275    TCGv addr;
5276    TCGv_i128 val;
5277    TCGv_i64 ah, al;
5278
5279    if (!avail_LSX(ctx)) {
5280        return false;
5281    }
5282
5283    if (!check_vec(ctx, 16)) {
5284        return true;
5285    }
5286
5287    addr = gpr_src(ctx, a->rj, EXT_NONE);
5288    val = tcg_temp_new_i128();
5289    ah = tcg_temp_new_i64();
5290    al = tcg_temp_new_i64();
5291
5292    addr = make_address_i(ctx, addr, a->imm);
5293
5294    get_vreg64(ah, a->vd, 1);
5295    get_vreg64(al, a->vd, 0);
5296    tcg_gen_concat_i64_i128(val, al, ah);
5297    tcg_gen_qemu_st_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE);
5298
5299    return true;
5300}
5301
5302static bool trans_vldx(DisasContext *ctx, arg_vrr *a)
5303{
5304    TCGv addr, src1, src2;
5305    TCGv_i64 rl, rh;
5306    TCGv_i128 val;
5307
5308    if (!avail_LSX(ctx)) {
5309        return false;
5310    }
5311
5312    if (!check_vec(ctx, 16)) {
5313        return true;
5314    }
5315
5316    src1 = gpr_src(ctx, a->rj, EXT_NONE);
5317    src2 = gpr_src(ctx, a->rk, EXT_NONE);
5318    val = tcg_temp_new_i128();
5319    rl = tcg_temp_new_i64();
5320    rh = tcg_temp_new_i64();
5321
5322    addr = make_address_x(ctx, src1, src2);
5323    tcg_gen_qemu_ld_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE);
5324    tcg_gen_extr_i128_i64(rl, rh, val);
5325    set_vreg64(rh, a->vd, 1);
5326    set_vreg64(rl, a->vd, 0);
5327
5328    return true;
5329}
5330
5331static bool trans_vstx(DisasContext *ctx, arg_vrr *a)
5332{
5333    TCGv addr, src1, src2;
5334    TCGv_i64 ah, al;
5335    TCGv_i128 val;
5336
5337    if (!avail_LSX(ctx)) {
5338        return false;
5339    }
5340
5341    if (!check_vec(ctx, 16)) {
5342        return true;
5343    }
5344
5345    src1 = gpr_src(ctx, a->rj, EXT_NONE);
5346    src2 = gpr_src(ctx, a->rk, EXT_NONE);
5347    val = tcg_temp_new_i128();
5348    ah = tcg_temp_new_i64();
5349    al = tcg_temp_new_i64();
5350
5351    addr = make_address_x(ctx, src1, src2);
5352    get_vreg64(ah, a->vd, 1);
5353    get_vreg64(al, a->vd, 0);
5354    tcg_gen_concat_i64_i128(val, al, ah);
5355    tcg_gen_qemu_st_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE);
5356
5357    return true;
5358}
5359
5360static bool do_vldrepl_vl(DisasContext *ctx, arg_vr_i *a,
5361                          uint32_t oprsz, MemOp mop)
5362{
5363    TCGv addr;
5364    TCGv_i64 val;
5365
5366    if (!check_vec(ctx, oprsz)) {
5367        return true;
5368    }
5369
5370    addr = gpr_src(ctx, a->rj, EXT_NONE);
5371    val = tcg_temp_new_i64();
5372
5373    addr = make_address_i(ctx, addr, a->imm);
5374
5375    tcg_gen_qemu_ld_i64(val, addr, ctx->mem_idx, mop);
5376    tcg_gen_gvec_dup_i64(mop, vec_full_offset(a->vd), oprsz, ctx->vl / 8, val);
5377
5378    return true;
5379}
5380
5381static bool do_vldrepl(DisasContext *ctx, arg_vr_i *a, MemOp mop)
5382{
5383    return do_vldrepl_vl(ctx, a, 16, mop);
5384}
5385
5386static bool do_xvldrepl(DisasContext *ctx, arg_vr_i *a, MemOp mop)
5387{
5388    return do_vldrepl_vl(ctx, a, 32, mop);
5389}
5390
/*
 * vldrepl / xvldrepl translators: the MemOp argument is the size of the
 * element that is loaded and replicated.  LSX entries use do_vldrepl
 * (16 bytes), LASX entries use do_xvldrepl (32 bytes).
 */
TRANS(vldrepl_b, LSX, do_vldrepl, MO_8)
TRANS(vldrepl_h, LSX, do_vldrepl, MO_16)
TRANS(vldrepl_w, LSX, do_vldrepl, MO_32)
TRANS(vldrepl_d, LSX, do_vldrepl, MO_64)
TRANS(xvldrepl_b, LASX, do_xvldrepl, MO_8)
TRANS(xvldrepl_h, LASX, do_xvldrepl, MO_16)
TRANS(xvldrepl_w, LASX, do_xvldrepl, MO_32)
TRANS(xvldrepl_d, LASX, do_xvldrepl, MO_64)
5399
5400static bool do_vstelm_vl(DisasContext *ctx,
5401                         arg_vr_ii *a, uint32_t oprsz, MemOp mop)
5402{
5403    TCGv addr;
5404    TCGv_i64 val;
5405
5406    if (!check_vec(ctx, oprsz)) {
5407        return true;
5408    }
5409
5410    addr = gpr_src(ctx, a->rj, EXT_NONE);
5411    val = tcg_temp_new_i64();
5412
5413    addr = make_address_i(ctx, addr, a->imm);
5414    tcg_gen_ld_i64(val, tcg_env, vec_reg_offset(a->vd, a->imm2, mop));
5415    tcg_gen_qemu_st_i64(val, addr, ctx->mem_idx, mop);
5416    return true;
5417}
5418
5419static bool do_vstelm(DisasContext *ctx, arg_vr_ii *a, MemOp mop)
5420{
5421    return do_vstelm_vl(ctx, a, 16, mop);
5422}
5423
5424static bool do_xvstelm(DisasContext *ctx, arg_vr_ii *a, MemOp mop)
5425{
5426    return do_vstelm_vl(ctx, a, 32, mop);
5427}
5428
/*
 * vstelm / xvstelm translators: the MemOp argument is the size of the
 * element that is stored.  LSX entries use do_vstelm (16 bytes), LASX
 * entries use do_xvstelm (32 bytes).
 */
TRANS(vstelm_b, LSX, do_vstelm, MO_8)
TRANS(vstelm_h, LSX, do_vstelm, MO_16)
TRANS(vstelm_w, LSX, do_vstelm, MO_32)
TRANS(vstelm_d, LSX, do_vstelm, MO_64)
TRANS(xvstelm_b, LASX, do_xvstelm, MO_8)
TRANS(xvstelm_h, LASX, do_xvstelm, MO_16)
TRANS(xvstelm_w, LASX, do_xvstelm, MO_32)
TRANS(xvstelm_d, LASX, do_xvstelm, MO_64)
5437
5438static bool gen_lasx_memory(DisasContext *ctx, arg_vr_i *a,
5439                            void (*func)(DisasContext *, int, TCGv))
5440{
5441    TCGv addr = gpr_src(ctx, a->rj, EXT_NONE);
5442    TCGv temp = NULL;
5443
5444    if (!check_vec(ctx, 32)) {
5445        return true;
5446    }
5447
5448    if (a->imm) {
5449        temp = tcg_temp_new();
5450        tcg_gen_addi_tl(temp, addr, a->imm);
5451        addr = temp;
5452    }
5453
5454    func(ctx, a->vd, addr);
5455    return true;
5456}
5457
5458static void gen_xvld(DisasContext *ctx, int vreg, TCGv addr)
5459{
5460    int i;
5461    TCGv temp = tcg_temp_new();
5462    TCGv dest = tcg_temp_new();
5463
5464    tcg_gen_qemu_ld_i64(dest, addr, ctx->mem_idx, MO_TEUQ);
5465    set_vreg64(dest, vreg, 0);
5466
5467    for (i = 1; i < 4; i++) {
5468        tcg_gen_addi_tl(temp, addr, 8 * i);
5469        tcg_gen_qemu_ld_i64(dest, temp, ctx->mem_idx, MO_TEUQ);
5470        set_vreg64(dest, vreg, i);
5471    }
5472}
5473
5474static void gen_xvst(DisasContext * ctx, int vreg, TCGv addr)
5475{
5476    int i;
5477    TCGv temp = tcg_temp_new();
5478    TCGv dest = tcg_temp_new();
5479
5480    get_vreg64(dest, vreg, 0);
5481    tcg_gen_qemu_st_i64(dest, addr, ctx->mem_idx, MO_TEUQ);
5482
5483    for (i = 1; i < 4; i++) {
5484        tcg_gen_addi_tl(temp, addr, 8 * i);
5485        get_vreg64(dest, vreg, i);
5486        tcg_gen_qemu_st_i64(dest, temp, ctx->mem_idx, MO_TEUQ);
5487    }
5488}
5489
/* xvld / xvst: LASX base + immediate forms, sharing gen_lasx_memory. */
TRANS(xvld, LASX, gen_lasx_memory, gen_xvld)
TRANS(xvst, LASX, gen_lasx_memory, gen_xvst)
5492
5493static bool gen_lasx_memoryx(DisasContext *ctx, arg_vrr *a,
5494                             void (*func)(DisasContext*, int, TCGv))
5495{
5496    TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
5497    TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
5498    TCGv addr = tcg_temp_new();
5499
5500    if (!check_vec(ctx, 32)) {
5501        return true;
5502    }
5503
5504    tcg_gen_add_tl(addr, src1, src2);
5505    func(ctx, a->vd, addr);
5506
5507    return true;
5508}
5509
/* xvldx / xvstx: LASX base + index forms, sharing gen_lasx_memoryx. */
TRANS(xvldx, LASX, gen_lasx_memoryx, gen_xvld)
TRANS(xvstx, LASX, gen_lasx_memoryx, gen_xvst)
5512