xref: /openbmc/qemu/target/loongarch/tcg/insn_trans/trans_vec.c.inc (revision a74434580e1051bff12ab5eee5586058295c497f)
1/* SPDX-License-Identifier: GPL-2.0-or-later */
2/*
3 * LoongArch vector translate functions
4 * Copyright (c) 2022-2023 Loongson Technology Corporation Limited
5 */
6
7static bool check_vec(DisasContext *ctx, uint32_t oprsz)
8{
9    if ((oprsz == 16) && ((ctx->base.tb->flags & HW_FLAGS_EUEN_SXE) == 0)) {
10        generate_exception(ctx, EXCCODE_SXD);
11        return false;
12    }
13
14    if ((oprsz == 32) && ((ctx->base.tb->flags & HW_FLAGS_EUEN_ASXE) == 0)) {
15        generate_exception(ctx, EXCCODE_ASXD);
16        return false;
17    }
18
19    return true;
20}
21
22static bool gen_vvvv_ptr_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
23                            gen_helper_gvec_4_ptr *fn)
24{
25    if (!check_vec(ctx, oprsz)) {
26        return true;
27    }
28
29    tcg_gen_gvec_4_ptr(vec_full_offset(a->vd),
30                       vec_full_offset(a->vj),
31                       vec_full_offset(a->vk),
32                       vec_full_offset(a->va),
33                       tcg_env,
34                       oprsz, ctx->vl / 8, 0, fn);
35    return true;
36}
37
38static bool gen_vvvv_ptr(DisasContext *ctx, arg_vvvv *a,
39                         gen_helper_gvec_4_ptr *fn)
40{
41    return gen_vvvv_ptr_vl(ctx, a, 16, fn);
42}
43
44static bool gen_xxxx_ptr(DisasContext *ctx, arg_vvvv *a,
45                         gen_helper_gvec_4_ptr *fn)
46{
47    return gen_vvvv_ptr_vl(ctx, a, 32, fn);
48}
49
50static bool gen_vvvv_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
51                        gen_helper_gvec_4 *fn)
52{
53    if (!check_vec(ctx, oprsz)) {
54        return true;
55    }
56
57    tcg_gen_gvec_4_ool(vec_full_offset(a->vd),
58                       vec_full_offset(a->vj),
59                       vec_full_offset(a->vk),
60                       vec_full_offset(a->va),
61                       oprsz, ctx->vl / 8, 0, fn);
62    return true;
63}
64
65static bool gen_vvvv(DisasContext *ctx, arg_vvvv *a,
66                     gen_helper_gvec_4 *fn)
67{
68    return gen_vvvv_vl(ctx, a, 16, fn);
69}
70
71static bool gen_xxxx(DisasContext *ctx, arg_vvvv *a,
72                     gen_helper_gvec_4 *fn)
73{
74    return gen_vvvv_vl(ctx, a, 32, fn);
75}
76
77static bool gen_vvv_ptr_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
78                           gen_helper_gvec_3_ptr *fn)
79{
80    if (!check_vec(ctx, oprsz)) {
81        return true;
82    }
83    tcg_gen_gvec_3_ptr(vec_full_offset(a->vd),
84                       vec_full_offset(a->vj),
85                       vec_full_offset(a->vk),
86                       tcg_env,
87                       oprsz, ctx->vl / 8, 0, fn);
88    return true;
89}
90
91static bool gen_vvv_ptr(DisasContext *ctx, arg_vvv *a,
92                        gen_helper_gvec_3_ptr *fn)
93{
94    return gen_vvv_ptr_vl(ctx, a, 16, fn);
95}
96
97static bool gen_xxx_ptr(DisasContext *ctx, arg_vvv *a,
98                        gen_helper_gvec_3_ptr *fn)
99{
100    return gen_vvv_ptr_vl(ctx, a, 32, fn);
101}
102
103static bool gen_vvv_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
104                       gen_helper_gvec_3 *fn)
105{
106    if (!check_vec(ctx, oprsz)) {
107        return true;
108    }
109
110    tcg_gen_gvec_3_ool(vec_full_offset(a->vd),
111                       vec_full_offset(a->vj),
112                       vec_full_offset(a->vk),
113                       oprsz, ctx->vl / 8, 0, fn);
114    return true;
115}
116
117static bool gen_vvv(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn)
118{
119    return gen_vvv_vl(ctx, a, 16, fn);
120}
121
122static bool gen_xxx(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn)
123{
124    return gen_vvv_vl(ctx, a, 32, fn);
125}
126
127static bool gen_vv_ptr_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz,
128                          gen_helper_gvec_2_ptr *fn)
129{
130    if (!check_vec(ctx, oprsz)) {
131        return true;
132    }
133
134    tcg_gen_gvec_2_ptr(vec_full_offset(a->vd),
135                       vec_full_offset(a->vj),
136                       tcg_env,
137                       oprsz, ctx->vl / 8, 0, fn);
138    return true;
139}
140
141static bool gen_vv_ptr(DisasContext *ctx, arg_vv *a,
142                       gen_helper_gvec_2_ptr *fn)
143{
144    return gen_vv_ptr_vl(ctx, a, 16, fn);
145}
146
147static bool gen_xx_ptr(DisasContext *ctx, arg_vv *a,
148                       gen_helper_gvec_2_ptr *fn)
149{
150    return gen_vv_ptr_vl(ctx, a, 32, fn);
151}
152
153static bool gen_vv_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz,
154                      gen_helper_gvec_2 *fn)
155{
156    if (!check_vec(ctx, oprsz)) {
157        return true;
158    }
159
160    tcg_gen_gvec_2_ool(vec_full_offset(a->vd),
161                       vec_full_offset(a->vj),
162                       oprsz, ctx->vl / 8, 0, fn);
163    return true;
164}
165
166static bool gen_vv(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn)
167{
168    return gen_vv_vl(ctx, a, 16, fn);
169}
170
171static bool gen_xx(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn)
172{
173    return gen_vv_vl(ctx, a, 32, fn);
174}
175
176static bool gen_vv_i_vl(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz,
177                        gen_helper_gvec_2i *fn)
178{
179    if (!check_vec(ctx, oprsz)) {
180        return true;
181    }
182
183    tcg_gen_gvec_2i_ool(vec_full_offset(a->vd),
184                        vec_full_offset(a->vj),
185                        tcg_constant_i64(a->imm),
186                        oprsz, ctx->vl / 8, 0, fn);
187    return true;
188}
189
190static bool gen_vv_i(DisasContext *ctx, arg_vv_i *a, gen_helper_gvec_2i *fn)
191{
192    return gen_vv_i_vl(ctx, a, 16, fn);
193}
194
195static bool gen_xx_i(DisasContext *ctx, arg_vv_i *a, gen_helper_gvec_2i *fn)
196{
197    return gen_vv_i_vl(ctx, a, 32, fn);
198}
199
200static bool gen_cv_vl(DisasContext *ctx, arg_cv *a, uint32_t sz,
201                      void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
202{
203    if (!check_vec(ctx, sz)) {
204        return true;
205    }
206
207    TCGv_i32 vj = tcg_constant_i32(a->vj);
208    TCGv_i32 cd = tcg_constant_i32(a->cd);
209    TCGv_i32 oprsz = tcg_constant_i32(sz);
210
211    func(tcg_env, oprsz, cd, vj);
212    return true;
213}
214
215static bool gen_cv(DisasContext *ctx, arg_cv *a,
216                   void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
217{
218    return gen_cv_vl(ctx, a, 16, func);
219}
220
221static bool gen_cx(DisasContext *ctx, arg_cv *a,
222                   void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
223{
224    return gen_cv_vl(ctx, a, 32, func);
225}
226
227static bool gvec_vvv_vl(DisasContext *ctx, arg_vvv *a,
228                        uint32_t oprsz, MemOp mop,
229                        void (*func)(unsigned, uint32_t, uint32_t,
230                                     uint32_t, uint32_t, uint32_t))
231{
232    uint32_t vd_ofs = vec_full_offset(a->vd);
233    uint32_t vj_ofs = vec_full_offset(a->vj);
234    uint32_t vk_ofs = vec_full_offset(a->vk);
235
236    if (!check_vec(ctx, oprsz)) {
237        return true;
238    }
239
240    func(mop, vd_ofs, vj_ofs, vk_ofs, oprsz, ctx->vl / 8);
241    return true;
242}
243
244static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop,
245                     void (*func)(unsigned, uint32_t, uint32_t,
246                                  uint32_t, uint32_t, uint32_t))
247{
248    return gvec_vvv_vl(ctx, a, 16, mop, func);
249}
250
251static bool gvec_xxx(DisasContext *ctx, arg_vvv *a, MemOp mop,
252                     void (*func)(unsigned, uint32_t, uint32_t,
253                                  uint32_t, uint32_t, uint32_t))
254{
255    return gvec_vvv_vl(ctx, a, 32, mop, func);
256}
257
258static bool gvec_vv_vl(DisasContext *ctx, arg_vv *a,
259                       uint32_t oprsz, MemOp mop,
260                       void (*func)(unsigned, uint32_t, uint32_t,
261                                    uint32_t, uint32_t))
262{
263    uint32_t vd_ofs = vec_full_offset(a->vd);
264    uint32_t vj_ofs = vec_full_offset(a->vj);
265
266    if (!check_vec(ctx, oprsz)) {
267        return true;
268    }
269
270    func(mop, vd_ofs, vj_ofs, oprsz, ctx->vl / 8);
271    return true;
272}
273
274
275static bool gvec_vv(DisasContext *ctx, arg_vv *a, MemOp mop,
276                    void (*func)(unsigned, uint32_t, uint32_t,
277                                 uint32_t, uint32_t))
278{
279    return gvec_vv_vl(ctx, a, 16, mop, func);
280}
281
282static bool gvec_xx(DisasContext *ctx, arg_vv *a, MemOp mop,
283                    void (*func)(unsigned, uint32_t, uint32_t,
284                                 uint32_t, uint32_t))
285{
286    return gvec_vv_vl(ctx, a, 32, mop, func);
287}
288
289static bool gvec_vv_i_vl(DisasContext *ctx, arg_vv_i *a,
290                         uint32_t oprsz, MemOp mop,
291                         void (*func)(unsigned, uint32_t, uint32_t,
292                                      int64_t, uint32_t, uint32_t))
293{
294    uint32_t vd_ofs = vec_full_offset(a->vd);
295    uint32_t vj_ofs = vec_full_offset(a->vj);
296
297    if (!check_vec(ctx, oprsz)) {
298        return true;
299    }
300
301    func(mop, vd_ofs, vj_ofs, a->imm, oprsz, ctx->vl / 8);
302    return true;
303}
304
305static bool gvec_vv_i(DisasContext *ctx, arg_vv_i *a, MemOp mop,
306                      void (*func)(unsigned, uint32_t, uint32_t,
307                                   int64_t, uint32_t, uint32_t))
308{
309    return gvec_vv_i_vl(ctx, a, 16, mop, func);
310}
311
312static bool gvec_xx_i(DisasContext *ctx, arg_vv_i *a, MemOp mop,
313                      void (*func)(unsigned, uint32_t, uint32_t,
314                                   int64_t, uint32_t, uint32_t))
315{
316    return gvec_vv_i_vl(ctx,a, 32, mop, func);
317}
318
319static bool gvec_subi_vl(DisasContext *ctx, arg_vv_i *a,
320                         uint32_t oprsz, MemOp mop)
321{
322    uint32_t vd_ofs = vec_full_offset(a->vd);
323    uint32_t vj_ofs = vec_full_offset(a->vj);
324
325    if (!check_vec(ctx, oprsz)) {
326        return true;
327    }
328
329    tcg_gen_gvec_addi(mop, vd_ofs, vj_ofs, -a->imm, oprsz, ctx->vl / 8);
330    return true;
331}
332
333static bool gvec_subi(DisasContext *ctx, arg_vv_i *a, MemOp mop)
334{
335    return gvec_subi_vl(ctx, a, 16, mop);
336}
337
338static bool gvec_xsubi(DisasContext *ctx, arg_vv_i *a, MemOp mop)
339{
340    return gvec_subi_vl(ctx, a, 32, mop);
341}
342
/*
 * vadd_{b,h,w,d} / xvadd_{b,h,w,d}: per-element add, expanded inline with
 * tcg_gen_gvec_add.  The LSX entries go through gvec_vvv (16 bytes), the
 * LASX entries through gvec_xxx (32 bytes).
 * NOTE(review): TRANS() is defined outside this chunk.
 */
TRANS(vadd_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_add)
TRANS(vadd_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_add)
TRANS(vadd_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_add)
TRANS(vadd_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_add)
TRANS(xvadd_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_add)
TRANS(xvadd_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_add)
TRANS(xvadd_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_add)
TRANS(xvadd_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_add)
351
352static bool gen_vaddsub_q_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
353                             void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
354                                          TCGv_i64, TCGv_i64, TCGv_i64))
355{
356    int i;
357    TCGv_i64 rh, rl, ah, al, bh, bl;
358
359    if (!check_vec(ctx, oprsz)) {
360        return true;
361    }
362
363    rh = tcg_temp_new_i64();
364    rl = tcg_temp_new_i64();
365    ah = tcg_temp_new_i64();
366    al = tcg_temp_new_i64();
367    bh = tcg_temp_new_i64();
368    bl = tcg_temp_new_i64();
369
370    for (i = 0; i < oprsz / 16; i++) {
371        get_vreg64(ah, a->vj, 1 + i * 2);
372        get_vreg64(al, a->vj, i * 2);
373        get_vreg64(bh, a->vk, 1 + i * 2);
374        get_vreg64(bl, a->vk, i * 2);
375
376        func(rl, rh, al, ah, bl, bh);
377
378        set_vreg64(rh, a->vd, 1 + i * 2);
379        set_vreg64(rl, a->vd, i * 2);
380    }
381    return true;
382}
383
384static bool gen_vaddsub_q(DisasContext *ctx, arg_vvv *a,
385                          void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
386                                       TCGv_i64, TCGv_i64, TCGv_i64))
387{
388    return gen_vaddsub_q_vl(ctx, a, 16, func);
389}
390
391static bool gen_xvaddsub_q(DisasContext *ctx, arg_vvv *a,
392                           void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
393                                        TCGv_i64, TCGv_i64, TCGv_i64))
394{
395    return gen_vaddsub_q_vl(ctx, a, 32, func);
396}
397
/*
 * vsub_{b,h,w,d} / xvsub_{b,h,w,d}: per-element subtract, expanded inline
 * with tcg_gen_gvec_sub (16-byte LSX via gvec_vvv, 32-byte LASX via
 * gvec_xxx).
 */
TRANS(vsub_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sub)
TRANS(vsub_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sub)
TRANS(vsub_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_sub)
TRANS(vsub_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_sub)
TRANS(xvsub_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sub)
TRANS(xvsub_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sub)
TRANS(xvsub_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sub)
TRANS(xvsub_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sub)
406
/*
 * 128-bit add/subtract: each 16-byte lane is processed as a pair of 64-bit
 * halves using tcg_gen_add2_i64 / tcg_gen_sub2_i64.
 */
TRANS(vadd_q, LSX, gen_vaddsub_q, tcg_gen_add2_i64)
TRANS(vsub_q, LSX, gen_vaddsub_q, tcg_gen_sub2_i64)
TRANS(xvadd_q, LASX, gen_xvaddsub_q, tcg_gen_add2_i64)
TRANS(xvsub_q, LASX, gen_xvaddsub_q, tcg_gen_sub2_i64)
411
/*
 * Add/subtract immediate.  The add forms call tcg_gen_gvec_addi directly;
 * the subtract forms go through gvec_subi / gvec_xsubi, which emit an addi
 * of the negated immediate.
 */
TRANS(vaddi_bu, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_addi)
TRANS(vaddi_hu, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_addi)
TRANS(vaddi_wu, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_addi)
TRANS(vaddi_du, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_addi)
TRANS(vsubi_bu, LSX, gvec_subi, MO_8)
TRANS(vsubi_hu, LSX, gvec_subi, MO_16)
TRANS(vsubi_wu, LSX, gvec_subi, MO_32)
TRANS(vsubi_du, LSX, gvec_subi, MO_64)
TRANS(xvaddi_bu, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_addi)
TRANS(xvaddi_hu, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_addi)
TRANS(xvaddi_wu, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_addi)
TRANS(xvaddi_du, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_addi)
TRANS(xvsubi_bu, LASX, gvec_xsubi, MO_8)
TRANS(xvsubi_hu, LASX, gvec_xsubi, MO_16)
TRANS(xvsubi_wu, LASX, gvec_xsubi, MO_32)
TRANS(xvsubi_du, LASX, gvec_xsubi, MO_64)
428
/*
 * vneg_{b,h,w,d} / xvneg_{b,h,w,d}: per-element negate, expanded inline
 * with tcg_gen_gvec_neg.
 */
TRANS(vneg_b, LSX, gvec_vv, MO_8, tcg_gen_gvec_neg)
TRANS(vneg_h, LSX, gvec_vv, MO_16, tcg_gen_gvec_neg)
TRANS(vneg_w, LSX, gvec_vv, MO_32, tcg_gen_gvec_neg)
TRANS(vneg_d, LSX, gvec_vv, MO_64, tcg_gen_gvec_neg)
TRANS(xvneg_b, LASX, gvec_xx, MO_8, tcg_gen_gvec_neg)
TRANS(xvneg_h, LASX, gvec_xx, MO_16, tcg_gen_gvec_neg)
TRANS(xvneg_w, LASX, gvec_xx, MO_32, tcg_gen_gvec_neg)
TRANS(xvneg_d, LASX, gvec_xx, MO_64, tcg_gen_gvec_neg)
437
/*
 * LSX vsadd / vssub, expanded with the TCG gvec ssadd/usadd/sssub/ussub
 * expanders.  The un-suffixed forms use the ss* expanders and the *u forms
 * the us* expanders (per the TCG gvec API: signed and unsigned saturating
 * add/subtract).
 */
TRANS(vsadd_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_ssadd)
TRANS(vsadd_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_ssadd)
TRANS(vsadd_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_ssadd)
TRANS(vsadd_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_ssadd)
TRANS(vsadd_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_usadd)
TRANS(vsadd_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_usadd)
TRANS(vsadd_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_usadd)
TRANS(vsadd_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_usadd)
TRANS(vssub_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sssub)
TRANS(vssub_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sssub)
TRANS(vssub_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_sssub)
TRANS(vssub_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_sssub)
TRANS(vssub_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_ussub)
TRANS(vssub_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_ussub)
TRANS(vssub_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_ussub)
TRANS(vssub_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_ussub)
454
/*
 * LASX xvsadd / xvssub: same gvec expanders as the LSX forms, routed
 * through gvec_xxx so the operation size is 32 bytes.
 */
TRANS(xvsadd_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_ssadd)
TRANS(xvsadd_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_ssadd)
TRANS(xvsadd_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_ssadd)
TRANS(xvsadd_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_ssadd)
TRANS(xvsadd_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_usadd)
TRANS(xvsadd_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_usadd)
TRANS(xvsadd_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_usadd)
TRANS(xvsadd_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_usadd)
TRANS(xvssub_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sssub)
TRANS(xvssub_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sssub)
TRANS(xvssub_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sssub)
TRANS(xvssub_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sssub)
TRANS(xvssub_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_ussub)
TRANS(xvssub_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_ussub)
TRANS(xvssub_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_ussub)
TRANS(xvssub_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_ussub)
471
/*
 * LSX vhaddw / vhsubw: no inline expansion; every form is dispatched to its
 * out-of-line helper through gen_vvv (16 bytes).
 */
TRANS(vhaddw_h_b, LSX, gen_vvv, gen_helper_vhaddw_h_b)
TRANS(vhaddw_w_h, LSX, gen_vvv, gen_helper_vhaddw_w_h)
TRANS(vhaddw_d_w, LSX, gen_vvv, gen_helper_vhaddw_d_w)
TRANS(vhaddw_q_d, LSX, gen_vvv, gen_helper_vhaddw_q_d)
TRANS(vhaddw_hu_bu, LSX, gen_vvv, gen_helper_vhaddw_hu_bu)
TRANS(vhaddw_wu_hu, LSX, gen_vvv, gen_helper_vhaddw_wu_hu)
TRANS(vhaddw_du_wu, LSX, gen_vvv, gen_helper_vhaddw_du_wu)
TRANS(vhaddw_qu_du, LSX, gen_vvv, gen_helper_vhaddw_qu_du)
TRANS(vhsubw_h_b, LSX, gen_vvv, gen_helper_vhsubw_h_b)
TRANS(vhsubw_w_h, LSX, gen_vvv, gen_helper_vhsubw_w_h)
TRANS(vhsubw_d_w, LSX, gen_vvv, gen_helper_vhsubw_d_w)
TRANS(vhsubw_q_d, LSX, gen_vvv, gen_helper_vhsubw_q_d)
TRANS(vhsubw_hu_bu, LSX, gen_vvv, gen_helper_vhsubw_hu_bu)
TRANS(vhsubw_wu_hu, LSX, gen_vvv, gen_helper_vhsubw_wu_hu)
TRANS(vhsubw_du_wu, LSX, gen_vvv, gen_helper_vhsubw_du_wu)
TRANS(vhsubw_qu_du, LSX, gen_vvv, gen_helper_vhsubw_qu_du)
488
/*
 * LASX xvhaddw / xvhsubw: reuse the same gen_helper_vh{add,sub}w_* helpers
 * as the LSX forms, dispatched through gen_xxx (32 bytes).
 */
TRANS(xvhaddw_h_b, LASX, gen_xxx, gen_helper_vhaddw_h_b)
TRANS(xvhaddw_w_h, LASX, gen_xxx, gen_helper_vhaddw_w_h)
TRANS(xvhaddw_d_w, LASX, gen_xxx, gen_helper_vhaddw_d_w)
TRANS(xvhaddw_q_d, LASX, gen_xxx, gen_helper_vhaddw_q_d)
TRANS(xvhaddw_hu_bu, LASX, gen_xxx, gen_helper_vhaddw_hu_bu)
TRANS(xvhaddw_wu_hu, LASX, gen_xxx, gen_helper_vhaddw_wu_hu)
TRANS(xvhaddw_du_wu, LASX, gen_xxx, gen_helper_vhaddw_du_wu)
TRANS(xvhaddw_qu_du, LASX, gen_xxx, gen_helper_vhaddw_qu_du)
TRANS(xvhsubw_h_b, LASX, gen_xxx, gen_helper_vhsubw_h_b)
TRANS(xvhsubw_w_h, LASX, gen_xxx, gen_helper_vhsubw_w_h)
TRANS(xvhsubw_d_w, LASX, gen_xxx, gen_helper_vhsubw_d_w)
TRANS(xvhsubw_q_d, LASX, gen_xxx, gen_helper_vhsubw_q_d)
TRANS(xvhsubw_hu_bu, LASX, gen_xxx, gen_helper_vhsubw_hu_bu)
TRANS(xvhsubw_wu_hu, LASX, gen_xxx, gen_helper_vhsubw_wu_hu)
TRANS(xvhsubw_du_wu, LASX, gen_xxx, gen_helper_vhsubw_du_wu)
TRANS(xvhsubw_qu_du, LASX, gen_xxx, gen_helper_vhsubw_qu_du)
505
506static void gen_vaddwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
507{
508    TCGv_vec t1, t2;
509
510    int halfbits = 4 << vece;
511
512    t1 = tcg_temp_new_vec_matching(a);
513    t2 = tcg_temp_new_vec_matching(b);
514
515    /* Sign-extend the even elements from a */
516    tcg_gen_shli_vec(vece, t1, a, halfbits);
517    tcg_gen_sari_vec(vece, t1, t1, halfbits);
518
519    /* Sign-extend the even elements from b */
520    tcg_gen_shli_vec(vece, t2, b, halfbits);
521    tcg_gen_sari_vec(vece, t2, t2, halfbits);
522
523    tcg_gen_add_vec(vece, t, t1, t2);
524}
525
526static void gen_vaddwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
527{
528    TCGv_i32 t1, t2;
529
530    t1 = tcg_temp_new_i32();
531    t2 = tcg_temp_new_i32();
532    tcg_gen_ext16s_i32(t1, a);
533    tcg_gen_ext16s_i32(t2, b);
534    tcg_gen_add_i32(t, t1, t2);
535}
536
537static void gen_vaddwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
538{
539    TCGv_i64 t1, t2;
540
541    t1 = tcg_temp_new_i64();
542    t2 = tcg_temp_new_i64();
543    tcg_gen_ext32s_i64(t1, a);
544    tcg_gen_ext32s_i64(t2, b);
545    tcg_gen_add_i64(t, t1, t2);
546}
547
548static void do_vaddwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
549                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
550{
551    static const TCGOpcode vecop_list[] = {
552        INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
553        };
554    static const GVecGen3 op[4] = {
555        {
556            .fniv = gen_vaddwev_s,
557            .fno = gen_helper_vaddwev_h_b,
558            .opt_opc = vecop_list,
559            .vece = MO_16
560        },
561        {
562            .fni4 = gen_vaddwev_w_h,
563            .fniv = gen_vaddwev_s,
564            .fno = gen_helper_vaddwev_w_h,
565            .opt_opc = vecop_list,
566            .vece = MO_32
567        },
568        {
569            .fni8 = gen_vaddwev_d_w,
570            .fniv = gen_vaddwev_s,
571            .fno = gen_helper_vaddwev_d_w,
572            .opt_opc = vecop_list,
573            .vece = MO_64
574        },
575        {
576            .fno = gen_helper_vaddwev_q_d,
577            .vece = MO_128
578        },
579    };
580
581    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
582}
583
/*
 * Signed widening add of even elements.  The MemOp passed here is the
 * source element size; do_vaddwev_s uses it to index its expander table.
 */
TRANS(vaddwev_h_b, LSX, gvec_vvv, MO_8, do_vaddwev_s)
TRANS(vaddwev_w_h, LSX, gvec_vvv, MO_16, do_vaddwev_s)
TRANS(vaddwev_d_w, LSX, gvec_vvv, MO_32, do_vaddwev_s)
TRANS(vaddwev_q_d, LSX, gvec_vvv, MO_64, do_vaddwev_s)
TRANS(xvaddwev_h_b, LASX, gvec_xxx, MO_8, do_vaddwev_s)
TRANS(xvaddwev_w_h, LASX, gvec_xxx, MO_16, do_vaddwev_s)
TRANS(xvaddwev_d_w, LASX, gvec_xxx, MO_32, do_vaddwev_s)
TRANS(xvaddwev_q_d, LASX, gvec_xxx, MO_64, do_vaddwev_s)
592
593static void gen_vaddwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
594{
595    TCGv_i32 t1, t2;
596
597    t1 = tcg_temp_new_i32();
598    t2 = tcg_temp_new_i32();
599    tcg_gen_sari_i32(t1, a, 16);
600    tcg_gen_sari_i32(t2, b, 16);
601    tcg_gen_add_i32(t, t1, t2);
602}
603
604static void gen_vaddwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
605{
606    TCGv_i64 t1, t2;
607
608    t1 = tcg_temp_new_i64();
609    t2 = tcg_temp_new_i64();
610    tcg_gen_sari_i64(t1, a, 32);
611    tcg_gen_sari_i64(t2, b, 32);
612    tcg_gen_add_i64(t, t1, t2);
613}
614
615static void gen_vaddwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
616{
617    TCGv_vec t1, t2;
618
619    int halfbits = 4 << vece;
620
621    t1 = tcg_temp_new_vec_matching(a);
622    t2 = tcg_temp_new_vec_matching(b);
623
624    /* Sign-extend the odd elements for vector */
625    tcg_gen_sari_vec(vece, t1, a, halfbits);
626    tcg_gen_sari_vec(vece, t2, b, halfbits);
627
628    tcg_gen_add_vec(vece, t, t1, t2);
629}
630
631static void do_vaddwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
632                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
633{
634    static const TCGOpcode vecop_list[] = {
635        INDEX_op_sari_vec, INDEX_op_add_vec, 0
636        };
637    static const GVecGen3 op[4] = {
638        {
639            .fniv = gen_vaddwod_s,
640            .fno = gen_helper_vaddwod_h_b,
641            .opt_opc = vecop_list,
642            .vece = MO_16
643        },
644        {
645            .fni4 = gen_vaddwod_w_h,
646            .fniv = gen_vaddwod_s,
647            .fno = gen_helper_vaddwod_w_h,
648            .opt_opc = vecop_list,
649            .vece = MO_32
650        },
651        {
652            .fni8 = gen_vaddwod_d_w,
653            .fniv = gen_vaddwod_s,
654            .fno = gen_helper_vaddwod_d_w,
655            .opt_opc = vecop_list,
656            .vece = MO_64
657        },
658        {
659            .fno = gen_helper_vaddwod_q_d,
660            .vece = MO_128
661        },
662    };
663
664    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
665}
666
/*
 * Signed widening add of odd elements.  The MemOp passed here is the source
 * element size; do_vaddwod_s uses it to index its expander table.
 */
TRANS(vaddwod_h_b, LSX, gvec_vvv, MO_8, do_vaddwod_s)
TRANS(vaddwod_w_h, LSX, gvec_vvv, MO_16, do_vaddwod_s)
TRANS(vaddwod_d_w, LSX, gvec_vvv, MO_32, do_vaddwod_s)
TRANS(vaddwod_q_d, LSX, gvec_vvv, MO_64, do_vaddwod_s)
TRANS(xvaddwod_h_b, LASX, gvec_xxx, MO_8, do_vaddwod_s)
TRANS(xvaddwod_w_h, LASX, gvec_xxx, MO_16, do_vaddwod_s)
TRANS(xvaddwod_d_w, LASX, gvec_xxx, MO_32, do_vaddwod_s)
TRANS(xvaddwod_q_d, LASX, gvec_xxx, MO_64, do_vaddwod_s)
675
676
677static void gen_vsubwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
678{
679    TCGv_vec t1, t2;
680
681    int halfbits = 4 << vece;
682
683    t1 = tcg_temp_new_vec_matching(a);
684    t2 = tcg_temp_new_vec_matching(b);
685
686    /* Sign-extend the even elements from a */
687    tcg_gen_shli_vec(vece, t1, a, halfbits);
688    tcg_gen_sari_vec(vece, t1, t1, halfbits);
689
690    /* Sign-extend the even elements from b */
691    tcg_gen_shli_vec(vece, t2, b, halfbits);
692    tcg_gen_sari_vec(vece, t2, t2, halfbits);
693
694    tcg_gen_sub_vec(vece, t, t1, t2);
695}
696
697static void gen_vsubwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
698{
699    TCGv_i32 t1, t2;
700
701    t1 = tcg_temp_new_i32();
702    t2 = tcg_temp_new_i32();
703    tcg_gen_ext16s_i32(t1, a);
704    tcg_gen_ext16s_i32(t2, b);
705    tcg_gen_sub_i32(t, t1, t2);
706}
707
708static void gen_vsubwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
709{
710    TCGv_i64 t1, t2;
711
712    t1 = tcg_temp_new_i64();
713    t2 = tcg_temp_new_i64();
714    tcg_gen_ext32s_i64(t1, a);
715    tcg_gen_ext32s_i64(t2, b);
716    tcg_gen_sub_i64(t, t1, t2);
717}
718
719static void do_vsubwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
720                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
721{
722    static const TCGOpcode vecop_list[] = {
723        INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_sub_vec, 0
724        };
725    static const GVecGen3 op[4] = {
726        {
727            .fniv = gen_vsubwev_s,
728            .fno = gen_helper_vsubwev_h_b,
729            .opt_opc = vecop_list,
730            .vece = MO_16
731        },
732        {
733            .fni4 = gen_vsubwev_w_h,
734            .fniv = gen_vsubwev_s,
735            .fno = gen_helper_vsubwev_w_h,
736            .opt_opc = vecop_list,
737            .vece = MO_32
738        },
739        {
740            .fni8 = gen_vsubwev_d_w,
741            .fniv = gen_vsubwev_s,
742            .fno = gen_helper_vsubwev_d_w,
743            .opt_opc = vecop_list,
744            .vece = MO_64
745        },
746        {
747            .fno = gen_helper_vsubwev_q_d,
748            .vece = MO_128
749        },
750    };
751
752    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
753}
754
/*
 * Signed widening subtract of even elements.  The MemOp passed here is the
 * source element size; do_vsubwev_s uses it to index its expander table.
 */
TRANS(vsubwev_h_b, LSX, gvec_vvv, MO_8, do_vsubwev_s)
TRANS(vsubwev_w_h, LSX, gvec_vvv, MO_16, do_vsubwev_s)
TRANS(vsubwev_d_w, LSX, gvec_vvv, MO_32, do_vsubwev_s)
TRANS(vsubwev_q_d, LSX, gvec_vvv, MO_64, do_vsubwev_s)
TRANS(xvsubwev_h_b, LASX, gvec_xxx, MO_8, do_vsubwev_s)
TRANS(xvsubwev_w_h, LASX, gvec_xxx, MO_16, do_vsubwev_s)
TRANS(xvsubwev_d_w, LASX, gvec_xxx, MO_32, do_vsubwev_s)
TRANS(xvsubwev_q_d, LASX, gvec_xxx, MO_64, do_vsubwev_s)
763
764static void gen_vsubwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
765{
766    TCGv_vec t1, t2;
767
768    int halfbits = 4 << vece;
769
770    t1 = tcg_temp_new_vec_matching(a);
771    t2 = tcg_temp_new_vec_matching(b);
772
773    /* Sign-extend the odd elements for vector */
774    tcg_gen_sari_vec(vece, t1, a, halfbits);
775    tcg_gen_sari_vec(vece, t2, b, halfbits);
776
777    tcg_gen_sub_vec(vece, t, t1, t2);
778}
779
780static void gen_vsubwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
781{
782    TCGv_i32 t1, t2;
783
784    t1 = tcg_temp_new_i32();
785    t2 = tcg_temp_new_i32();
786    tcg_gen_sari_i32(t1, a, 16);
787    tcg_gen_sari_i32(t2, b, 16);
788    tcg_gen_sub_i32(t, t1, t2);
789}
790
791static void gen_vsubwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
792{
793    TCGv_i64 t1, t2;
794
795    t1 = tcg_temp_new_i64();
796    t2 = tcg_temp_new_i64();
797    tcg_gen_sari_i64(t1, a, 32);
798    tcg_gen_sari_i64(t2, b, 32);
799    tcg_gen_sub_i64(t, t1, t2);
800}
801
802static void do_vsubwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
803                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
804{
805    static const TCGOpcode vecop_list[] = {
806        INDEX_op_sari_vec, INDEX_op_sub_vec, 0
807        };
808    static const GVecGen3 op[4] = {
809        {
810            .fniv = gen_vsubwod_s,
811            .fno = gen_helper_vsubwod_h_b,
812            .opt_opc = vecop_list,
813            .vece = MO_16
814        },
815        {
816            .fni4 = gen_vsubwod_w_h,
817            .fniv = gen_vsubwod_s,
818            .fno = gen_helper_vsubwod_w_h,
819            .opt_opc = vecop_list,
820            .vece = MO_32
821        },
822        {
823            .fni8 = gen_vsubwod_d_w,
824            .fniv = gen_vsubwod_s,
825            .fno = gen_helper_vsubwod_d_w,
826            .opt_opc = vecop_list,
827            .vece = MO_64
828        },
829        {
830            .fno = gen_helper_vsubwod_q_d,
831            .vece = MO_128
832        },
833    };
834
835    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
836}
837
/*
 * Signed widening subtract of odd elements.  The MemOp passed here is the
 * source element size; do_vsubwod_s uses it to index its expander table.
 */
TRANS(vsubwod_h_b, LSX, gvec_vvv, MO_8, do_vsubwod_s)
TRANS(vsubwod_w_h, LSX, gvec_vvv, MO_16, do_vsubwod_s)
TRANS(vsubwod_d_w, LSX, gvec_vvv, MO_32, do_vsubwod_s)
TRANS(vsubwod_q_d, LSX, gvec_vvv, MO_64, do_vsubwod_s)
TRANS(xvsubwod_h_b, LASX, gvec_xxx, MO_8, do_vsubwod_s)
TRANS(xvsubwod_w_h, LASX, gvec_xxx, MO_16, do_vsubwod_s)
TRANS(xvsubwod_d_w, LASX, gvec_xxx, MO_32, do_vsubwod_s)
TRANS(xvsubwod_q_d, LASX, gvec_xxx, MO_64, do_vsubwod_s)
846
847static void gen_vaddwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
848{
849    TCGv_vec t1, t2, t3;
850
851    t1 = tcg_temp_new_vec_matching(a);
852    t2 = tcg_temp_new_vec_matching(b);
853    t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
854    tcg_gen_and_vec(vece, t1, a, t3);
855    tcg_gen_and_vec(vece, t2, b, t3);
856    tcg_gen_add_vec(vece, t, t1, t2);
857}
858
859static void gen_vaddwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
860{
861    TCGv_i32 t1, t2;
862
863    t1 = tcg_temp_new_i32();
864    t2 = tcg_temp_new_i32();
865    tcg_gen_ext16u_i32(t1, a);
866    tcg_gen_ext16u_i32(t2, b);
867    tcg_gen_add_i32(t, t1, t2);
868}
869
870static void gen_vaddwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
871{
872    TCGv_i64 t1, t2;
873
874    t1 = tcg_temp_new_i64();
875    t2 = tcg_temp_new_i64();
876    tcg_gen_ext32u_i64(t1, a);
877    tcg_gen_ext32u_i64(t2, b);
878    tcg_gen_add_i64(t, t1, t2);
879}
880
881static void do_vaddwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
882                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
883{
884    static const TCGOpcode vecop_list[] = {
885        INDEX_op_add_vec, 0
886        };
887    static const GVecGen3 op[4] = {
888        {
889            .fniv = gen_vaddwev_u,
890            .fno = gen_helper_vaddwev_h_bu,
891            .opt_opc = vecop_list,
892            .vece = MO_16
893        },
894        {
895            .fni4 = gen_vaddwev_w_hu,
896            .fniv = gen_vaddwev_u,
897            .fno = gen_helper_vaddwev_w_hu,
898            .opt_opc = vecop_list,
899            .vece = MO_32
900        },
901        {
902            .fni8 = gen_vaddwev_d_wu,
903            .fniv = gen_vaddwev_u,
904            .fno = gen_helper_vaddwev_d_wu,
905            .opt_opc = vecop_list,
906            .vece = MO_64
907        },
908        {
909            .fno = gen_helper_vaddwev_q_du,
910            .vece = MO_128
911        },
912    };
913
914    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
915}
916
/*
 * Unsigned widening add of even elements.  The MemOp passed here is the
 * source element size; do_vaddwev_u uses it to index its expander table.
 */
TRANS(vaddwev_h_bu, LSX, gvec_vvv, MO_8, do_vaddwev_u)
TRANS(vaddwev_w_hu, LSX, gvec_vvv, MO_16, do_vaddwev_u)
TRANS(vaddwev_d_wu, LSX, gvec_vvv, MO_32, do_vaddwev_u)
TRANS(vaddwev_q_du, LSX, gvec_vvv, MO_64, do_vaddwev_u)
TRANS(xvaddwev_h_bu, LASX, gvec_xxx, MO_8, do_vaddwev_u)
TRANS(xvaddwev_w_hu, LASX, gvec_xxx, MO_16, do_vaddwev_u)
TRANS(xvaddwev_d_wu, LASX, gvec_xxx, MO_32, do_vaddwev_u)
TRANS(xvaddwev_q_du, LASX, gvec_xxx, MO_64, do_vaddwev_u)
925
926static void gen_vaddwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
927{
928    TCGv_vec t1, t2;
929
930    int halfbits = 4 << vece;
931
932    t1 = tcg_temp_new_vec_matching(a);
933    t2 = tcg_temp_new_vec_matching(b);
934
935    /* Zero-extend the odd elements for vector */
936    tcg_gen_shri_vec(vece, t1, a, halfbits);
937    tcg_gen_shri_vec(vece, t2, b, halfbits);
938
939    tcg_gen_add_vec(vece, t, t1, t2);
940}
941
942static void gen_vaddwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
943{
944    TCGv_i32 t1, t2;
945
946    t1 = tcg_temp_new_i32();
947    t2 = tcg_temp_new_i32();
948    tcg_gen_shri_i32(t1, a, 16);
949    tcg_gen_shri_i32(t2, b, 16);
950    tcg_gen_add_i32(t, t1, t2);
951}
952
953static void gen_vaddwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
954{
955    TCGv_i64 t1, t2;
956
957    t1 = tcg_temp_new_i64();
958    t2 = tcg_temp_new_i64();
959    tcg_gen_shri_i64(t1, a, 32);
960    tcg_gen_shri_i64(t2, b, 32);
961    tcg_gen_add_i64(t, t1, t2);
962}
963
964static void do_vaddwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
965                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
966{
967    static const TCGOpcode vecop_list[] = {
968        INDEX_op_shri_vec, INDEX_op_add_vec, 0
969        };
970    static const GVecGen3 op[4] = {
971        {
972            .fniv = gen_vaddwod_u,
973            .fno = gen_helper_vaddwod_h_bu,
974            .opt_opc = vecop_list,
975            .vece = MO_16
976        },
977        {
978            .fni4 = gen_vaddwod_w_hu,
979            .fniv = gen_vaddwod_u,
980            .fno = gen_helper_vaddwod_w_hu,
981            .opt_opc = vecop_list,
982            .vece = MO_32
983        },
984        {
985            .fni8 = gen_vaddwod_d_wu,
986            .fniv = gen_vaddwod_u,
987            .fno = gen_helper_vaddwod_d_wu,
988            .opt_opc = vecop_list,
989            .vece = MO_64
990        },
991        {
992            .fno = gen_helper_vaddwod_q_du,
993            .vece = MO_128
994        },
995    };
996
997    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
998}
999
1000TRANS(vaddwod_h_bu, LSX, gvec_vvv, MO_8, do_vaddwod_u)
1001TRANS(vaddwod_w_hu, LSX, gvec_vvv, MO_16, do_vaddwod_u)
1002TRANS(vaddwod_d_wu, LSX, gvec_vvv, MO_32, do_vaddwod_u)
1003TRANS(vaddwod_q_du, LSX, gvec_vvv, MO_64, do_vaddwod_u)
1004TRANS(xvaddwod_h_bu, LASX, gvec_xxx, MO_8, do_vaddwod_u)
1005TRANS(xvaddwod_w_hu, LASX, gvec_xxx, MO_16, do_vaddwod_u)
1006TRANS(xvaddwod_d_wu, LASX, gvec_xxx, MO_32, do_vaddwod_u)
1007TRANS(xvaddwod_q_du, LASX, gvec_xxx, MO_64, do_vaddwod_u)
1008
1009static void gen_vsubwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1010{
1011    TCGv_vec t1, t2, t3;
1012
1013    t1 = tcg_temp_new_vec_matching(a);
1014    t2 = tcg_temp_new_vec_matching(b);
1015    t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
1016    tcg_gen_and_vec(vece, t1, a, t3);
1017    tcg_gen_and_vec(vece, t2, b, t3);
1018    tcg_gen_sub_vec(vece, t, t1, t2);
1019}
1020
1021static void gen_vsubwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
1022{
1023    TCGv_i32 t1, t2;
1024
1025    t1 = tcg_temp_new_i32();
1026    t2 = tcg_temp_new_i32();
1027    tcg_gen_ext16u_i32(t1, a);
1028    tcg_gen_ext16u_i32(t2, b);
1029    tcg_gen_sub_i32(t, t1, t2);
1030}
1031
1032static void gen_vsubwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1033{
1034    TCGv_i64 t1, t2;
1035
1036    t1 = tcg_temp_new_i64();
1037    t2 = tcg_temp_new_i64();
1038    tcg_gen_ext32u_i64(t1, a);
1039    tcg_gen_ext32u_i64(t2, b);
1040    tcg_gen_sub_i64(t, t1, t2);
1041}
1042
1043static void do_vsubwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1044                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1045{
1046    static const TCGOpcode vecop_list[] = {
1047        INDEX_op_sub_vec, 0
1048        };
1049    static const GVecGen3 op[4] = {
1050        {
1051            .fniv = gen_vsubwev_u,
1052            .fno = gen_helper_vsubwev_h_bu,
1053            .opt_opc = vecop_list,
1054            .vece = MO_16
1055        },
1056        {
1057            .fni4 = gen_vsubwev_w_hu,
1058            .fniv = gen_vsubwev_u,
1059            .fno = gen_helper_vsubwev_w_hu,
1060            .opt_opc = vecop_list,
1061            .vece = MO_32
1062        },
1063        {
1064            .fni8 = gen_vsubwev_d_wu,
1065            .fniv = gen_vsubwev_u,
1066            .fno = gen_helper_vsubwev_d_wu,
1067            .opt_opc = vecop_list,
1068            .vece = MO_64
1069        },
1070        {
1071            .fno = gen_helper_vsubwev_q_du,
1072            .vece = MO_128
1073        },
1074    };
1075
1076    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1077}
1078
1079TRANS(vsubwev_h_bu, LSX, gvec_vvv, MO_8, do_vsubwev_u)
1080TRANS(vsubwev_w_hu, LSX, gvec_vvv, MO_16, do_vsubwev_u)
1081TRANS(vsubwev_d_wu, LSX, gvec_vvv, MO_32, do_vsubwev_u)
1082TRANS(vsubwev_q_du, LSX, gvec_vvv, MO_64, do_vsubwev_u)
1083TRANS(xvsubwev_h_bu, LASX, gvec_xxx, MO_8, do_vsubwev_u)
1084TRANS(xvsubwev_w_hu, LASX, gvec_xxx, MO_16, do_vsubwev_u)
1085TRANS(xvsubwev_d_wu, LASX, gvec_xxx, MO_32, do_vsubwev_u)
1086TRANS(xvsubwev_q_du, LASX, gvec_xxx, MO_64, do_vsubwev_u)
1087
1088static void gen_vsubwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1089{
1090    TCGv_vec t1, t2;
1091
1092    int halfbits = 4 << vece;
1093
1094    t1 = tcg_temp_new_vec_matching(a);
1095    t2 = tcg_temp_new_vec_matching(b);
1096
1097    /* Zero-extend the odd elements for vector */
1098    tcg_gen_shri_vec(vece, t1, a, halfbits);
1099    tcg_gen_shri_vec(vece, t2, b, halfbits);
1100
1101    tcg_gen_sub_vec(vece, t, t1, t2);
1102}
1103
1104static void gen_vsubwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
1105{
1106    TCGv_i32 t1, t2;
1107
1108    t1 = tcg_temp_new_i32();
1109    t2 = tcg_temp_new_i32();
1110    tcg_gen_shri_i32(t1, a, 16);
1111    tcg_gen_shri_i32(t2, b, 16);
1112    tcg_gen_sub_i32(t, t1, t2);
1113}
1114
1115static void gen_vsubwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1116{
1117    TCGv_i64 t1, t2;
1118
1119    t1 = tcg_temp_new_i64();
1120    t2 = tcg_temp_new_i64();
1121    tcg_gen_shri_i64(t1, a, 32);
1122    tcg_gen_shri_i64(t2, b, 32);
1123    tcg_gen_sub_i64(t, t1, t2);
1124}
1125
1126static void do_vsubwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1127                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1128{
1129    static const TCGOpcode vecop_list[] = {
1130        INDEX_op_shri_vec, INDEX_op_sub_vec, 0
1131        };
1132    static const GVecGen3 op[4] = {
1133        {
1134            .fniv = gen_vsubwod_u,
1135            .fno = gen_helper_vsubwod_h_bu,
1136            .opt_opc = vecop_list,
1137            .vece = MO_16
1138        },
1139        {
1140            .fni4 = gen_vsubwod_w_hu,
1141            .fniv = gen_vsubwod_u,
1142            .fno = gen_helper_vsubwod_w_hu,
1143            .opt_opc = vecop_list,
1144            .vece = MO_32
1145        },
1146        {
1147            .fni8 = gen_vsubwod_d_wu,
1148            .fniv = gen_vsubwod_u,
1149            .fno = gen_helper_vsubwod_d_wu,
1150            .opt_opc = vecop_list,
1151            .vece = MO_64
1152        },
1153        {
1154            .fno = gen_helper_vsubwod_q_du,
1155            .vece = MO_128
1156        },
1157    };
1158
1159    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1160}
1161
1162TRANS(vsubwod_h_bu, LSX, gvec_vvv, MO_8, do_vsubwod_u)
1163TRANS(vsubwod_w_hu, LSX, gvec_vvv, MO_16, do_vsubwod_u)
1164TRANS(vsubwod_d_wu, LSX, gvec_vvv, MO_32, do_vsubwod_u)
1165TRANS(vsubwod_q_du, LSX, gvec_vvv, MO_64, do_vsubwod_u)
1166TRANS(xvsubwod_h_bu, LASX, gvec_xxx, MO_8, do_vsubwod_u)
1167TRANS(xvsubwod_w_hu, LASX, gvec_xxx, MO_16, do_vsubwod_u)
1168TRANS(xvsubwod_d_wu, LASX, gvec_xxx, MO_32, do_vsubwod_u)
1169TRANS(xvsubwod_q_du, LASX, gvec_xxx, MO_64, do_vsubwod_u)
1170
1171static void gen_vaddwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1172{
1173    TCGv_vec t1, t2, t3;
1174
1175    int halfbits = 4 << vece;
1176
1177    t1 = tcg_temp_new_vec_matching(a);
1178    t2 = tcg_temp_new_vec_matching(b);
1179    t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, halfbits));
1180
1181    /* Zero-extend the even elements from a */
1182    tcg_gen_and_vec(vece, t1, a, t3);
1183
1184    /* Sign-extend the even elements from b */
1185    tcg_gen_shli_vec(vece, t2, b, halfbits);
1186    tcg_gen_sari_vec(vece, t2, t2, halfbits);
1187
1188    tcg_gen_add_vec(vece, t, t1, t2);
1189}
1190
1191static void gen_vaddwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
1192{
1193    TCGv_i32 t1, t2;
1194
1195    t1 = tcg_temp_new_i32();
1196    t2 = tcg_temp_new_i32();
1197    tcg_gen_ext16u_i32(t1, a);
1198    tcg_gen_ext16s_i32(t2, b);
1199    tcg_gen_add_i32(t, t1, t2);
1200}
1201
1202static void gen_vaddwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1203{
1204    TCGv_i64 t1, t2;
1205
1206    t1 = tcg_temp_new_i64();
1207    t2 = tcg_temp_new_i64();
1208    tcg_gen_ext32u_i64(t1, a);
1209    tcg_gen_ext32s_i64(t2, b);
1210    tcg_gen_add_i64(t, t1, t2);
1211}
1212
1213static void do_vaddwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1214                           uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1215{
1216    static const TCGOpcode vecop_list[] = {
1217        INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
1218        };
1219    static const GVecGen3 op[4] = {
1220        {
1221            .fniv = gen_vaddwev_u_s,
1222            .fno = gen_helper_vaddwev_h_bu_b,
1223            .opt_opc = vecop_list,
1224            .vece = MO_16
1225        },
1226        {
1227            .fni4 = gen_vaddwev_w_hu_h,
1228            .fniv = gen_vaddwev_u_s,
1229            .fno = gen_helper_vaddwev_w_hu_h,
1230            .opt_opc = vecop_list,
1231            .vece = MO_32
1232        },
1233        {
1234            .fni8 = gen_vaddwev_d_wu_w,
1235            .fniv = gen_vaddwev_u_s,
1236            .fno = gen_helper_vaddwev_d_wu_w,
1237            .opt_opc = vecop_list,
1238            .vece = MO_64
1239        },
1240        {
1241            .fno = gen_helper_vaddwev_q_du_d,
1242            .vece = MO_128
1243        },
1244    };
1245
1246    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1247}
1248
1249TRANS(vaddwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vaddwev_u_s)
1250TRANS(vaddwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vaddwev_u_s)
1251TRANS(vaddwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vaddwev_u_s)
1252TRANS(vaddwev_q_du_d, LSX, gvec_vvv, MO_64, do_vaddwev_u_s)
1253TRANS(xvaddwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vaddwev_u_s)
1254TRANS(xvaddwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vaddwev_u_s)
1255TRANS(xvaddwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vaddwev_u_s)
1256TRANS(xvaddwev_q_du_d, LASX, gvec_xxx, MO_64, do_vaddwev_u_s)
1257
1258static void gen_vaddwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1259{
1260    TCGv_vec t1, t2;
1261
1262    int halfbits = 4 << vece;
1263
1264    t1 = tcg_temp_new_vec_matching(a);
1265    t2 = tcg_temp_new_vec_matching(b);
1266
1267    /* Zero-extend the odd elements from a */
1268    tcg_gen_shri_vec(vece, t1, a, halfbits);
1269    /* Sign-extend the odd elements from b */
1270    tcg_gen_sari_vec(vece, t2, b, halfbits);
1271
1272    tcg_gen_add_vec(vece, t, t1, t2);
1273}
1274
1275static void gen_vaddwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
1276{
1277    TCGv_i32 t1, t2;
1278
1279    t1 = tcg_temp_new_i32();
1280    t2 = tcg_temp_new_i32();
1281    tcg_gen_shri_i32(t1, a, 16);
1282    tcg_gen_sari_i32(t2, b, 16);
1283    tcg_gen_add_i32(t, t1, t2);
1284}
1285
1286static void gen_vaddwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1287{
1288    TCGv_i64 t1, t2;
1289
1290    t1 = tcg_temp_new_i64();
1291    t2 = tcg_temp_new_i64();
1292    tcg_gen_shri_i64(t1, a, 32);
1293    tcg_gen_sari_i64(t2, b, 32);
1294    tcg_gen_add_i64(t, t1, t2);
1295}
1296
1297static void do_vaddwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1298                           uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1299{
1300    static const TCGOpcode vecop_list[] = {
1301        INDEX_op_shri_vec, INDEX_op_sari_vec,  INDEX_op_add_vec, 0
1302        };
1303    static const GVecGen3 op[4] = {
1304        {
1305            .fniv = gen_vaddwod_u_s,
1306            .fno = gen_helper_vaddwod_h_bu_b,
1307            .opt_opc = vecop_list,
1308            .vece = MO_16
1309        },
1310        {
1311            .fni4 = gen_vaddwod_w_hu_h,
1312            .fniv = gen_vaddwod_u_s,
1313            .fno = gen_helper_vaddwod_w_hu_h,
1314            .opt_opc = vecop_list,
1315            .vece = MO_32
1316        },
1317        {
1318            .fni8 = gen_vaddwod_d_wu_w,
1319            .fniv = gen_vaddwod_u_s,
1320            .fno = gen_helper_vaddwod_d_wu_w,
1321            .opt_opc = vecop_list,
1322            .vece = MO_64
1323        },
1324        {
1325            .fno = gen_helper_vaddwod_q_du_d,
1326            .vece = MO_128
1327        },
1328    };
1329
1330    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1331}
1332
1333TRANS(vaddwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vaddwod_u_s)
1334TRANS(vaddwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vaddwod_u_s)
1335TRANS(vaddwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vaddwod_u_s)
1336TRANS(vaddwod_q_du_d, LSX, gvec_vvv, MO_64, do_vaddwod_u_s)
1337TRANS(xvaddwod_h_bu_b, LSX, gvec_xxx, MO_8, do_vaddwod_u_s)
1338TRANS(xvaddwod_w_hu_h, LSX, gvec_xxx, MO_16, do_vaddwod_u_s)
1339TRANS(xvaddwod_d_wu_w, LSX, gvec_xxx, MO_32, do_vaddwod_u_s)
1340TRANS(xvaddwod_q_du_d, LSX, gvec_xxx, MO_64, do_vaddwod_u_s)
1341
1342static void do_vavg(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b,
1343                    void (*gen_shr_vec)(unsigned, TCGv_vec,
1344                                        TCGv_vec, int64_t),
1345                    void (*gen_round_vec)(unsigned, TCGv_vec,
1346                                          TCGv_vec, TCGv_vec))
1347{
1348    TCGv_vec tmp = tcg_temp_new_vec_matching(t);
1349    gen_round_vec(vece, tmp, a, b);
1350    tcg_gen_and_vec(vece, tmp, tmp, tcg_constant_vec_matching(t, vece, 1));
1351    gen_shr_vec(vece, a, a, 1);
1352    gen_shr_vec(vece, b, b, 1);
1353    tcg_gen_add_vec(vece, t, a, b);
1354    tcg_gen_add_vec(vece, t, t, tmp);
1355}
1356
1357static void gen_vavg_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1358{
1359    do_vavg(vece, t, a, b, tcg_gen_sari_vec, tcg_gen_and_vec);
1360}
1361
1362static void gen_vavg_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1363{
1364    do_vavg(vece, t, a, b, tcg_gen_shri_vec, tcg_gen_and_vec);
1365}
1366
1367static void gen_vavgr_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1368{
1369    do_vavg(vece, t, a, b, tcg_gen_sari_vec, tcg_gen_or_vec);
1370}
1371
1372static void gen_vavgr_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1373{
1374    do_vavg(vece, t, a, b, tcg_gen_shri_vec, tcg_gen_or_vec);
1375}
1376
1377static void do_vavg_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1378                      uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1379{
1380    static const TCGOpcode vecop_list[] = {
1381        INDEX_op_sari_vec, INDEX_op_add_vec, 0
1382        };
1383    static const GVecGen3 op[4] = {
1384        {
1385            .fniv = gen_vavg_s,
1386            .fno = gen_helper_vavg_b,
1387            .opt_opc = vecop_list,
1388            .vece = MO_8
1389        },
1390        {
1391            .fniv = gen_vavg_s,
1392            .fno = gen_helper_vavg_h,
1393            .opt_opc = vecop_list,
1394            .vece = MO_16
1395        },
1396        {
1397            .fniv = gen_vavg_s,
1398            .fno = gen_helper_vavg_w,
1399            .opt_opc = vecop_list,
1400            .vece = MO_32
1401        },
1402        {
1403            .fniv = gen_vavg_s,
1404            .fno = gen_helper_vavg_d,
1405            .opt_opc = vecop_list,
1406            .vece = MO_64
1407        },
1408    };
1409
1410    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1411}
1412
1413static void do_vavg_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1414                      uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1415{
1416    static const TCGOpcode vecop_list[] = {
1417        INDEX_op_shri_vec, INDEX_op_add_vec, 0
1418        };
1419    static const GVecGen3 op[4] = {
1420        {
1421            .fniv = gen_vavg_u,
1422            .fno = gen_helper_vavg_bu,
1423            .opt_opc = vecop_list,
1424            .vece = MO_8
1425        },
1426        {
1427            .fniv = gen_vavg_u,
1428            .fno = gen_helper_vavg_hu,
1429            .opt_opc = vecop_list,
1430            .vece = MO_16
1431        },
1432        {
1433            .fniv = gen_vavg_u,
1434            .fno = gen_helper_vavg_wu,
1435            .opt_opc = vecop_list,
1436            .vece = MO_32
1437        },
1438        {
1439            .fniv = gen_vavg_u,
1440            .fno = gen_helper_vavg_du,
1441            .opt_opc = vecop_list,
1442            .vece = MO_64
1443        },
1444    };
1445
1446    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1447}
1448
1449TRANS(vavg_b, LSX, gvec_vvv, MO_8, do_vavg_s)
1450TRANS(vavg_h, LSX, gvec_vvv, MO_16, do_vavg_s)
1451TRANS(vavg_w, LSX, gvec_vvv, MO_32, do_vavg_s)
1452TRANS(vavg_d, LSX, gvec_vvv, MO_64, do_vavg_s)
1453TRANS(vavg_bu, LSX, gvec_vvv, MO_8, do_vavg_u)
1454TRANS(vavg_hu, LSX, gvec_vvv, MO_16, do_vavg_u)
1455TRANS(vavg_wu, LSX, gvec_vvv, MO_32, do_vavg_u)
1456TRANS(vavg_du, LSX, gvec_vvv, MO_64, do_vavg_u)
1457TRANS(xvavg_b, LASX, gvec_xxx, MO_8, do_vavg_s)
1458TRANS(xvavg_h, LASX, gvec_xxx, MO_16, do_vavg_s)
1459TRANS(xvavg_w, LASX, gvec_xxx, MO_32, do_vavg_s)
1460TRANS(xvavg_d, LASX, gvec_xxx, MO_64, do_vavg_s)
1461TRANS(xvavg_bu, LASX, gvec_xxx, MO_8, do_vavg_u)
1462TRANS(xvavg_hu, LASX, gvec_xxx, MO_16, do_vavg_u)
1463TRANS(xvavg_wu, LASX, gvec_xxx, MO_32, do_vavg_u)
1464TRANS(xvavg_du, LASX, gvec_xxx, MO_64, do_vavg_u)
1465
1466static void do_vavgr_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1467                       uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1468{
1469    static const TCGOpcode vecop_list[] = {
1470        INDEX_op_sari_vec, INDEX_op_add_vec, 0
1471        };
1472    static const GVecGen3 op[4] = {
1473        {
1474            .fniv = gen_vavgr_s,
1475            .fno = gen_helper_vavgr_b,
1476            .opt_opc = vecop_list,
1477            .vece = MO_8
1478        },
1479        {
1480            .fniv = gen_vavgr_s,
1481            .fno = gen_helper_vavgr_h,
1482            .opt_opc = vecop_list,
1483            .vece = MO_16
1484        },
1485        {
1486            .fniv = gen_vavgr_s,
1487            .fno = gen_helper_vavgr_w,
1488            .opt_opc = vecop_list,
1489            .vece = MO_32
1490        },
1491        {
1492            .fniv = gen_vavgr_s,
1493            .fno = gen_helper_vavgr_d,
1494            .opt_opc = vecop_list,
1495            .vece = MO_64
1496        },
1497    };
1498
1499    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1500}
1501
1502static void do_vavgr_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1503                       uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1504{
1505    static const TCGOpcode vecop_list[] = {
1506        INDEX_op_shri_vec, INDEX_op_add_vec, 0
1507        };
1508    static const GVecGen3 op[4] = {
1509        {
1510            .fniv = gen_vavgr_u,
1511            .fno = gen_helper_vavgr_bu,
1512            .opt_opc = vecop_list,
1513            .vece = MO_8
1514        },
1515        {
1516            .fniv = gen_vavgr_u,
1517            .fno = gen_helper_vavgr_hu,
1518            .opt_opc = vecop_list,
1519            .vece = MO_16
1520        },
1521        {
1522            .fniv = gen_vavgr_u,
1523            .fno = gen_helper_vavgr_wu,
1524            .opt_opc = vecop_list,
1525            .vece = MO_32
1526        },
1527        {
1528            .fniv = gen_vavgr_u,
1529            .fno = gen_helper_vavgr_du,
1530            .opt_opc = vecop_list,
1531            .vece = MO_64
1532        },
1533    };
1534
1535    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1536}
1537
1538TRANS(vavgr_b, LSX, gvec_vvv, MO_8, do_vavgr_s)
1539TRANS(vavgr_h, LSX, gvec_vvv, MO_16, do_vavgr_s)
1540TRANS(vavgr_w, LSX, gvec_vvv, MO_32, do_vavgr_s)
1541TRANS(vavgr_d, LSX, gvec_vvv, MO_64, do_vavgr_s)
1542TRANS(vavgr_bu, LSX, gvec_vvv, MO_8, do_vavgr_u)
1543TRANS(vavgr_hu, LSX, gvec_vvv, MO_16, do_vavgr_u)
1544TRANS(vavgr_wu, LSX, gvec_vvv, MO_32, do_vavgr_u)
1545TRANS(vavgr_du, LSX, gvec_vvv, MO_64, do_vavgr_u)
1546TRANS(xvavgr_b, LASX, gvec_xxx, MO_8, do_vavgr_s)
1547TRANS(xvavgr_h, LASX, gvec_xxx, MO_16, do_vavgr_s)
1548TRANS(xvavgr_w, LASX, gvec_xxx, MO_32, do_vavgr_s)
1549TRANS(xvavgr_d, LASX, gvec_xxx, MO_64, do_vavgr_s)
1550TRANS(xvavgr_bu, LASX, gvec_xxx, MO_8, do_vavgr_u)
1551TRANS(xvavgr_hu, LASX, gvec_xxx, MO_16, do_vavgr_u)
1552TRANS(xvavgr_wu, LASX, gvec_xxx, MO_32, do_vavgr_u)
1553TRANS(xvavgr_du, LASX, gvec_xxx, MO_64, do_vavgr_u)
1554
1555static void gen_vabsd_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1556{
1557    tcg_gen_smax_vec(vece, t, a, b);
1558    tcg_gen_smin_vec(vece, a, a, b);
1559    tcg_gen_sub_vec(vece, t, t, a);
1560}
1561
1562static void do_vabsd_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1563                       uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1564{
1565    static const TCGOpcode vecop_list[] = {
1566        INDEX_op_smax_vec, INDEX_op_smin_vec, INDEX_op_sub_vec, 0
1567        };
1568    static const GVecGen3 op[4] = {
1569        {
1570            .fniv = gen_vabsd_s,
1571            .fno = gen_helper_vabsd_b,
1572            .opt_opc = vecop_list,
1573            .vece = MO_8
1574        },
1575        {
1576            .fniv = gen_vabsd_s,
1577            .fno = gen_helper_vabsd_h,
1578            .opt_opc = vecop_list,
1579            .vece = MO_16
1580        },
1581        {
1582            .fniv = gen_vabsd_s,
1583            .fno = gen_helper_vabsd_w,
1584            .opt_opc = vecop_list,
1585            .vece = MO_32
1586        },
1587        {
1588            .fniv = gen_vabsd_s,
1589            .fno = gen_helper_vabsd_d,
1590            .opt_opc = vecop_list,
1591            .vece = MO_64
1592        },
1593    };
1594
1595    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1596}
1597
1598static void gen_vabsd_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1599{
1600    tcg_gen_umax_vec(vece, t, a, b);
1601    tcg_gen_umin_vec(vece, a, a, b);
1602    tcg_gen_sub_vec(vece, t, t, a);
1603}
1604
1605static void do_vabsd_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1606                       uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1607{
1608    static const TCGOpcode vecop_list[] = {
1609        INDEX_op_umax_vec, INDEX_op_umin_vec, INDEX_op_sub_vec, 0
1610        };
1611    static const GVecGen3 op[4] = {
1612        {
1613            .fniv = gen_vabsd_u,
1614            .fno = gen_helper_vabsd_bu,
1615            .opt_opc = vecop_list,
1616            .vece = MO_8
1617        },
1618        {
1619            .fniv = gen_vabsd_u,
1620            .fno = gen_helper_vabsd_hu,
1621            .opt_opc = vecop_list,
1622            .vece = MO_16
1623        },
1624        {
1625            .fniv = gen_vabsd_u,
1626            .fno = gen_helper_vabsd_wu,
1627            .opt_opc = vecop_list,
1628            .vece = MO_32
1629        },
1630        {
1631            .fniv = gen_vabsd_u,
1632            .fno = gen_helper_vabsd_du,
1633            .opt_opc = vecop_list,
1634            .vece = MO_64
1635        },
1636    };
1637
1638    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1639}
1640
1641TRANS(vabsd_b, LSX, gvec_vvv, MO_8, do_vabsd_s)
1642TRANS(vabsd_h, LSX, gvec_vvv, MO_16, do_vabsd_s)
1643TRANS(vabsd_w, LSX, gvec_vvv, MO_32, do_vabsd_s)
1644TRANS(vabsd_d, LSX, gvec_vvv, MO_64, do_vabsd_s)
1645TRANS(vabsd_bu, LSX, gvec_vvv, MO_8, do_vabsd_u)
1646TRANS(vabsd_hu, LSX, gvec_vvv, MO_16, do_vabsd_u)
1647TRANS(vabsd_wu, LSX, gvec_vvv, MO_32, do_vabsd_u)
1648TRANS(vabsd_du, LSX, gvec_vvv, MO_64, do_vabsd_u)
1649TRANS(xvabsd_b, LASX, gvec_xxx, MO_8, do_vabsd_s)
1650TRANS(xvabsd_h, LASX, gvec_xxx, MO_16, do_vabsd_s)
1651TRANS(xvabsd_w, LASX, gvec_xxx, MO_32, do_vabsd_s)
1652TRANS(xvabsd_d, LASX, gvec_xxx, MO_64, do_vabsd_s)
1653TRANS(xvabsd_bu, LASX, gvec_xxx, MO_8, do_vabsd_u)
1654TRANS(xvabsd_hu, LASX, gvec_xxx, MO_16, do_vabsd_u)
1655TRANS(xvabsd_wu, LASX, gvec_xxx, MO_32, do_vabsd_u)
1656TRANS(xvabsd_du, LASX, gvec_xxx, MO_64, do_vabsd_u)
1657
1658static void gen_vadda(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1659{
1660    TCGv_vec t1, t2;
1661
1662    t1 = tcg_temp_new_vec_matching(a);
1663    t2 = tcg_temp_new_vec_matching(b);
1664
1665    tcg_gen_abs_vec(vece, t1, a);
1666    tcg_gen_abs_vec(vece, t2, b);
1667    tcg_gen_add_vec(vece, t, t1, t2);
1668}
1669
1670static void do_vadda(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1671                     uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1672{
1673    static const TCGOpcode vecop_list[] = {
1674        INDEX_op_abs_vec, INDEX_op_add_vec, 0
1675        };
1676    static const GVecGen3 op[4] = {
1677        {
1678            .fniv = gen_vadda,
1679            .fno = gen_helper_vadda_b,
1680            .opt_opc = vecop_list,
1681            .vece = MO_8
1682        },
1683        {
1684            .fniv = gen_vadda,
1685            .fno = gen_helper_vadda_h,
1686            .opt_opc = vecop_list,
1687            .vece = MO_16
1688        },
1689        {
1690            .fniv = gen_vadda,
1691            .fno = gen_helper_vadda_w,
1692            .opt_opc = vecop_list,
1693            .vece = MO_32
1694        },
1695        {
1696            .fniv = gen_vadda,
1697            .fno = gen_helper_vadda_d,
1698            .opt_opc = vecop_list,
1699            .vece = MO_64
1700        },
1701    };
1702
1703    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1704}
1705
1706TRANS(vadda_b, LSX, gvec_vvv, MO_8, do_vadda)
1707TRANS(vadda_h, LSX, gvec_vvv, MO_16, do_vadda)
1708TRANS(vadda_w, LSX, gvec_vvv, MO_32, do_vadda)
1709TRANS(vadda_d, LSX, gvec_vvv, MO_64, do_vadda)
1710TRANS(xvadda_b, LASX, gvec_xxx, MO_8, do_vadda)
1711TRANS(xvadda_h, LASX, gvec_xxx, MO_16, do_vadda)
1712TRANS(xvadda_w, LASX, gvec_xxx, MO_32, do_vadda)
1713TRANS(xvadda_d, LASX, gvec_xxx, MO_64, do_vadda)
1714
1715TRANS(vmax_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_smax)
1716TRANS(vmax_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_smax)
1717TRANS(vmax_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_smax)
1718TRANS(vmax_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_smax)
1719TRANS(vmax_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_umax)
1720TRANS(vmax_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_umax)
1721TRANS(vmax_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_umax)
1722TRANS(vmax_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_umax)
1723TRANS(xvmax_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_smax)
1724TRANS(xvmax_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_smax)
1725TRANS(xvmax_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_smax)
1726TRANS(xvmax_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_smax)
1727TRANS(xvmax_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_umax)
1728TRANS(xvmax_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_umax)
1729TRANS(xvmax_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_umax)
1730TRANS(xvmax_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_umax)
1731
1732TRANS(vmin_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_smin)
1733TRANS(vmin_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_smin)
1734TRANS(vmin_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_smin)
1735TRANS(vmin_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_smin)
1736TRANS(vmin_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_umin)
1737TRANS(vmin_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_umin)
1738TRANS(vmin_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_umin)
1739TRANS(vmin_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_umin)
1740TRANS(xvmin_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_smin)
1741TRANS(xvmin_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_smin)
1742TRANS(xvmin_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_smin)
1743TRANS(xvmin_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_smin)
1744TRANS(xvmin_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_umin)
1745TRANS(xvmin_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_umin)
1746TRANS(xvmin_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_umin)
1747TRANS(xvmin_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_umin)
1748
1749static void gen_vmini_s(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
1750{
1751    tcg_gen_smin_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
1752}
1753
1754static void gen_vmini_u(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
1755{
1756    tcg_gen_umin_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
1757}
1758
1759static void gen_vmaxi_s(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
1760{
1761    tcg_gen_smax_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
1762}
1763
1764static void gen_vmaxi_u(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
1765{
1766    tcg_gen_umax_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
1767}
1768
1769static void do_vmini_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1770                       int64_t imm, uint32_t oprsz, uint32_t maxsz)
1771{
1772    static const TCGOpcode vecop_list[] = {
1773        INDEX_op_smin_vec, 0
1774        };
1775    static const GVecGen2i op[4] = {
1776        {
1777            .fniv = gen_vmini_s,
1778            .fnoi = gen_helper_vmini_b,
1779            .opt_opc = vecop_list,
1780            .vece = MO_8
1781        },
1782        {
1783            .fniv = gen_vmini_s,
1784            .fnoi = gen_helper_vmini_h,
1785            .opt_opc = vecop_list,
1786            .vece = MO_16
1787        },
1788        {
1789            .fniv = gen_vmini_s,
1790            .fnoi = gen_helper_vmini_w,
1791            .opt_opc = vecop_list,
1792            .vece = MO_32
1793        },
1794        {
1795            .fniv = gen_vmini_s,
1796            .fnoi = gen_helper_vmini_d,
1797            .opt_opc = vecop_list,
1798            .vece = MO_64
1799        },
1800    };
1801
1802    tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
1803}
1804
1805static void do_vmini_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1806                       int64_t imm, uint32_t oprsz, uint32_t maxsz)
1807{
1808    static const TCGOpcode vecop_list[] = {
1809        INDEX_op_umin_vec, 0
1810        };
1811    static const GVecGen2i op[4] = {
1812        {
1813            .fniv = gen_vmini_u,
1814            .fnoi = gen_helper_vmini_bu,
1815            .opt_opc = vecop_list,
1816            .vece = MO_8
1817        },
1818        {
1819            .fniv = gen_vmini_u,
1820            .fnoi = gen_helper_vmini_hu,
1821            .opt_opc = vecop_list,
1822            .vece = MO_16
1823        },
1824        {
1825            .fniv = gen_vmini_u,
1826            .fnoi = gen_helper_vmini_wu,
1827            .opt_opc = vecop_list,
1828            .vece = MO_32
1829        },
1830        {
1831            .fniv = gen_vmini_u,
1832            .fnoi = gen_helper_vmini_du,
1833            .opt_opc = vecop_list,
1834            .vece = MO_64
1835        },
1836    };
1837
1838    tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
1839}
1840
/*
 * vmini.{b,h,w,d}[u] and xvmini.{b,h,w,d}[u]: bind each instruction to the
 * signed (do_vmini_s) or unsigned (do_vmini_u) expander together with its
 * element size.  LSX forms use gvec_vv_i, LASX forms use gvec_xx_i.
 * TRANS and the gvec_* wrappers are defined outside this part of the file.
 */
TRANS(vmini_b, LSX, gvec_vv_i, MO_8, do_vmini_s)
TRANS(vmini_h, LSX, gvec_vv_i, MO_16, do_vmini_s)
TRANS(vmini_w, LSX, gvec_vv_i, MO_32, do_vmini_s)
TRANS(vmini_d, LSX, gvec_vv_i, MO_64, do_vmini_s)
TRANS(vmini_bu, LSX, gvec_vv_i, MO_8, do_vmini_u)
TRANS(vmini_hu, LSX, gvec_vv_i, MO_16, do_vmini_u)
TRANS(vmini_wu, LSX, gvec_vv_i, MO_32, do_vmini_u)
TRANS(vmini_du, LSX, gvec_vv_i, MO_64, do_vmini_u)
TRANS(xvmini_b, LASX, gvec_xx_i, MO_8, do_vmini_s)
TRANS(xvmini_h, LASX, gvec_xx_i, MO_16, do_vmini_s)
TRANS(xvmini_w, LASX, gvec_xx_i, MO_32, do_vmini_s)
TRANS(xvmini_d, LASX, gvec_xx_i, MO_64, do_vmini_s)
TRANS(xvmini_bu, LASX, gvec_xx_i, MO_8, do_vmini_u)
TRANS(xvmini_hu, LASX, gvec_xx_i, MO_16, do_vmini_u)
TRANS(xvmini_wu, LASX, gvec_xx_i, MO_32, do_vmini_u)
TRANS(xvmini_du, LASX, gvec_xx_i, MO_64, do_vmini_u)
1857
1858static void do_vmaxi_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1859                       int64_t imm, uint32_t oprsz, uint32_t maxsz)
1860{
1861    static const TCGOpcode vecop_list[] = {
1862        INDEX_op_smax_vec, 0
1863        };
1864    static const GVecGen2i op[4] = {
1865        {
1866            .fniv = gen_vmaxi_s,
1867            .fnoi = gen_helper_vmaxi_b,
1868            .opt_opc = vecop_list,
1869            .vece = MO_8
1870        },
1871        {
1872            .fniv = gen_vmaxi_s,
1873            .fnoi = gen_helper_vmaxi_h,
1874            .opt_opc = vecop_list,
1875            .vece = MO_16
1876        },
1877        {
1878            .fniv = gen_vmaxi_s,
1879            .fnoi = gen_helper_vmaxi_w,
1880            .opt_opc = vecop_list,
1881            .vece = MO_32
1882        },
1883        {
1884            .fniv = gen_vmaxi_s,
1885            .fnoi = gen_helper_vmaxi_d,
1886            .opt_opc = vecop_list,
1887            .vece = MO_64
1888        },
1889    };
1890
1891    tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
1892}
1893
1894static void do_vmaxi_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1895                       int64_t imm, uint32_t oprsz, uint32_t maxsz)
1896{
1897    static const TCGOpcode vecop_list[] = {
1898        INDEX_op_umax_vec, 0
1899        };
1900    static const GVecGen2i op[4] = {
1901        {
1902            .fniv = gen_vmaxi_u,
1903            .fnoi = gen_helper_vmaxi_bu,
1904            .opt_opc = vecop_list,
1905            .vece = MO_8
1906        },
1907        {
1908            .fniv = gen_vmaxi_u,
1909            .fnoi = gen_helper_vmaxi_hu,
1910            .opt_opc = vecop_list,
1911            .vece = MO_16
1912        },
1913        {
1914            .fniv = gen_vmaxi_u,
1915            .fnoi = gen_helper_vmaxi_wu,
1916            .opt_opc = vecop_list,
1917            .vece = MO_32
1918        },
1919        {
1920            .fniv = gen_vmaxi_u,
1921            .fnoi = gen_helper_vmaxi_du,
1922            .opt_opc = vecop_list,
1923            .vece = MO_64
1924        },
1925    };
1926
1927    tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
1928}
1929
/*
 * vmaxi.{b,h,w,d}[u] and xvmaxi.{b,h,w,d}[u]: bind each instruction to the
 * signed (do_vmaxi_s) or unsigned (do_vmaxi_u) expander together with its
 * element size.  LSX forms use gvec_vv_i, LASX forms use gvec_xx_i.
 * TRANS and the gvec_* wrappers are defined outside this part of the file.
 */
TRANS(vmaxi_b, LSX, gvec_vv_i, MO_8, do_vmaxi_s)
TRANS(vmaxi_h, LSX, gvec_vv_i, MO_16, do_vmaxi_s)
TRANS(vmaxi_w, LSX, gvec_vv_i, MO_32, do_vmaxi_s)
TRANS(vmaxi_d, LSX, gvec_vv_i, MO_64, do_vmaxi_s)
TRANS(vmaxi_bu, LSX, gvec_vv_i, MO_8, do_vmaxi_u)
TRANS(vmaxi_hu, LSX, gvec_vv_i, MO_16, do_vmaxi_u)
TRANS(vmaxi_wu, LSX, gvec_vv_i, MO_32, do_vmaxi_u)
TRANS(vmaxi_du, LSX, gvec_vv_i, MO_64, do_vmaxi_u)
TRANS(xvmaxi_b, LASX, gvec_xx_i, MO_8, do_vmaxi_s)
TRANS(xvmaxi_h, LASX, gvec_xx_i, MO_16, do_vmaxi_s)
TRANS(xvmaxi_w, LASX, gvec_xx_i, MO_32, do_vmaxi_s)
TRANS(xvmaxi_d, LASX, gvec_xx_i, MO_64, do_vmaxi_s)
TRANS(xvmaxi_bu, LASX, gvec_xx_i, MO_8, do_vmaxi_u)
TRANS(xvmaxi_hu, LASX, gvec_xx_i, MO_16, do_vmaxi_u)
TRANS(xvmaxi_wu, LASX, gvec_xx_i, MO_32, do_vmaxi_u)
TRANS(xvmaxi_du, LASX, gvec_xx_i, MO_64, do_vmaxi_u)
1946
/*
 * vmul.{b,h,w,d} and xvmul.{b,h,w,d}: element-wise multiply, expanded
 * directly by the generic tcg_gen_gvec_mul for every element size.
 * LSX forms use gvec_vvv, LASX forms use gvec_xxx.
 */
TRANS(vmul_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_mul)
TRANS(vmul_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_mul)
TRANS(vmul_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_mul)
TRANS(vmul_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_mul)
TRANS(xvmul_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_mul)
TRANS(xvmul_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_mul)
TRANS(xvmul_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_mul)
TRANS(xvmul_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_mul)
1955
1956static void gen_vmuh_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
1957{
1958    TCGv_i32 discard = tcg_temp_new_i32();
1959    tcg_gen_muls2_i32(discard, t, a, b);
1960}
1961
1962static void gen_vmuh_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1963{
1964    TCGv_i64 discard = tcg_temp_new_i64();
1965    tcg_gen_muls2_i64(discard, t, a, b);
1966}
1967
1968static void do_vmuh_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1969                      uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1970{
1971    static const GVecGen3 op[4] = {
1972        {
1973            .fno = gen_helper_vmuh_b,
1974            .vece = MO_8
1975        },
1976        {
1977            .fno = gen_helper_vmuh_h,
1978            .vece = MO_16
1979        },
1980        {
1981            .fni4 = gen_vmuh_w,
1982            .fno = gen_helper_vmuh_w,
1983            .vece = MO_32
1984        },
1985        {
1986            .fni8 = gen_vmuh_d,
1987            .fno = gen_helper_vmuh_d,
1988            .vece = MO_64
1989        },
1990    };
1991
1992    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1993}
1994
/*
 * vmuh.{b,h,w,d} and xvmuh.{b,h,w,d}: signed multiply-high, all routed
 * to do_vmuh_s with the instruction's element size.
 */
TRANS(vmuh_b, LSX, gvec_vvv, MO_8, do_vmuh_s)
TRANS(vmuh_h, LSX, gvec_vvv, MO_16, do_vmuh_s)
TRANS(vmuh_w, LSX, gvec_vvv, MO_32, do_vmuh_s)
TRANS(vmuh_d, LSX, gvec_vvv, MO_64, do_vmuh_s)
TRANS(xvmuh_b, LASX, gvec_xxx, MO_8, do_vmuh_s)
TRANS(xvmuh_h, LASX, gvec_xxx, MO_16, do_vmuh_s)
TRANS(xvmuh_w, LASX, gvec_xxx, MO_32, do_vmuh_s)
TRANS(xvmuh_d, LASX, gvec_xxx, MO_64, do_vmuh_s)
2003
2004static void gen_vmuh_wu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2005{
2006    TCGv_i32 discard = tcg_temp_new_i32();
2007    tcg_gen_mulu2_i32(discard, t, a, b);
2008}
2009
2010static void gen_vmuh_du(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2011{
2012    TCGv_i64 discard = tcg_temp_new_i64();
2013    tcg_gen_mulu2_i64(discard, t, a, b);
2014}
2015
2016static void do_vmuh_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2017                      uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2018{
2019    static const GVecGen3 op[4] = {
2020        {
2021            .fno = gen_helper_vmuh_bu,
2022            .vece = MO_8
2023        },
2024        {
2025            .fno = gen_helper_vmuh_hu,
2026            .vece = MO_16
2027        },
2028        {
2029            .fni4 = gen_vmuh_wu,
2030            .fno = gen_helper_vmuh_wu,
2031            .vece = MO_32
2032        },
2033        {
2034            .fni8 = gen_vmuh_du,
2035            .fno = gen_helper_vmuh_du,
2036            .vece = MO_64
2037        },
2038    };
2039
2040    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2041}
2042
/*
 * vmuh.{bu,hu,wu,du} and xvmuh.{bu,hu,wu,du}: unsigned multiply-high,
 * all routed to do_vmuh_u with the instruction's element size.
 */
TRANS(vmuh_bu, LSX, gvec_vvv, MO_8,  do_vmuh_u)
TRANS(vmuh_hu, LSX, gvec_vvv, MO_16, do_vmuh_u)
TRANS(vmuh_wu, LSX, gvec_vvv, MO_32, do_vmuh_u)
TRANS(vmuh_du, LSX, gvec_vvv, MO_64, do_vmuh_u)
TRANS(xvmuh_bu, LASX, gvec_xxx, MO_8,  do_vmuh_u)
TRANS(xvmuh_hu, LASX, gvec_xxx, MO_16, do_vmuh_u)
TRANS(xvmuh_wu, LASX, gvec_xxx, MO_32, do_vmuh_u)
TRANS(xvmuh_du, LASX, gvec_xxx, MO_64, do_vmuh_u)
2051
2052static void gen_vmulwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2053{
2054    TCGv_vec t1, t2;
2055    int halfbits = 4 << vece;
2056
2057    t1 = tcg_temp_new_vec_matching(a);
2058    t2 = tcg_temp_new_vec_matching(b);
2059    tcg_gen_shli_vec(vece, t1, a, halfbits);
2060    tcg_gen_sari_vec(vece, t1, t1, halfbits);
2061    tcg_gen_shli_vec(vece, t2, b, halfbits);
2062    tcg_gen_sari_vec(vece, t2, t2, halfbits);
2063    tcg_gen_mul_vec(vece, t, t1, t2);
2064}
2065
2066static void gen_vmulwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2067{
2068    TCGv_i32 t1, t2;
2069
2070    t1 = tcg_temp_new_i32();
2071    t2 = tcg_temp_new_i32();
2072    tcg_gen_ext16s_i32(t1, a);
2073    tcg_gen_ext16s_i32(t2, b);
2074    tcg_gen_mul_i32(t, t1, t2);
2075}
2076
2077static void gen_vmulwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2078{
2079    TCGv_i64 t1, t2;
2080
2081    t1 = tcg_temp_new_i64();
2082    t2 = tcg_temp_new_i64();
2083    tcg_gen_ext32s_i64(t1, a);
2084    tcg_gen_ext32s_i64(t2, b);
2085    tcg_gen_mul_i64(t, t1, t2);
2086}
2087
2088static void do_vmulwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2089                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2090{
2091    static const TCGOpcode vecop_list[] = {
2092        INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0
2093        };
2094    static const GVecGen3 op[3] = {
2095        {
2096            .fniv = gen_vmulwev_s,
2097            .fno = gen_helper_vmulwev_h_b,
2098            .opt_opc = vecop_list,
2099            .vece = MO_16
2100        },
2101        {
2102            .fni4 = gen_vmulwev_w_h,
2103            .fniv = gen_vmulwev_s,
2104            .fno = gen_helper_vmulwev_w_h,
2105            .opt_opc = vecop_list,
2106            .vece = MO_32
2107        },
2108        {
2109            .fni8 = gen_vmulwev_d_w,
2110            .fniv = gen_vmulwev_s,
2111            .fno = gen_helper_vmulwev_d_w,
2112            .opt_opc = vecop_list,
2113            .vece = MO_64
2114        },
2115    };
2116
2117    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2118}
2119
/*
 * vmulwev.{h.b,w.h,d.w} and the xv* forms: signed widening multiply of
 * even sub-elements, routed to do_vmulwev_s.  The MO_* argument names the
 * narrower size in the mnemonic (b, h, w), which do_vmulwev_s uses as its
 * table index.
 */
TRANS(vmulwev_h_b, LSX, gvec_vvv, MO_8, do_vmulwev_s)
TRANS(vmulwev_w_h, LSX, gvec_vvv, MO_16, do_vmulwev_s)
TRANS(vmulwev_d_w, LSX, gvec_vvv, MO_32, do_vmulwev_s)
TRANS(xvmulwev_h_b, LASX, gvec_xxx, MO_8, do_vmulwev_s)
TRANS(xvmulwev_w_h, LASX, gvec_xxx, MO_16, do_vmulwev_s)
TRANS(xvmulwev_d_w, LASX, gvec_xxx, MO_32, do_vmulwev_s)
2126
2127static void tcg_gen_mulus2_i64(TCGv_i64 rl, TCGv_i64 rh,
2128                               TCGv_i64 arg1, TCGv_i64 arg2)
2129{
2130    tcg_gen_mulsu2_i64(rl, rh, arg2, arg1);
2131}
2132
2133static bool gen_vmul_q_vl(DisasContext *ctx,
2134                          arg_vvv *a, uint32_t oprsz, int idx1, int idx2,
2135                          void (*func)(TCGv_i64, TCGv_i64,
2136                                       TCGv_i64, TCGv_i64))
2137{
2138    TCGv_i64 rh, rl, arg1, arg2;
2139    int i;
2140
2141    if (!check_vec(ctx, oprsz)) {
2142        return true;
2143    }
2144
2145    rh = tcg_temp_new_i64();
2146    rl = tcg_temp_new_i64();
2147    arg1 = tcg_temp_new_i64();
2148    arg2 = tcg_temp_new_i64();
2149
2150    for (i = 0; i < oprsz / 16; i++) {
2151        get_vreg64(arg1, a->vj, 2 * i + idx1);
2152        get_vreg64(arg2, a->vk, 2 * i + idx2);
2153
2154        func(rl, rh, arg1, arg2);
2155
2156        set_vreg64(rh, a->vd, 2 * i + 1);
2157        set_vreg64(rl, a->vd, 2 * i);
2158    }
2159
2160    return true;
2161}
2162
2163static bool gen_vmul_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2,
2164                       void (*func)(TCGv_i64, TCGv_i64,
2165                                    TCGv_i64, TCGv_i64))
2166{
2167    return gen_vmul_q_vl(ctx, a, 16, idx1, idx2, func);
2168}
2169
2170static bool gen_xvmul_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2,
2171                        void (*func)(TCGv_i64, TCGv_i64,
2172                                     TCGv_i64, TCGv_i64))
2173{
2174    return gen_vmul_q_vl(ctx, a, 32, idx1, idx2, func);
2175}
2176
/*
 * 64x64 -> 128-bit widening multiplies.  The "ev" forms pass (0, 0) and
 * the "od" forms pass (1, 1) as the element indices within each 128-bit
 * chunk.  The multiply emitter is tcg_gen_muls2_i64 for the plain forms,
 * tcg_gen_mulu2_i64 for the _du forms and tcg_gen_mulus2_i64 for the
 * _du_d forms.
 */
TRANS(vmulwev_q_d, LSX, gen_vmul_q, 0, 0, tcg_gen_muls2_i64)
TRANS(vmulwod_q_d, LSX, gen_vmul_q, 1, 1, tcg_gen_muls2_i64)
TRANS(vmulwev_q_du, LSX, gen_vmul_q, 0, 0, tcg_gen_mulu2_i64)
TRANS(vmulwod_q_du, LSX, gen_vmul_q, 1, 1, tcg_gen_mulu2_i64)
TRANS(vmulwev_q_du_d, LSX, gen_vmul_q, 0, 0, tcg_gen_mulus2_i64)
TRANS(vmulwod_q_du_d, LSX, gen_vmul_q, 1, 1, tcg_gen_mulus2_i64)
TRANS(xvmulwev_q_d, LASX, gen_xvmul_q, 0, 0, tcg_gen_muls2_i64)
TRANS(xvmulwod_q_d, LASX, gen_xvmul_q, 1, 1, tcg_gen_muls2_i64)
TRANS(xvmulwev_q_du, LASX, gen_xvmul_q, 0, 0, tcg_gen_mulu2_i64)
TRANS(xvmulwod_q_du, LASX, gen_xvmul_q, 1, 1, tcg_gen_mulu2_i64)
TRANS(xvmulwev_q_du_d, LASX, gen_xvmul_q, 0, 0, tcg_gen_mulus2_i64)
TRANS(xvmulwod_q_du_d, LASX, gen_xvmul_q, 1, 1, tcg_gen_mulus2_i64)
2189
2190static void gen_vmulwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2191{
2192    TCGv_vec t1, t2;
2193    int halfbits = 4 << vece;
2194
2195    t1 = tcg_temp_new_vec_matching(a);
2196    t2 = tcg_temp_new_vec_matching(b);
2197    tcg_gen_sari_vec(vece, t1, a, halfbits);
2198    tcg_gen_sari_vec(vece, t2, b, halfbits);
2199    tcg_gen_mul_vec(vece, t, t1, t2);
2200}
2201
2202static void gen_vmulwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2203{
2204    TCGv_i32 t1, t2;
2205
2206    t1 = tcg_temp_new_i32();
2207    t2 = tcg_temp_new_i32();
2208    tcg_gen_sari_i32(t1, a, 16);
2209    tcg_gen_sari_i32(t2, b, 16);
2210    tcg_gen_mul_i32(t, t1, t2);
2211}
2212
2213static void gen_vmulwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2214{
2215    TCGv_i64 t1, t2;
2216
2217    t1 = tcg_temp_new_i64();
2218    t2 = tcg_temp_new_i64();
2219    tcg_gen_sari_i64(t1, a, 32);
2220    tcg_gen_sari_i64(t2, b, 32);
2221    tcg_gen_mul_i64(t, t1, t2);
2222}
2223
2224static void do_vmulwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2225                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2226{
2227    static const TCGOpcode vecop_list[] = {
2228        INDEX_op_sari_vec, INDEX_op_mul_vec, 0
2229        };
2230    static const GVecGen3 op[3] = {
2231        {
2232            .fniv = gen_vmulwod_s,
2233            .fno = gen_helper_vmulwod_h_b,
2234            .opt_opc = vecop_list,
2235            .vece = MO_16
2236        },
2237        {
2238            .fni4 = gen_vmulwod_w_h,
2239            .fniv = gen_vmulwod_s,
2240            .fno = gen_helper_vmulwod_w_h,
2241            .opt_opc = vecop_list,
2242            .vece = MO_32
2243        },
2244        {
2245            .fni8 = gen_vmulwod_d_w,
2246            .fniv = gen_vmulwod_s,
2247            .fno = gen_helper_vmulwod_d_w,
2248            .opt_opc = vecop_list,
2249            .vece = MO_64
2250        },
2251    };
2252
2253    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2254}
2255
/*
 * vmulwod.{h.b,w.h,d.w} and the xv* forms: signed widening multiply of
 * odd sub-elements, routed to do_vmulwod_s.  The MO_* argument names the
 * narrower size in the mnemonic and is used as the table index.
 */
TRANS(vmulwod_h_b, LSX, gvec_vvv, MO_8, do_vmulwod_s)
TRANS(vmulwod_w_h, LSX, gvec_vvv, MO_16, do_vmulwod_s)
TRANS(vmulwod_d_w, LSX, gvec_vvv, MO_32, do_vmulwod_s)
TRANS(xvmulwod_h_b, LASX, gvec_xxx, MO_8, do_vmulwod_s)
TRANS(xvmulwod_w_h, LASX, gvec_xxx, MO_16, do_vmulwod_s)
TRANS(xvmulwod_d_w, LASX, gvec_xxx, MO_32, do_vmulwod_s)
2262
2263static void gen_vmulwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2264{
2265    TCGv_vec t1, t2, mask;
2266
2267    t1 = tcg_temp_new_vec_matching(a);
2268    t2 = tcg_temp_new_vec_matching(b);
2269    mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
2270    tcg_gen_and_vec(vece, t1, a, mask);
2271    tcg_gen_and_vec(vece, t2, b, mask);
2272    tcg_gen_mul_vec(vece, t, t1, t2);
2273}
2274
2275static void gen_vmulwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2276{
2277    TCGv_i32 t1, t2;
2278
2279    t1 = tcg_temp_new_i32();
2280    t2 = tcg_temp_new_i32();
2281    tcg_gen_ext16u_i32(t1, a);
2282    tcg_gen_ext16u_i32(t2, b);
2283    tcg_gen_mul_i32(t, t1, t2);
2284}
2285
2286static void gen_vmulwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2287{
2288    TCGv_i64 t1, t2;
2289
2290    t1 = tcg_temp_new_i64();
2291    t2 = tcg_temp_new_i64();
2292    tcg_gen_ext32u_i64(t1, a);
2293    tcg_gen_ext32u_i64(t2, b);
2294    tcg_gen_mul_i64(t, t1, t2);
2295}
2296
2297static void do_vmulwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2298                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2299{
2300    static const TCGOpcode vecop_list[] = {
2301        INDEX_op_mul_vec, 0
2302        };
2303    static const GVecGen3 op[3] = {
2304        {
2305            .fniv = gen_vmulwev_u,
2306            .fno = gen_helper_vmulwev_h_bu,
2307            .opt_opc = vecop_list,
2308            .vece = MO_16
2309        },
2310        {
2311            .fni4 = gen_vmulwev_w_hu,
2312            .fniv = gen_vmulwev_u,
2313            .fno = gen_helper_vmulwev_w_hu,
2314            .opt_opc = vecop_list,
2315            .vece = MO_32
2316        },
2317        {
2318            .fni8 = gen_vmulwev_d_wu,
2319            .fniv = gen_vmulwev_u,
2320            .fno = gen_helper_vmulwev_d_wu,
2321            .opt_opc = vecop_list,
2322            .vece = MO_64
2323        },
2324    };
2325
2326    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2327}
2328
/*
 * vmulwev.{h.bu,w.hu,d.wu} and the xv* forms: unsigned widening multiply
 * of even sub-elements, routed to do_vmulwev_u.  The MO_* argument names
 * the narrower size in the mnemonic and is used as the table index.
 */
TRANS(vmulwev_h_bu, LSX, gvec_vvv, MO_8, do_vmulwev_u)
TRANS(vmulwev_w_hu, LSX, gvec_vvv, MO_16, do_vmulwev_u)
TRANS(vmulwev_d_wu, LSX, gvec_vvv, MO_32, do_vmulwev_u)
TRANS(xvmulwev_h_bu, LASX, gvec_xxx, MO_8, do_vmulwev_u)
TRANS(xvmulwev_w_hu, LASX, gvec_xxx, MO_16, do_vmulwev_u)
TRANS(xvmulwev_d_wu, LASX, gvec_xxx, MO_32, do_vmulwev_u)
2335
2336static void gen_vmulwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2337{
2338    TCGv_vec t1, t2;
2339    int halfbits = 4 << vece;
2340
2341    t1 = tcg_temp_new_vec_matching(a);
2342    t2 = tcg_temp_new_vec_matching(b);
2343    tcg_gen_shri_vec(vece, t1, a, halfbits);
2344    tcg_gen_shri_vec(vece, t2, b, halfbits);
2345    tcg_gen_mul_vec(vece, t, t1, t2);
2346}
2347
2348static void gen_vmulwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2349{
2350    TCGv_i32 t1, t2;
2351
2352    t1 = tcg_temp_new_i32();
2353    t2 = tcg_temp_new_i32();
2354    tcg_gen_shri_i32(t1, a, 16);
2355    tcg_gen_shri_i32(t2, b, 16);
2356    tcg_gen_mul_i32(t, t1, t2);
2357}
2358
2359static void gen_vmulwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2360{
2361    TCGv_i64 t1, t2;
2362
2363    t1 = tcg_temp_new_i64();
2364    t2 = tcg_temp_new_i64();
2365    tcg_gen_shri_i64(t1, a, 32);
2366    tcg_gen_shri_i64(t2, b, 32);
2367    tcg_gen_mul_i64(t, t1, t2);
2368}
2369
2370static void do_vmulwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2371                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2372{
2373    static const TCGOpcode vecop_list[] = {
2374        INDEX_op_shri_vec, INDEX_op_mul_vec, 0
2375        };
2376    static const GVecGen3 op[3] = {
2377        {
2378            .fniv = gen_vmulwod_u,
2379            .fno = gen_helper_vmulwod_h_bu,
2380            .opt_opc = vecop_list,
2381            .vece = MO_16
2382        },
2383        {
2384            .fni4 = gen_vmulwod_w_hu,
2385            .fniv = gen_vmulwod_u,
2386            .fno = gen_helper_vmulwod_w_hu,
2387            .opt_opc = vecop_list,
2388            .vece = MO_32
2389        },
2390        {
2391            .fni8 = gen_vmulwod_d_wu,
2392            .fniv = gen_vmulwod_u,
2393            .fno = gen_helper_vmulwod_d_wu,
2394            .opt_opc = vecop_list,
2395            .vece = MO_64
2396        },
2397    };
2398
2399    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2400}
2401
/*
 * vmulwod.{h.bu,w.hu,d.wu} and the xv* forms: unsigned widening multiply
 * of odd sub-elements, routed to do_vmulwod_u.  The MO_* argument names
 * the narrower size in the mnemonic and is used as the table index.
 */
TRANS(vmulwod_h_bu, LSX, gvec_vvv, MO_8, do_vmulwod_u)
TRANS(vmulwod_w_hu, LSX, gvec_vvv, MO_16, do_vmulwod_u)
TRANS(vmulwod_d_wu, LSX, gvec_vvv, MO_32, do_vmulwod_u)
TRANS(xvmulwod_h_bu, LASX, gvec_xxx, MO_8, do_vmulwod_u)
TRANS(xvmulwod_w_hu, LASX, gvec_xxx, MO_16, do_vmulwod_u)
TRANS(xvmulwod_d_wu, LASX, gvec_xxx, MO_32, do_vmulwod_u)
2408
2409static void gen_vmulwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2410{
2411    TCGv_vec t1, t2, mask;
2412    int halfbits = 4 << vece;
2413
2414    t1 = tcg_temp_new_vec_matching(a);
2415    t2 = tcg_temp_new_vec_matching(b);
2416    mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
2417    tcg_gen_and_vec(vece, t1, a, mask);
2418    tcg_gen_shli_vec(vece, t2, b, halfbits);
2419    tcg_gen_sari_vec(vece, t2, t2, halfbits);
2420    tcg_gen_mul_vec(vece, t, t1, t2);
2421}
2422
2423static void gen_vmulwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2424{
2425    TCGv_i32 t1, t2;
2426
2427    t1 = tcg_temp_new_i32();
2428    t2 = tcg_temp_new_i32();
2429    tcg_gen_ext16u_i32(t1, a);
2430    tcg_gen_ext16s_i32(t2, b);
2431    tcg_gen_mul_i32(t, t1, t2);
2432}
2433
2434static void gen_vmulwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2435{
2436    TCGv_i64 t1, t2;
2437
2438    t1 = tcg_temp_new_i64();
2439    t2 = tcg_temp_new_i64();
2440    tcg_gen_ext32u_i64(t1, a);
2441    tcg_gen_ext32s_i64(t2, b);
2442    tcg_gen_mul_i64(t, t1, t2);
2443}
2444
2445static void do_vmulwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2446                           uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2447{
2448    static const TCGOpcode vecop_list[] = {
2449        INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0
2450        };
2451    static const GVecGen3 op[3] = {
2452        {
2453            .fniv = gen_vmulwev_u_s,
2454            .fno = gen_helper_vmulwev_h_bu_b,
2455            .opt_opc = vecop_list,
2456            .vece = MO_16
2457        },
2458        {
2459            .fni4 = gen_vmulwev_w_hu_h,
2460            .fniv = gen_vmulwev_u_s,
2461            .fno = gen_helper_vmulwev_w_hu_h,
2462            .opt_opc = vecop_list,
2463            .vece = MO_32
2464        },
2465        {
2466            .fni8 = gen_vmulwev_d_wu_w,
2467            .fniv = gen_vmulwev_u_s,
2468            .fno = gen_helper_vmulwev_d_wu_w,
2469            .opt_opc = vecop_list,
2470            .vece = MO_64
2471        },
2472    };
2473
2474    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2475}
2476
/*
 * vmulwev.{h.bu.b,w.hu.h,d.wu.w} and the xv* forms: mixed-sign widening
 * multiply of even sub-elements, routed to do_vmulwev_u_s.  The MO_*
 * argument names the narrower size and is used as the table index.
 */
TRANS(vmulwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vmulwev_u_s)
TRANS(vmulwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vmulwev_u_s)
TRANS(vmulwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vmulwev_u_s)
TRANS(xvmulwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vmulwev_u_s)
TRANS(xvmulwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vmulwev_u_s)
TRANS(xvmulwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vmulwev_u_s)
2483
2484static void gen_vmulwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2485{
2486    TCGv_vec t1, t2;
2487    int halfbits = 4 << vece;
2488
2489    t1 = tcg_temp_new_vec_matching(a);
2490    t2 = tcg_temp_new_vec_matching(b);
2491    tcg_gen_shri_vec(vece, t1, a, halfbits);
2492    tcg_gen_sari_vec(vece, t2, b, halfbits);
2493    tcg_gen_mul_vec(vece, t, t1, t2);
2494}
2495
2496static void gen_vmulwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2497{
2498    TCGv_i32 t1, t2;
2499
2500    t1 = tcg_temp_new_i32();
2501    t2 = tcg_temp_new_i32();
2502    tcg_gen_shri_i32(t1, a, 16);
2503    tcg_gen_sari_i32(t2, b, 16);
2504    tcg_gen_mul_i32(t, t1, t2);
2505}
2506static void gen_vmulwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2507{
2508    TCGv_i64 t1, t2;
2509
2510    t1 = tcg_temp_new_i64();
2511    t2 = tcg_temp_new_i64();
2512    tcg_gen_shri_i64(t1, a, 32);
2513    tcg_gen_sari_i64(t2, b, 32);
2514    tcg_gen_mul_i64(t, t1, t2);
2515}
2516
2517static void do_vmulwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2518                           uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2519{
2520    static const TCGOpcode vecop_list[] = {
2521        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0
2522        };
2523    static const GVecGen3 op[3] = {
2524        {
2525            .fniv = gen_vmulwod_u_s,
2526            .fno = gen_helper_vmulwod_h_bu_b,
2527            .opt_opc = vecop_list,
2528            .vece = MO_16
2529        },
2530        {
2531            .fni4 = gen_vmulwod_w_hu_h,
2532            .fniv = gen_vmulwod_u_s,
2533            .fno = gen_helper_vmulwod_w_hu_h,
2534            .opt_opc = vecop_list,
2535            .vece = MO_32
2536        },
2537        {
2538            .fni8 = gen_vmulwod_d_wu_w,
2539            .fniv = gen_vmulwod_u_s,
2540            .fno = gen_helper_vmulwod_d_wu_w,
2541            .opt_opc = vecop_list,
2542            .vece = MO_64
2543        },
2544    };
2545
2546    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2547}
2548
/*
 * vmulwod.{h.bu.b,w.hu.h,d.wu.w} and the xv* forms: mixed-sign widening
 * multiply of odd sub-elements, routed to do_vmulwod_u_s.  The MO_*
 * argument names the narrower size and is used as the table index.
 */
TRANS(vmulwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vmulwod_u_s)
TRANS(vmulwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vmulwod_u_s)
TRANS(vmulwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vmulwod_u_s)
TRANS(xvmulwod_h_bu_b, LASX, gvec_xxx, MO_8, do_vmulwod_u_s)
TRANS(xvmulwod_w_hu_h, LASX, gvec_xxx, MO_16, do_vmulwod_u_s)
TRANS(xvmulwod_d_wu_w, LASX, gvec_xxx, MO_32, do_vmulwod_u_s)
2555
2556static void gen_vmadd(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2557{
2558    TCGv_vec t1;
2559
2560    t1 = tcg_temp_new_vec_matching(t);
2561    tcg_gen_mul_vec(vece, t1, a, b);
2562    tcg_gen_add_vec(vece, t, t, t1);
2563}
2564
2565static void gen_vmadd_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2566{
2567    TCGv_i32 t1;
2568
2569    t1 = tcg_temp_new_i32();
2570    tcg_gen_mul_i32(t1, a, b);
2571    tcg_gen_add_i32(t, t, t1);
2572}
2573
2574static void gen_vmadd_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2575{
2576    TCGv_i64 t1;
2577
2578    t1 = tcg_temp_new_i64();
2579    tcg_gen_mul_i64(t1, a, b);
2580    tcg_gen_add_i64(t, t, t1);
2581}
2582
2583static void do_vmadd(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2584                     uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2585{
2586    static const TCGOpcode vecop_list[] = {
2587        INDEX_op_mul_vec, INDEX_op_add_vec, 0
2588        };
2589    static const GVecGen3 op[4] = {
2590        {
2591            .fniv = gen_vmadd,
2592            .fno = gen_helper_vmadd_b,
2593            .load_dest = true,
2594            .opt_opc = vecop_list,
2595            .vece = MO_8
2596        },
2597        {
2598            .fniv = gen_vmadd,
2599            .fno = gen_helper_vmadd_h,
2600            .load_dest = true,
2601            .opt_opc = vecop_list,
2602            .vece = MO_16
2603        },
2604        {
2605            .fni4 = gen_vmadd_w,
2606            .fniv = gen_vmadd,
2607            .fno = gen_helper_vmadd_w,
2608            .load_dest = true,
2609            .opt_opc = vecop_list,
2610            .vece = MO_32
2611        },
2612        {
2613            .fni8 = gen_vmadd_d,
2614            .fniv = gen_vmadd,
2615            .fno = gen_helper_vmadd_d,
2616            .load_dest = true,
2617            .opt_opc = vecop_list,
2618            .vece = MO_64
2619        },
2620    };
2621
2622    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2623}
2624
/*
 * vmadd.{b,h,w,d} and xvmadd.{b,h,w,d}: multiply-accumulate, all routed
 * to do_vmadd with the instruction's element size.
 */
TRANS(vmadd_b, LSX, gvec_vvv, MO_8, do_vmadd)
TRANS(vmadd_h, LSX, gvec_vvv, MO_16, do_vmadd)
TRANS(vmadd_w, LSX, gvec_vvv, MO_32, do_vmadd)
TRANS(vmadd_d, LSX, gvec_vvv, MO_64, do_vmadd)
TRANS(xvmadd_b, LASX, gvec_xxx, MO_8, do_vmadd)
TRANS(xvmadd_h, LASX, gvec_xxx, MO_16, do_vmadd)
TRANS(xvmadd_w, LASX, gvec_xxx, MO_32, do_vmadd)
TRANS(xvmadd_d, LASX, gvec_xxx, MO_64, do_vmadd)
2633
2634static void gen_vmsub(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2635{
2636    TCGv_vec t1;
2637
2638    t1 = tcg_temp_new_vec_matching(t);
2639    tcg_gen_mul_vec(vece, t1, a, b);
2640    tcg_gen_sub_vec(vece, t, t, t1);
2641}
2642
2643static void gen_vmsub_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2644{
2645    TCGv_i32 t1;
2646
2647    t1 = tcg_temp_new_i32();
2648    tcg_gen_mul_i32(t1, a, b);
2649    tcg_gen_sub_i32(t, t, t1);
2650}
2651
2652static void gen_vmsub_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2653{
2654    TCGv_i64 t1;
2655
2656    t1 = tcg_temp_new_i64();
2657    tcg_gen_mul_i64(t1, a, b);
2658    tcg_gen_sub_i64(t, t, t1);
2659}
2660
2661static void do_vmsub(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2662                     uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2663{
2664    static const TCGOpcode vecop_list[] = {
2665        INDEX_op_mul_vec, INDEX_op_sub_vec, 0
2666        };
2667    static const GVecGen3 op[4] = {
2668        {
2669            .fniv = gen_vmsub,
2670            .fno = gen_helper_vmsub_b,
2671            .load_dest = true,
2672            .opt_opc = vecop_list,
2673            .vece = MO_8
2674        },
2675        {
2676            .fniv = gen_vmsub,
2677            .fno = gen_helper_vmsub_h,
2678            .load_dest = true,
2679            .opt_opc = vecop_list,
2680            .vece = MO_16
2681        },
2682        {
2683            .fni4 = gen_vmsub_w,
2684            .fniv = gen_vmsub,
2685            .fno = gen_helper_vmsub_w,
2686            .load_dest = true,
2687            .opt_opc = vecop_list,
2688            .vece = MO_32
2689        },
2690        {
2691            .fni8 = gen_vmsub_d,
2692            .fniv = gen_vmsub,
2693            .fno = gen_helper_vmsub_d,
2694            .load_dest = true,
2695            .opt_opc = vecop_list,
2696            .vece = MO_64
2697        },
2698    };
2699
2700    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2701}
2702
/*
 * vmsub.{b,h,w,d} and xvmsub.{b,h,w,d}: multiply-subtract, all routed
 * to do_vmsub with the instruction's element size.
 */
TRANS(vmsub_b, LSX, gvec_vvv, MO_8, do_vmsub)
TRANS(vmsub_h, LSX, gvec_vvv, MO_16, do_vmsub)
TRANS(vmsub_w, LSX, gvec_vvv, MO_32, do_vmsub)
TRANS(vmsub_d, LSX, gvec_vvv, MO_64, do_vmsub)
TRANS(xvmsub_b, LASX, gvec_xxx, MO_8, do_vmsub)
TRANS(xvmsub_h, LASX, gvec_xxx, MO_16, do_vmsub)
TRANS(xvmsub_w, LASX, gvec_xxx, MO_32, do_vmsub)
TRANS(xvmsub_d, LASX, gvec_xxx, MO_64, do_vmsub)
2711
2712static void gen_vmaddwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2713{
2714    TCGv_vec t1, t2, t3;
2715    int halfbits = 4 << vece;
2716
2717    t1 = tcg_temp_new_vec_matching(a);
2718    t2 = tcg_temp_new_vec_matching(b);
2719    t3 = tcg_temp_new_vec_matching(t);
2720    tcg_gen_shli_vec(vece, t1, a, halfbits);
2721    tcg_gen_sari_vec(vece, t1, t1, halfbits);
2722    tcg_gen_shli_vec(vece, t2, b, halfbits);
2723    tcg_gen_sari_vec(vece, t2, t2, halfbits);
2724    tcg_gen_mul_vec(vece, t3, t1, t2);
2725    tcg_gen_add_vec(vece, t, t, t3);
2726}
2727
2728static void gen_vmaddwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2729{
2730    TCGv_i32 t1;
2731
2732    t1 = tcg_temp_new_i32();
2733    gen_vmulwev_w_h(t1, a, b);
2734    tcg_gen_add_i32(t, t, t1);
2735}
2736
2737static void gen_vmaddwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2738{
2739    TCGv_i64 t1;
2740
2741    t1 = tcg_temp_new_i64();
2742    gen_vmulwev_d_w(t1, a, b);
2743    tcg_gen_add_i64(t, t, t1);
2744}
2745
2746static void do_vmaddwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2747                          uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2748{
2749    static const TCGOpcode vecop_list[] = {
2750        INDEX_op_shli_vec, INDEX_op_sari_vec,
2751        INDEX_op_mul_vec, INDEX_op_add_vec, 0
2752        };
2753    static const GVecGen3 op[3] = {
2754        {
2755            .fniv = gen_vmaddwev_s,
2756            .fno = gen_helper_vmaddwev_h_b,
2757            .load_dest = true,
2758            .opt_opc = vecop_list,
2759            .vece = MO_16
2760        },
2761        {
2762            .fni4 = gen_vmaddwev_w_h,
2763            .fniv = gen_vmaddwev_s,
2764            .fno = gen_helper_vmaddwev_w_h,
2765            .load_dest = true,
2766            .opt_opc = vecop_list,
2767            .vece = MO_32
2768        },
2769        {
2770            .fni8 = gen_vmaddwev_d_w,
2771            .fniv = gen_vmaddwev_s,
2772            .fno = gen_helper_vmaddwev_d_w,
2773            .load_dest = true,
2774            .opt_opc = vecop_list,
2775            .vece = MO_64
2776        },
2777    };
2778
2779    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2780}
2781
/*
 * vmaddwev.{h.b,w.h,d.w} and the xv* forms: signed widening
 * multiply-accumulate of even sub-elements, routed to do_vmaddwev_s.
 * The MO_* argument names the narrower size and is used as the table index.
 */
TRANS(vmaddwev_h_b, LSX, gvec_vvv, MO_8, do_vmaddwev_s)
TRANS(vmaddwev_w_h, LSX, gvec_vvv, MO_16, do_vmaddwev_s)
TRANS(vmaddwev_d_w, LSX, gvec_vvv, MO_32, do_vmaddwev_s)
TRANS(xvmaddwev_h_b, LASX, gvec_xxx, MO_8, do_vmaddwev_s)
TRANS(xvmaddwev_w_h, LASX, gvec_xxx, MO_16, do_vmaddwev_s)
TRANS(xvmaddwev_d_w, LASX, gvec_xxx, MO_32, do_vmaddwev_s)
2788
2789static bool gen_vmadd_q_vl(DisasContext * ctx,
2790                           arg_vvv *a, uint32_t oprsz, int idx1, int idx2,
2791                           void (*func)(TCGv_i64, TCGv_i64,
2792                                        TCGv_i64, TCGv_i64))
2793{
2794    TCGv_i64 rh, rl, arg1, arg2, th, tl;
2795    int i;
2796
2797    if (!check_vec(ctx, oprsz)) {
2798        return true;
2799    }
2800
2801    rh = tcg_temp_new_i64();
2802    rl = tcg_temp_new_i64();
2803    arg1 = tcg_temp_new_i64();
2804    arg2 = tcg_temp_new_i64();
2805    th = tcg_temp_new_i64();
2806    tl = tcg_temp_new_i64();
2807
2808    for (i = 0; i < oprsz / 16; i++) {
2809        get_vreg64(arg1, a->vj, 2 * i + idx1);
2810        get_vreg64(arg2, a->vk, 2 * i + idx2);
2811        get_vreg64(rh, a->vd, 2 * i + 1);
2812        get_vreg64(rl, a->vd, 2 * i);
2813
2814        func(tl, th, arg1, arg2);
2815        tcg_gen_add2_i64(rl, rh, rl, rh, tl, th);
2816
2817        set_vreg64(rh, a->vd, 2 * i + 1);
2818        set_vreg64(rl, a->vd, 2 * i);
2819    }
2820
2821    return true;
2822}
2823
2824static bool gen_vmadd_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2,
2825                        void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
2826{
2827    return gen_vmadd_q_vl(ctx, a, 16, idx1, idx2, func);
2828}
2829
2830static bool gen_xvmadd_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2,
2831                         void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
2832{
2833    return gen_vmadd_q_vl(ctx, a, 32, idx1, idx2, func);
2834}
2835
2836TRANS(vmaddwev_q_d, LSX, gen_vmadd_q, 0, 0, tcg_gen_muls2_i64)
2837TRANS(vmaddwod_q_d, LSX, gen_vmadd_q, 1, 1, tcg_gen_muls2_i64)
2838TRANS(vmaddwev_q_du, LSX, gen_vmadd_q, 0, 0, tcg_gen_mulu2_i64)
2839TRANS(vmaddwod_q_du, LSX, gen_vmadd_q, 1, 1, tcg_gen_mulu2_i64)
2840TRANS(vmaddwev_q_du_d, LSX, gen_vmadd_q, 0, 0, tcg_gen_mulus2_i64)
2841TRANS(vmaddwod_q_du_d, LSX, gen_vmadd_q, 1, 1, tcg_gen_mulus2_i64)
2842TRANS(xvmaddwev_q_d, LASX, gen_xvmadd_q, 0, 0, tcg_gen_muls2_i64)
2843TRANS(xvmaddwod_q_d, LASX, gen_xvmadd_q, 1, 1, tcg_gen_muls2_i64)
2844TRANS(xvmaddwev_q_du, LASX, gen_xvmadd_q, 0, 0, tcg_gen_mulu2_i64)
2845TRANS(xvmaddwod_q_du, LASX, gen_xvmadd_q, 1, 1, tcg_gen_mulu2_i64)
2846TRANS(xvmaddwev_q_du_d, LASX, gen_xvmadd_q, 0, 0, tcg_gen_mulus2_i64)
2847TRANS(xvmaddwod_q_du_d, LASX, gen_xvmadd_q, 1, 1, tcg_gen_mulus2_i64)
2848
2849static void gen_vmaddwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2850{
2851    TCGv_vec t1, t2, t3;
2852    int halfbits = 4 << vece;
2853
2854    t1 = tcg_temp_new_vec_matching(a);
2855    t2 = tcg_temp_new_vec_matching(b);
2856    t3 = tcg_temp_new_vec_matching(t);
2857    tcg_gen_sari_vec(vece, t1, a, halfbits);
2858    tcg_gen_sari_vec(vece, t2, b, halfbits);
2859    tcg_gen_mul_vec(vece, t3, t1, t2);
2860    tcg_gen_add_vec(vece, t, t, t3);
2861}
2862
2863static void gen_vmaddwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2864{
2865    TCGv_i32 t1;
2866
2867    t1 = tcg_temp_new_i32();
2868    gen_vmulwod_w_h(t1, a, b);
2869    tcg_gen_add_i32(t, t, t1);
2870}
2871
2872static void gen_vmaddwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2873{
2874    TCGv_i64 t1;
2875
2876    t1 = tcg_temp_new_i64();
2877    gen_vmulwod_d_w(t1, a, b);
2878    tcg_gen_add_i64(t, t, t1);
2879}
2880
2881static void do_vmaddwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2882                          uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2883{
2884    static const TCGOpcode vecop_list[] = {
2885        INDEX_op_sari_vec, INDEX_op_mul_vec, INDEX_op_add_vec, 0
2886        };
2887    static const GVecGen3 op[3] = {
2888        {
2889            .fniv = gen_vmaddwod_s,
2890            .fno = gen_helper_vmaddwod_h_b,
2891            .load_dest = true,
2892            .opt_opc = vecop_list,
2893            .vece = MO_16
2894        },
2895        {
2896            .fni4 = gen_vmaddwod_w_h,
2897            .fniv = gen_vmaddwod_s,
2898            .fno = gen_helper_vmaddwod_w_h,
2899            .load_dest = true,
2900            .opt_opc = vecop_list,
2901            .vece = MO_32
2902        },
2903        {
2904            .fni8 = gen_vmaddwod_d_w,
2905            .fniv = gen_vmaddwod_s,
2906            .fno = gen_helper_vmaddwod_d_w,
2907            .load_dest = true,
2908            .opt_opc = vecop_list,
2909            .vece = MO_64
2910        },
2911    };
2912
2913    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2914}
2915
2916TRANS(vmaddwod_h_b, LSX, gvec_vvv, MO_8, do_vmaddwod_s)
2917TRANS(vmaddwod_w_h, LSX, gvec_vvv, MO_16, do_vmaddwod_s)
2918TRANS(vmaddwod_d_w, LSX, gvec_vvv, MO_32, do_vmaddwod_s)
2919TRANS(xvmaddwod_h_b, LASX, gvec_xxx, MO_8, do_vmaddwod_s)
2920TRANS(xvmaddwod_w_h, LASX, gvec_xxx, MO_16, do_vmaddwod_s)
2921TRANS(xvmaddwod_d_w, LASX, gvec_xxx, MO_32, do_vmaddwod_s)
2922
2923static void gen_vmaddwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2924{
2925    TCGv_vec t1, t2, mask;
2926
2927    t1 = tcg_temp_new_vec_matching(t);
2928    t2 = tcg_temp_new_vec_matching(b);
2929    mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
2930    tcg_gen_and_vec(vece, t1, a, mask);
2931    tcg_gen_and_vec(vece, t2, b, mask);
2932    tcg_gen_mul_vec(vece, t1, t1, t2);
2933    tcg_gen_add_vec(vece, t, t, t1);
2934}
2935
2936static void gen_vmaddwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2937{
2938    TCGv_i32 t1;
2939
2940    t1 = tcg_temp_new_i32();
2941    gen_vmulwev_w_hu(t1, a, b);
2942    tcg_gen_add_i32(t, t, t1);
2943}
2944
2945static void gen_vmaddwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2946{
2947    TCGv_i64 t1;
2948
2949    t1 = tcg_temp_new_i64();
2950    gen_vmulwev_d_wu(t1, a, b);
2951    tcg_gen_add_i64(t, t, t1);
2952}
2953
2954static void do_vmaddwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2955                          uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2956{
2957    static const TCGOpcode vecop_list[] = {
2958        INDEX_op_mul_vec, INDEX_op_add_vec, 0
2959        };
2960    static const GVecGen3 op[3] = {
2961        {
2962            .fniv = gen_vmaddwev_u,
2963            .fno = gen_helper_vmaddwev_h_bu,
2964            .load_dest = true,
2965            .opt_opc = vecop_list,
2966            .vece = MO_16
2967        },
2968        {
2969            .fni4 = gen_vmaddwev_w_hu,
2970            .fniv = gen_vmaddwev_u,
2971            .fno = gen_helper_vmaddwev_w_hu,
2972            .load_dest = true,
2973            .opt_opc = vecop_list,
2974            .vece = MO_32
2975        },
2976        {
2977            .fni8 = gen_vmaddwev_d_wu,
2978            .fniv = gen_vmaddwev_u,
2979            .fno = gen_helper_vmaddwev_d_wu,
2980            .load_dest = true,
2981            .opt_opc = vecop_list,
2982            .vece = MO_64
2983        },
2984    };
2985
2986    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2987}
2988
2989TRANS(vmaddwev_h_bu, LSX, gvec_vvv, MO_8, do_vmaddwev_u)
2990TRANS(vmaddwev_w_hu, LSX, gvec_vvv, MO_16, do_vmaddwev_u)
2991TRANS(vmaddwev_d_wu, LSX, gvec_vvv, MO_32, do_vmaddwev_u)
2992TRANS(xvmaddwev_h_bu, LASX, gvec_xxx, MO_8, do_vmaddwev_u)
2993TRANS(xvmaddwev_w_hu, LASX, gvec_xxx, MO_16, do_vmaddwev_u)
2994TRANS(xvmaddwev_d_wu, LASX, gvec_xxx, MO_32, do_vmaddwev_u)
2995
2996static void gen_vmaddwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2997{
2998    TCGv_vec t1, t2, t3;
2999    int halfbits = 4 << vece;
3000
3001    t1 = tcg_temp_new_vec_matching(a);
3002    t2 = tcg_temp_new_vec_matching(b);
3003    t3 = tcg_temp_new_vec_matching(t);
3004    tcg_gen_shri_vec(vece, t1, a, halfbits);
3005    tcg_gen_shri_vec(vece, t2, b, halfbits);
3006    tcg_gen_mul_vec(vece, t3, t1, t2);
3007    tcg_gen_add_vec(vece, t, t, t3);
3008}
3009
3010static void gen_vmaddwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
3011{
3012    TCGv_i32 t1;
3013
3014    t1 = tcg_temp_new_i32();
3015    gen_vmulwod_w_hu(t1, a, b);
3016    tcg_gen_add_i32(t, t, t1);
3017}
3018
3019static void gen_vmaddwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
3020{
3021    TCGv_i64 t1;
3022
3023    t1 = tcg_temp_new_i64();
3024    gen_vmulwod_d_wu(t1, a, b);
3025    tcg_gen_add_i64(t, t, t1);
3026}
3027
3028static void do_vmaddwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3029                          uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
3030{
3031    static const TCGOpcode vecop_list[] = {
3032        INDEX_op_shri_vec, INDEX_op_mul_vec, INDEX_op_add_vec, 0
3033        };
3034    static const GVecGen3 op[3] = {
3035        {
3036            .fniv = gen_vmaddwod_u,
3037            .fno = gen_helper_vmaddwod_h_bu,
3038            .load_dest = true,
3039            .opt_opc = vecop_list,
3040            .vece = MO_16
3041        },
3042        {
3043            .fni4 = gen_vmaddwod_w_hu,
3044            .fniv = gen_vmaddwod_u,
3045            .fno = gen_helper_vmaddwod_w_hu,
3046            .load_dest = true,
3047            .opt_opc = vecop_list,
3048            .vece = MO_32
3049        },
3050        {
3051            .fni8 = gen_vmaddwod_d_wu,
3052            .fniv = gen_vmaddwod_u,
3053            .fno = gen_helper_vmaddwod_d_wu,
3054            .load_dest = true,
3055            .opt_opc = vecop_list,
3056            .vece = MO_64
3057        },
3058    };
3059
3060    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
3061}
3062
3063TRANS(vmaddwod_h_bu, LSX, gvec_vvv, MO_8, do_vmaddwod_u)
3064TRANS(vmaddwod_w_hu, LSX, gvec_vvv, MO_16, do_vmaddwod_u)
3065TRANS(vmaddwod_d_wu, LSX, gvec_vvv, MO_32, do_vmaddwod_u)
3066TRANS(xvmaddwod_h_bu, LASX, gvec_xxx, MO_8, do_vmaddwod_u)
3067TRANS(xvmaddwod_w_hu, LASX, gvec_xxx, MO_16, do_vmaddwod_u)
3068TRANS(xvmaddwod_d_wu, LASX, gvec_xxx, MO_32, do_vmaddwod_u)
3069
3070static void gen_vmaddwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
3071{
3072    TCGv_vec t1, t2, mask;
3073    int halfbits = 4 << vece;
3074
3075    t1 = tcg_temp_new_vec_matching(a);
3076    t2 = tcg_temp_new_vec_matching(b);
3077    mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
3078    tcg_gen_and_vec(vece, t1, a, mask);
3079    tcg_gen_shli_vec(vece, t2, b, halfbits);
3080    tcg_gen_sari_vec(vece, t2, t2, halfbits);
3081    tcg_gen_mul_vec(vece, t1, t1, t2);
3082    tcg_gen_add_vec(vece, t, t, t1);
3083}
3084
3085static void gen_vmaddwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
3086{
3087    TCGv_i32 t1;
3088
3089    t1 = tcg_temp_new_i32();
3090    gen_vmulwev_w_hu_h(t1, a, b);
3091    tcg_gen_add_i32(t, t, t1);
3092}
3093
3094static void gen_vmaddwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
3095{
3096    TCGv_i64 t1;
3097
3098    t1 = tcg_temp_new_i64();
3099    gen_vmulwev_d_wu_w(t1, a, b);
3100    tcg_gen_add_i64(t, t, t1);
3101}
3102
3103static void do_vmaddwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3104                            uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
3105{
3106    static const TCGOpcode vecop_list[] = {
3107        INDEX_op_shli_vec, INDEX_op_sari_vec,
3108        INDEX_op_mul_vec, INDEX_op_add_vec, 0
3109        };
3110    static const GVecGen3 op[3] = {
3111        {
3112            .fniv = gen_vmaddwev_u_s,
3113            .fno = gen_helper_vmaddwev_h_bu_b,
3114            .load_dest = true,
3115            .opt_opc = vecop_list,
3116            .vece = MO_16
3117        },
3118        {
3119            .fni4 = gen_vmaddwev_w_hu_h,
3120            .fniv = gen_vmaddwev_u_s,
3121            .fno = gen_helper_vmaddwev_w_hu_h,
3122            .load_dest = true,
3123            .opt_opc = vecop_list,
3124            .vece = MO_32
3125        },
3126        {
3127            .fni8 = gen_vmaddwev_d_wu_w,
3128            .fniv = gen_vmaddwev_u_s,
3129            .fno = gen_helper_vmaddwev_d_wu_w,
3130            .load_dest = true,
3131            .opt_opc = vecop_list,
3132            .vece = MO_64
3133        },
3134    };
3135
3136    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
3137}
3138
3139TRANS(vmaddwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vmaddwev_u_s)
3140TRANS(vmaddwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vmaddwev_u_s)
3141TRANS(vmaddwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vmaddwev_u_s)
3142TRANS(xvmaddwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vmaddwev_u_s)
3143TRANS(xvmaddwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vmaddwev_u_s)
3144TRANS(xvmaddwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vmaddwev_u_s)
3145
3146static void gen_vmaddwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
3147{
3148    TCGv_vec t1, t2, t3;
3149    int halfbits = 4 << vece;
3150
3151    t1 = tcg_temp_new_vec_matching(a);
3152    t2 = tcg_temp_new_vec_matching(b);
3153    t3 = tcg_temp_new_vec_matching(t);
3154    tcg_gen_shri_vec(vece, t1, a, halfbits);
3155    tcg_gen_sari_vec(vece, t2, b, halfbits);
3156    tcg_gen_mul_vec(vece, t3, t1, t2);
3157    tcg_gen_add_vec(vece, t, t, t3);
3158}
3159
3160static void gen_vmaddwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
3161{
3162    TCGv_i32 t1;
3163
3164    t1 = tcg_temp_new_i32();
3165    gen_vmulwod_w_hu_h(t1, a, b);
3166    tcg_gen_add_i32(t, t, t1);
3167}
3168
3169static void gen_vmaddwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
3170{
3171    TCGv_i64 t1;
3172
3173    t1 = tcg_temp_new_i64();
3174    gen_vmulwod_d_wu_w(t1, a, b);
3175    tcg_gen_add_i64(t, t, t1);
3176}
3177
3178static void do_vmaddwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3179                            uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
3180{
3181    static const TCGOpcode vecop_list[] = {
3182        INDEX_op_shri_vec, INDEX_op_sari_vec,
3183        INDEX_op_mul_vec, INDEX_op_add_vec, 0
3184        };
3185    static const GVecGen3 op[3] = {
3186        {
3187            .fniv = gen_vmaddwod_u_s,
3188            .fno = gen_helper_vmaddwod_h_bu_b,
3189            .load_dest = true,
3190            .opt_opc = vecop_list,
3191            .vece = MO_16
3192        },
3193        {
3194            .fni4 = gen_vmaddwod_w_hu_h,
3195            .fniv = gen_vmaddwod_u_s,
3196            .fno = gen_helper_vmaddwod_w_hu_h,
3197            .load_dest = true,
3198            .opt_opc = vecop_list,
3199            .vece = MO_32
3200        },
3201        {
3202            .fni8 = gen_vmaddwod_d_wu_w,
3203            .fniv = gen_vmaddwod_u_s,
3204            .fno = gen_helper_vmaddwod_d_wu_w,
3205            .load_dest = true,
3206            .opt_opc = vecop_list,
3207            .vece = MO_64
3208        },
3209    };
3210
3211    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
3212}
3213
3214TRANS(vmaddwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vmaddwod_u_s)
3215TRANS(vmaddwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vmaddwod_u_s)
3216TRANS(vmaddwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vmaddwod_u_s)
3217TRANS(xvmaddwod_h_bu_b, LASX, gvec_xxx, MO_8, do_vmaddwod_u_s)
3218TRANS(xvmaddwod_w_hu_h, LASX, gvec_xxx, MO_16, do_vmaddwod_u_s)
3219TRANS(xvmaddwod_d_wu_w, LASX, gvec_xxx, MO_32, do_vmaddwod_u_s)
3220
3221TRANS(vdiv_b, LSX, gen_vvv, gen_helper_vdiv_b)
3222TRANS(vdiv_h, LSX, gen_vvv, gen_helper_vdiv_h)
3223TRANS(vdiv_w, LSX, gen_vvv, gen_helper_vdiv_w)
3224TRANS(vdiv_d, LSX, gen_vvv, gen_helper_vdiv_d)
3225TRANS(vdiv_bu, LSX, gen_vvv, gen_helper_vdiv_bu)
3226TRANS(vdiv_hu, LSX, gen_vvv, gen_helper_vdiv_hu)
3227TRANS(vdiv_wu, LSX, gen_vvv, gen_helper_vdiv_wu)
3228TRANS(vdiv_du, LSX, gen_vvv, gen_helper_vdiv_du)
3229TRANS(vmod_b, LSX, gen_vvv, gen_helper_vmod_b)
3230TRANS(vmod_h, LSX, gen_vvv, gen_helper_vmod_h)
3231TRANS(vmod_w, LSX, gen_vvv, gen_helper_vmod_w)
3232TRANS(vmod_d, LSX, gen_vvv, gen_helper_vmod_d)
3233TRANS(vmod_bu, LSX, gen_vvv, gen_helper_vmod_bu)
3234TRANS(vmod_hu, LSX, gen_vvv, gen_helper_vmod_hu)
3235TRANS(vmod_wu, LSX, gen_vvv, gen_helper_vmod_wu)
3236TRANS(vmod_du, LSX, gen_vvv, gen_helper_vmod_du)
3237TRANS(xvdiv_b, LASX, gen_xxx, gen_helper_vdiv_b)
3238TRANS(xvdiv_h, LASX, gen_xxx, gen_helper_vdiv_h)
3239TRANS(xvdiv_w, LASX, gen_xxx, gen_helper_vdiv_w)
3240TRANS(xvdiv_d, LASX, gen_xxx, gen_helper_vdiv_d)
3241TRANS(xvdiv_bu, LASX, gen_xxx, gen_helper_vdiv_bu)
3242TRANS(xvdiv_hu, LASX, gen_xxx, gen_helper_vdiv_hu)
3243TRANS(xvdiv_wu, LASX, gen_xxx, gen_helper_vdiv_wu)
3244TRANS(xvdiv_du, LASX, gen_xxx, gen_helper_vdiv_du)
3245TRANS(xvmod_b, LASX, gen_xxx, gen_helper_vmod_b)
3246TRANS(xvmod_h, LASX, gen_xxx, gen_helper_vmod_h)
3247TRANS(xvmod_w, LASX, gen_xxx, gen_helper_vmod_w)
3248TRANS(xvmod_d, LASX, gen_xxx, gen_helper_vmod_d)
3249TRANS(xvmod_bu, LASX, gen_xxx, gen_helper_vmod_bu)
3250TRANS(xvmod_hu, LASX, gen_xxx, gen_helper_vmod_hu)
3251TRANS(xvmod_wu, LASX, gen_xxx, gen_helper_vmod_wu)
3252TRANS(xvmod_du, LASX, gen_xxx, gen_helper_vmod_du)
3253
3254static void gen_vsat_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec max)
3255{
3256    TCGv_vec min;
3257
3258    min = tcg_temp_new_vec_matching(t);
3259    tcg_gen_not_vec(vece, min, max);
3260    tcg_gen_smax_vec(vece, t, a, min);
3261    tcg_gen_smin_vec(vece, t, t, max);
3262}
3263
3264static void do_vsat_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3265                      int64_t imm, uint32_t oprsz, uint32_t maxsz)
3266{
3267    static const TCGOpcode vecop_list[] = {
3268        INDEX_op_smax_vec, INDEX_op_smin_vec, 0
3269        };
3270    static const GVecGen2s op[4] = {
3271        {
3272            .fniv = gen_vsat_s,
3273            .fno = gen_helper_vsat_b,
3274            .opt_opc = vecop_list,
3275            .vece = MO_8
3276        },
3277        {
3278            .fniv = gen_vsat_s,
3279            .fno = gen_helper_vsat_h,
3280            .opt_opc = vecop_list,
3281            .vece = MO_16
3282        },
3283        {
3284            .fniv = gen_vsat_s,
3285            .fno = gen_helper_vsat_w,
3286            .opt_opc = vecop_list,
3287            .vece = MO_32
3288        },
3289        {
3290            .fniv = gen_vsat_s,
3291            .fno = gen_helper_vsat_d,
3292            .opt_opc = vecop_list,
3293            .vece = MO_64
3294        },
3295    };
3296
3297    tcg_gen_gvec_2s(vd_ofs, vj_ofs, oprsz, maxsz,
3298                    tcg_constant_i64((1ll<< imm) -1), &op[vece]);
3299}
3300
3301TRANS(vsat_b, LSX, gvec_vv_i, MO_8, do_vsat_s)
3302TRANS(vsat_h, LSX, gvec_vv_i, MO_16, do_vsat_s)
3303TRANS(vsat_w, LSX, gvec_vv_i, MO_32, do_vsat_s)
3304TRANS(vsat_d, LSX, gvec_vv_i, MO_64, do_vsat_s)
3305TRANS(xvsat_b, LASX, gvec_xx_i, MO_8, do_vsat_s)
3306TRANS(xvsat_h, LASX, gvec_xx_i, MO_16, do_vsat_s)
3307TRANS(xvsat_w, LASX, gvec_xx_i, MO_32, do_vsat_s)
3308TRANS(xvsat_d, LASX, gvec_xx_i, MO_64, do_vsat_s)
3309
/*
 * Vector expansion of unsigned saturation: t = umin(a, max) per element.
 * do_vsat_u() installs this as .fniv and supplies the bound as the scalar
 * operand of tcg_gen_gvec_2s(); presumably the gvec layer broadcasts that
 * scalar into the max vector seen here.
 */
static void gen_vsat_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec max)
{
    tcg_gen_umin_vec(vece, t, a, max);
}
3314
3315static void do_vsat_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3316                       int64_t imm, uint32_t oprsz, uint32_t maxsz)
3317{
3318    uint64_t max;
3319    static const TCGOpcode vecop_list[] = {
3320        INDEX_op_umin_vec, 0
3321        };
3322    static const GVecGen2s op[4] = {
3323        {
3324            .fniv = gen_vsat_u,
3325            .fno = gen_helper_vsat_bu,
3326            .opt_opc = vecop_list,
3327            .vece = MO_8
3328        },
3329        {
3330            .fniv = gen_vsat_u,
3331            .fno = gen_helper_vsat_hu,
3332            .opt_opc = vecop_list,
3333            .vece = MO_16
3334        },
3335        {
3336            .fniv = gen_vsat_u,
3337            .fno = gen_helper_vsat_wu,
3338            .opt_opc = vecop_list,
3339            .vece = MO_32
3340        },
3341        {
3342            .fniv = gen_vsat_u,
3343            .fno = gen_helper_vsat_du,
3344            .opt_opc = vecop_list,
3345            .vece = MO_64
3346        },
3347    };
3348
3349    max = (imm == 0x3f) ? UINT64_MAX : (1ull << (imm + 1)) - 1;
3350    tcg_gen_gvec_2s(vd_ofs, vj_ofs, oprsz, maxsz,
3351                    tcg_constant_i64(max), &op[vece]);
3352}
3353
3354TRANS(vsat_bu, LSX, gvec_vv_i, MO_8, do_vsat_u)
3355TRANS(vsat_hu, LSX, gvec_vv_i, MO_16, do_vsat_u)
3356TRANS(vsat_wu, LSX, gvec_vv_i, MO_32, do_vsat_u)
3357TRANS(vsat_du, LSX, gvec_vv_i, MO_64, do_vsat_u)
3358TRANS(xvsat_bu, LASX, gvec_xx_i, MO_8, do_vsat_u)
3359TRANS(xvsat_hu, LASX, gvec_xx_i, MO_16, do_vsat_u)
3360TRANS(xvsat_wu, LASX, gvec_xx_i, MO_32, do_vsat_u)
3361TRANS(xvsat_du, LASX, gvec_xx_i, MO_64, do_vsat_u)
3362
3363TRANS(vexth_h_b, LSX, gen_vv, gen_helper_vexth_h_b)
3364TRANS(vexth_w_h, LSX, gen_vv, gen_helper_vexth_w_h)
3365TRANS(vexth_d_w, LSX, gen_vv, gen_helper_vexth_d_w)
3366TRANS(vexth_q_d, LSX, gen_vv, gen_helper_vexth_q_d)
3367TRANS(vexth_hu_bu, LSX, gen_vv, gen_helper_vexth_hu_bu)
3368TRANS(vexth_wu_hu, LSX, gen_vv, gen_helper_vexth_wu_hu)
3369TRANS(vexth_du_wu, LSX, gen_vv, gen_helper_vexth_du_wu)
3370TRANS(vexth_qu_du, LSX, gen_vv, gen_helper_vexth_qu_du)
3371TRANS(xvexth_h_b, LASX, gen_xx, gen_helper_vexth_h_b)
3372TRANS(xvexth_w_h, LASX, gen_xx, gen_helper_vexth_w_h)
3373TRANS(xvexth_d_w, LASX, gen_xx, gen_helper_vexth_d_w)
3374TRANS(xvexth_q_d, LASX, gen_xx, gen_helper_vexth_q_d)
3375TRANS(xvexth_hu_bu, LASX, gen_xx, gen_helper_vexth_hu_bu)
3376TRANS(xvexth_wu_hu, LASX, gen_xx, gen_helper_vexth_wu_hu)
3377TRANS(xvexth_du_wu, LASX, gen_xx, gen_helper_vexth_du_wu)
3378TRANS(xvexth_qu_du, LASX, gen_xx, gen_helper_vexth_qu_du)
3379
3380TRANS(vext2xv_h_b, LASX, gen_xx, gen_helper_vext2xv_h_b)
3381TRANS(vext2xv_w_b, LASX, gen_xx, gen_helper_vext2xv_w_b)
3382TRANS(vext2xv_d_b, LASX, gen_xx, gen_helper_vext2xv_d_b)
3383TRANS(vext2xv_w_h, LASX, gen_xx, gen_helper_vext2xv_w_h)
3384TRANS(vext2xv_d_h, LASX, gen_xx, gen_helper_vext2xv_d_h)
3385TRANS(vext2xv_d_w, LASX, gen_xx, gen_helper_vext2xv_d_w)
3386TRANS(vext2xv_hu_bu, LASX, gen_xx, gen_helper_vext2xv_hu_bu)
3387TRANS(vext2xv_wu_bu, LASX, gen_xx, gen_helper_vext2xv_wu_bu)
3388TRANS(vext2xv_du_bu, LASX, gen_xx, gen_helper_vext2xv_du_bu)
3389TRANS(vext2xv_wu_hu, LASX, gen_xx, gen_helper_vext2xv_wu_hu)
3390TRANS(vext2xv_du_hu, LASX, gen_xx, gen_helper_vext2xv_du_hu)
3391TRANS(vext2xv_du_wu, LASX, gen_xx, gen_helper_vext2xv_du_wu)
3392
3393static void gen_vsigncov(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
3394{
3395    TCGv_vec t1, zero;
3396
3397    t1 = tcg_temp_new_vec_matching(t);
3398    zero = tcg_constant_vec_matching(t, vece, 0);
3399
3400    tcg_gen_neg_vec(vece, t1, b);
3401    tcg_gen_cmpsel_vec(TCG_COND_LT, vece, t, a, zero, t1, b);
3402    tcg_gen_cmpsel_vec(TCG_COND_EQ, vece, t, a, zero, zero, t);
3403}
3404
3405static void do_vsigncov(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3406                        uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
3407{
3408    static const TCGOpcode vecop_list[] = {
3409        INDEX_op_neg_vec, INDEX_op_cmpsel_vec, 0
3410        };
3411    static const GVecGen3 op[4] = {
3412        {
3413            .fniv = gen_vsigncov,
3414            .fno = gen_helper_vsigncov_b,
3415            .opt_opc = vecop_list,
3416            .vece = MO_8
3417        },
3418        {
3419            .fniv = gen_vsigncov,
3420            .fno = gen_helper_vsigncov_h,
3421            .opt_opc = vecop_list,
3422            .vece = MO_16
3423        },
3424        {
3425            .fniv = gen_vsigncov,
3426            .fno = gen_helper_vsigncov_w,
3427            .opt_opc = vecop_list,
3428            .vece = MO_32
3429        },
3430        {
3431            .fniv = gen_vsigncov,
3432            .fno = gen_helper_vsigncov_d,
3433            .opt_opc = vecop_list,
3434            .vece = MO_64
3435        },
3436    };
3437
3438    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
3439}
3440
3441TRANS(vsigncov_b, LSX, gvec_vvv, MO_8, do_vsigncov)
3442TRANS(vsigncov_h, LSX, gvec_vvv, MO_16, do_vsigncov)
3443TRANS(vsigncov_w, LSX, gvec_vvv, MO_32, do_vsigncov)
3444TRANS(vsigncov_d, LSX, gvec_vvv, MO_64, do_vsigncov)
3445TRANS(xvsigncov_b, LASX, gvec_xxx, MO_8, do_vsigncov)
3446TRANS(xvsigncov_h, LASX, gvec_xxx, MO_16, do_vsigncov)
3447TRANS(xvsigncov_w, LASX, gvec_xxx, MO_32, do_vsigncov)
3448TRANS(xvsigncov_d, LASX, gvec_xxx, MO_64, do_vsigncov)
3449
3450TRANS(vmskltz_b, LSX, gen_vv, gen_helper_vmskltz_b)
3451TRANS(vmskltz_h, LSX, gen_vv, gen_helper_vmskltz_h)
3452TRANS(vmskltz_w, LSX, gen_vv, gen_helper_vmskltz_w)
3453TRANS(vmskltz_d, LSX, gen_vv, gen_helper_vmskltz_d)
3454TRANS(vmskgez_b, LSX, gen_vv, gen_helper_vmskgez_b)
3455TRANS(vmsknz_b, LSX, gen_vv, gen_helper_vmsknz_b)
3456TRANS(xvmskltz_b, LASX, gen_xx, gen_helper_vmskltz_b)
3457TRANS(xvmskltz_h, LASX, gen_xx, gen_helper_vmskltz_h)
3458TRANS(xvmskltz_w, LASX, gen_xx, gen_helper_vmskltz_w)
3459TRANS(xvmskltz_d, LASX, gen_xx, gen_helper_vmskltz_d)
3460TRANS(xvmskgez_b, LASX, gen_xx, gen_helper_vmskgez_b)
3461TRANS(xvmsknz_b, LASX, gen_xx, gen_helper_vmsknz_b)
3462
/*
 * Expand a truth value into a full byte: 0xff when bit is non-zero, 0
 * otherwise.  The result is a uint64_t so it can be shifted into any byte
 * position of a 64-bit value.  The argument is parenthesized so that
 * low-precedence expressions (e.g. a conditional) are evaluated as a unit.
 */
#define EXPAND_BYTE(bit)  ((uint64_t)((bit) ? 0xff : 0))
3464
3465static uint64_t vldi_get_value(DisasContext *ctx, uint32_t imm)
3466{
3467    int mode;
3468    uint64_t data = 0, t;
3469
3470    /*
3471     * imm bit [11:8] is mode, mode value is 0-12.
3472     * other values are invalid.
3473     */
3474    mode = (imm >> 8) & 0xf;
3475    t =  imm & 0xff;
3476    switch (mode) {
3477    case 0:
3478        /* data: {2{24'0, imm[7:0]}} */
3479        data =  (t << 32) | t ;
3480        break;
3481    case 1:
3482        /* data: {2{16'0, imm[7:0], 8'0}} */
3483        data = (t << 40) | (t << 8);
3484        break;
3485    case 2:
3486        /* data: {2{8'0, imm[7:0], 16'0}} */
3487        data = (t << 48) | (t << 16);
3488        break;
3489    case 3:
3490        /* data: {2{imm[7:0], 24'0}} */
3491        data = (t << 56) | (t << 24);
3492        break;
3493    case 4:
3494        /* data: {4{8'0, imm[7:0]}} */
3495        data = (t << 48) | (t << 32) | (t << 16) | t;
3496        break;
3497    case 5:
3498        /* data: {4{imm[7:0], 8'0}} */
3499        data = (t << 56) |(t << 40) | (t << 24) | (t << 8);
3500        break;
3501    case 6:
3502        /* data: {2{16'0, imm[7:0], 8'1}} */
3503        data = (t << 40) | ((uint64_t)0xff << 32) | (t << 8) | 0xff;
3504        break;
3505    case 7:
3506        /* data: {2{8'0, imm[7:0], 16'1}} */
3507        data = (t << 48) | ((uint64_t)0xffff << 32) | (t << 16) | 0xffff;
3508        break;
3509    case 8:
3510        /* data: {8{imm[7:0]}} */
3511        data =(t << 56) | (t << 48) | (t << 40) | (t << 32) |
3512              (t << 24) | (t << 16) | (t << 8) | t;
3513        break;
3514    case 9:
3515        /* data: {{8{imm[7]}, ..., 8{imm[0]}}} */
3516        {
3517            uint64_t b0,b1,b2,b3,b4,b5,b6,b7;
3518            b0 = t& 0x1;
3519            b1 = (t & 0x2) >> 1;
3520            b2 = (t & 0x4) >> 2;
3521            b3 = (t & 0x8) >> 3;
3522            b4 = (t & 0x10) >> 4;
3523            b5 = (t & 0x20) >> 5;
3524            b6 = (t & 0x40) >> 6;
3525            b7 = (t & 0x80) >> 7;
3526            data = (EXPAND_BYTE(b7) << 56) |
3527                   (EXPAND_BYTE(b6) << 48) |
3528                   (EXPAND_BYTE(b5) << 40) |
3529                   (EXPAND_BYTE(b4) << 32) |
3530                   (EXPAND_BYTE(b3) << 24) |
3531                   (EXPAND_BYTE(b2) << 16) |
3532                   (EXPAND_BYTE(b1) <<  8) |
3533                   EXPAND_BYTE(b0);
3534        }
3535        break;
3536    case 10:
3537        /* data: {2{imm[7], ~imm[6], {5{imm[6]}}, imm[5:0], 19'0}} */
3538        {
3539            uint64_t b6, b7;
3540            uint64_t t0, t1;
3541            b6 = (imm & 0x40) >> 6;
3542            b7 = (imm & 0x80) >> 7;
3543            t0 = (imm & 0x3f);
3544            t1 = (b7 << 6) | ((1-b6) << 5) | (uint64_t)(b6 ? 0x1f : 0);
3545            data  = (t1 << 57) | (t0 << 51) | (t1 << 25) | (t0 << 19);
3546        }
3547        break;
3548    case 11:
3549        /* data: {32'0, imm[7], ~{imm[6]}, 5{imm[6]}, imm[5:0], 19'0} */
3550        {
3551            uint64_t b6,b7;
3552            uint64_t t0, t1;
3553            b6 = (imm & 0x40) >> 6;
3554            b7 = (imm & 0x80) >> 7;
3555            t0 = (imm & 0x3f);
3556            t1 = (b7 << 6) | ((1-b6) << 5) | (b6 ? 0x1f : 0);
3557            data = (t1 << 25) | (t0 << 19);
3558        }
3559        break;
3560    case 12:
3561        /* data: {imm[7], ~imm[6], 8{imm[6]}, imm[5:0], 48'0} */
3562        {
3563            uint64_t b6,b7;
3564            uint64_t t0, t1;
3565            b6 = (imm & 0x40) >> 6;
3566            b7 = (imm & 0x80) >> 7;
3567            t0 = (imm & 0x3f);
3568            t1 = (b7 << 9) | ((1-b6) << 8) | (b6 ? 0xff : 0);
3569            data = (t1 << 54) | (t0 << 48);
3570        }
3571        break;
3572    default:
3573        g_assert_not_reached();
3574    }
3575    return data;
3576}
3577
3578static bool check_valid_vldi_mode(arg_vldi *a)
3579{
3580   return  extract32(a->imm, 8, 4) <= 12;
3581}
3582
3583static bool gen_vldi(DisasContext *ctx, arg_vldi *a, uint32_t oprsz)
3584{
3585    int sel, vece;
3586    uint64_t value;
3587
3588    sel = (a->imm >> 12) & 0x1;
3589
3590    if (sel && !check_valid_vldi_mode(a)) {
3591        generate_exception(ctx, EXCCODE_INE);
3592        return true;
3593    }
3594
3595    if (!check_vec(ctx, oprsz)) {
3596        return true;
3597    }
3598
3599    if (sel) {
3600        value = vldi_get_value(ctx, a->imm);
3601        vece = MO_64;
3602    } else {
3603        value = ((int32_t)(a->imm << 22)) >> 22;
3604        vece = (a->imm >> 10) & 0x3;
3605    }
3606
3607    tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd), oprsz, ctx->vl/8,
3608                         tcg_constant_i64(value));
3609    return true;
3610}
3611
3612TRANS(vldi, LSX, gen_vldi, 16)
3613TRANS(xvldi, LASX, gen_vldi, 32)
3614
3615static bool gen_vandn_v(DisasContext *ctx, arg_vvv *a, uint32_t oprsz)
3616{
3617    uint32_t vd_ofs, vj_ofs, vk_ofs;
3618
3619    if (!check_vec(ctx, oprsz)) {
3620        return true;
3621    }
3622
3623    vd_ofs = vec_full_offset(a->vd);
3624    vj_ofs = vec_full_offset(a->vj);
3625    vk_ofs = vec_full_offset(a->vk);
3626
3627    tcg_gen_gvec_andc(MO_64, vd_ofs, vk_ofs, vj_ofs, oprsz, ctx->vl / 8);
3628    return true;
3629}
3630
3631static void gen_vnori(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
3632{
3633    TCGv_vec t1;
3634
3635    t1 = tcg_constant_vec_matching(t, vece, imm);
3636    tcg_gen_nor_vec(vece, t, a, t1);
3637}
3638
3639static void gen_vnori_b(TCGv_i64 t, TCGv_i64 a, int64_t imm)
3640{
3641    tcg_gen_movi_i64(t, dup_const(MO_8, imm));
3642    tcg_gen_nor_i64(t, a, t);
3643}
3644
3645static void do_vnori_b(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3646                       int64_t imm, uint32_t oprsz, uint32_t maxsz)
3647{
3648    static const TCGOpcode vecop_list[] = {
3649        INDEX_op_nor_vec, 0
3650        };
3651    static const GVecGen2i op = {
3652       .fni8 = gen_vnori_b,
3653       .fniv = gen_vnori,
3654       .fnoi = gen_helper_vnori_b,
3655       .opt_opc = vecop_list,
3656       .vece = MO_8
3657    };
3658
3659    tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op);
3660}
3661
3662TRANS(vand_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_and)
3663TRANS(vor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_or)
3664TRANS(vxor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_xor)
3665TRANS(vnor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_nor)
3666TRANS(vandn_v, LSX, gen_vandn_v, 16)
3667TRANS(vorn_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_orc)
3668TRANS(vandi_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_andi)
3669TRANS(vori_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_ori)
3670TRANS(vxori_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_xori)
3671TRANS(vnori_b, LSX, gvec_vv_i, MO_8, do_vnori_b)
3672TRANS(xvand_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_and)
3673TRANS(xvor_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_or)
3674TRANS(xvxor_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_xor)
3675TRANS(xvnor_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_nor)
3676TRANS(xvandn_v, LASX, gen_vandn_v, 32)
3677TRANS(xvorn_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_orc)
3678TRANS(xvandi_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_andi)
3679TRANS(xvori_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_ori)
3680TRANS(xvxori_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_xori)
3681TRANS(xvnori_b, LASX, gvec_xx_i, MO_8, do_vnori_b)
3682
3683TRANS(vsll_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_shlv)
3684TRANS(vsll_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_shlv)
3685TRANS(vsll_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_shlv)
3686TRANS(vsll_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_shlv)
3687TRANS(vslli_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_shli)
3688TRANS(vslli_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_shli)
3689TRANS(vslli_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_shli)
3690TRANS(vslli_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_shli)
3691TRANS(xvsll_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_shlv)
3692TRANS(xvsll_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_shlv)
3693TRANS(xvsll_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_shlv)
3694TRANS(xvsll_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_shlv)
3695TRANS(xvslli_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_shli)
3696TRANS(xvslli_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_shli)
3697TRANS(xvslli_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_shli)
3698TRANS(xvslli_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_shli)
3699
3700TRANS(vsrl_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_shrv)
3701TRANS(vsrl_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_shrv)
3702TRANS(vsrl_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_shrv)
3703TRANS(vsrl_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_shrv)
3704TRANS(vsrli_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_shri)
3705TRANS(vsrli_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_shri)
3706TRANS(vsrli_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_shri)
3707TRANS(vsrli_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_shri)
3708TRANS(xvsrl_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_shrv)
3709TRANS(xvsrl_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_shrv)
3710TRANS(xvsrl_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_shrv)
3711TRANS(xvsrl_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_shrv)
3712TRANS(xvsrli_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_shri)
3713TRANS(xvsrli_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_shri)
3714TRANS(xvsrli_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_shri)
3715TRANS(xvsrli_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_shri)
3716
3717TRANS(vsra_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sarv)
3718TRANS(vsra_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sarv)
3719TRANS(vsra_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_sarv)
3720TRANS(vsra_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_sarv)
3721TRANS(vsrai_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_sari)
3722TRANS(vsrai_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_sari)
3723TRANS(vsrai_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_sari)
3724TRANS(vsrai_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_sari)
3725TRANS(xvsra_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sarv)
3726TRANS(xvsra_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sarv)
3727TRANS(xvsra_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sarv)
3728TRANS(xvsra_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sarv)
3729TRANS(xvsrai_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_sari)
3730TRANS(xvsrai_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_sari)
3731TRANS(xvsrai_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_sari)
3732TRANS(xvsrai_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_sari)
3733
3734TRANS(vrotr_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_rotrv)
3735TRANS(vrotr_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_rotrv)
3736TRANS(vrotr_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_rotrv)
3737TRANS(vrotr_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_rotrv)
3738TRANS(vrotri_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_rotri)
3739TRANS(vrotri_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_rotri)
3740TRANS(vrotri_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_rotri)
3741TRANS(vrotri_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_rotri)
3742TRANS(xvrotr_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_rotrv)
3743TRANS(xvrotr_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_rotrv)
3744TRANS(xvrotr_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_rotrv)
3745TRANS(xvrotr_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_rotrv)
3746TRANS(xvrotri_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_rotri)
3747TRANS(xvrotri_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_rotri)
3748TRANS(xvrotri_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_rotri)
3749TRANS(xvrotri_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_rotri)
3750
3751TRANS(vsllwil_h_b, LSX, gen_vv_i, gen_helper_vsllwil_h_b)
3752TRANS(vsllwil_w_h, LSX, gen_vv_i, gen_helper_vsllwil_w_h)
3753TRANS(vsllwil_d_w, LSX, gen_vv_i, gen_helper_vsllwil_d_w)
3754TRANS(vextl_q_d, LSX, gen_vv, gen_helper_vextl_q_d)
3755TRANS(vsllwil_hu_bu, LSX, gen_vv_i, gen_helper_vsllwil_hu_bu)
3756TRANS(vsllwil_wu_hu, LSX, gen_vv_i, gen_helper_vsllwil_wu_hu)
3757TRANS(vsllwil_du_wu, LSX, gen_vv_i, gen_helper_vsllwil_du_wu)
3758TRANS(vextl_qu_du, LSX, gen_vv, gen_helper_vextl_qu_du)
3759TRANS(xvsllwil_h_b, LASX, gen_xx_i, gen_helper_vsllwil_h_b)
3760TRANS(xvsllwil_w_h, LASX, gen_xx_i, gen_helper_vsllwil_w_h)
3761TRANS(xvsllwil_d_w, LASX, gen_xx_i, gen_helper_vsllwil_d_w)
3762TRANS(xvextl_q_d, LASX, gen_xx, gen_helper_vextl_q_d)
3763TRANS(xvsllwil_hu_bu, LASX, gen_xx_i, gen_helper_vsllwil_hu_bu)
3764TRANS(xvsllwil_wu_hu, LASX, gen_xx_i, gen_helper_vsllwil_wu_hu)
3765TRANS(xvsllwil_du_wu, LASX, gen_xx_i, gen_helper_vsllwil_du_wu)
3766TRANS(xvextl_qu_du, LASX, gen_xx, gen_helper_vextl_qu_du)
3767
3768TRANS(vsrlr_b, LSX, gen_vvv, gen_helper_vsrlr_b)
3769TRANS(vsrlr_h, LSX, gen_vvv, gen_helper_vsrlr_h)
3770TRANS(vsrlr_w, LSX, gen_vvv, gen_helper_vsrlr_w)
3771TRANS(vsrlr_d, LSX, gen_vvv, gen_helper_vsrlr_d)
3772TRANS(vsrlri_b, LSX, gen_vv_i, gen_helper_vsrlri_b)
3773TRANS(vsrlri_h, LSX, gen_vv_i, gen_helper_vsrlri_h)
3774TRANS(vsrlri_w, LSX, gen_vv_i, gen_helper_vsrlri_w)
3775TRANS(vsrlri_d, LSX, gen_vv_i, gen_helper_vsrlri_d)
3776TRANS(xvsrlr_b, LASX, gen_xxx, gen_helper_vsrlr_b)
3777TRANS(xvsrlr_h, LASX, gen_xxx, gen_helper_vsrlr_h)
3778TRANS(xvsrlr_w, LASX, gen_xxx, gen_helper_vsrlr_w)
3779TRANS(xvsrlr_d, LASX, gen_xxx, gen_helper_vsrlr_d)
3780TRANS(xvsrlri_b, LASX, gen_xx_i, gen_helper_vsrlri_b)
3781TRANS(xvsrlri_h, LASX, gen_xx_i, gen_helper_vsrlri_h)
3782TRANS(xvsrlri_w, LASX, gen_xx_i, gen_helper_vsrlri_w)
3783TRANS(xvsrlri_d, LASX, gen_xx_i, gen_helper_vsrlri_d)
3784
3785TRANS(vsrar_b, LSX, gen_vvv, gen_helper_vsrar_b)
3786TRANS(vsrar_h, LSX, gen_vvv, gen_helper_vsrar_h)
3787TRANS(vsrar_w, LSX, gen_vvv, gen_helper_vsrar_w)
3788TRANS(vsrar_d, LSX, gen_vvv, gen_helper_vsrar_d)
3789TRANS(vsrari_b, LSX, gen_vv_i, gen_helper_vsrari_b)
3790TRANS(vsrari_h, LSX, gen_vv_i, gen_helper_vsrari_h)
3791TRANS(vsrari_w, LSX, gen_vv_i, gen_helper_vsrari_w)
3792TRANS(vsrari_d, LSX, gen_vv_i, gen_helper_vsrari_d)
3793TRANS(xvsrar_b, LASX, gen_xxx, gen_helper_vsrar_b)
3794TRANS(xvsrar_h, LASX, gen_xxx, gen_helper_vsrar_h)
3795TRANS(xvsrar_w, LASX, gen_xxx, gen_helper_vsrar_w)
3796TRANS(xvsrar_d, LASX, gen_xxx, gen_helper_vsrar_d)
3797TRANS(xvsrari_b, LASX, gen_xx_i, gen_helper_vsrari_b)
3798TRANS(xvsrari_h, LASX, gen_xx_i, gen_helper_vsrari_h)
3799TRANS(xvsrari_w, LASX, gen_xx_i, gen_helper_vsrari_w)
3800TRANS(xvsrari_d, LASX, gen_xx_i, gen_helper_vsrari_d)
3801
3802TRANS(vsrln_b_h, LSX, gen_vvv, gen_helper_vsrln_b_h)
3803TRANS(vsrln_h_w, LSX, gen_vvv, gen_helper_vsrln_h_w)
3804TRANS(vsrln_w_d, LSX, gen_vvv, gen_helper_vsrln_w_d)
3805TRANS(vsran_b_h, LSX, gen_vvv, gen_helper_vsran_b_h)
3806TRANS(vsran_h_w, LSX, gen_vvv, gen_helper_vsran_h_w)
3807TRANS(vsran_w_d, LSX, gen_vvv, gen_helper_vsran_w_d)
3808TRANS(xvsrln_b_h, LASX, gen_xxx, gen_helper_vsrln_b_h)
3809TRANS(xvsrln_h_w, LASX, gen_xxx, gen_helper_vsrln_h_w)
3810TRANS(xvsrln_w_d, LASX, gen_xxx, gen_helper_vsrln_w_d)
3811TRANS(xvsran_b_h, LASX, gen_xxx, gen_helper_vsran_b_h)
3812TRANS(xvsran_h_w, LASX, gen_xxx, gen_helper_vsran_h_w)
3813TRANS(xvsran_w_d, LASX, gen_xxx, gen_helper_vsran_w_d)
3814
3815TRANS(vsrlni_b_h, LSX, gen_vv_i, gen_helper_vsrlni_b_h)
3816TRANS(vsrlni_h_w, LSX, gen_vv_i, gen_helper_vsrlni_h_w)
3817TRANS(vsrlni_w_d, LSX, gen_vv_i, gen_helper_vsrlni_w_d)
3818TRANS(vsrlni_d_q, LSX, gen_vv_i, gen_helper_vsrlni_d_q)
3819TRANS(vsrani_b_h, LSX, gen_vv_i, gen_helper_vsrani_b_h)
3820TRANS(vsrani_h_w, LSX, gen_vv_i, gen_helper_vsrani_h_w)
3821TRANS(vsrani_w_d, LSX, gen_vv_i, gen_helper_vsrani_w_d)
3822TRANS(vsrani_d_q, LSX, gen_vv_i, gen_helper_vsrani_d_q)
3823TRANS(xvsrlni_b_h, LASX, gen_xx_i, gen_helper_vsrlni_b_h)
3824TRANS(xvsrlni_h_w, LASX, gen_xx_i, gen_helper_vsrlni_h_w)
3825TRANS(xvsrlni_w_d, LASX, gen_xx_i, gen_helper_vsrlni_w_d)
3826TRANS(xvsrlni_d_q, LASX, gen_xx_i, gen_helper_vsrlni_d_q)
3827TRANS(xvsrani_b_h, LASX, gen_xx_i, gen_helper_vsrani_b_h)
3828TRANS(xvsrani_h_w, LASX, gen_xx_i, gen_helper_vsrani_h_w)
3829TRANS(xvsrani_w_d, LASX, gen_xx_i, gen_helper_vsrani_w_d)
3830TRANS(xvsrani_d_q, LASX, gen_xx_i, gen_helper_vsrani_d_q)
3831
3832TRANS(vsrlrn_b_h, LSX, gen_vvv, gen_helper_vsrlrn_b_h)
3833TRANS(vsrlrn_h_w, LSX, gen_vvv, gen_helper_vsrlrn_h_w)
3834TRANS(vsrlrn_w_d, LSX, gen_vvv, gen_helper_vsrlrn_w_d)
3835TRANS(vsrarn_b_h, LSX, gen_vvv, gen_helper_vsrarn_b_h)
3836TRANS(vsrarn_h_w, LSX, gen_vvv, gen_helper_vsrarn_h_w)
3837TRANS(vsrarn_w_d, LSX, gen_vvv, gen_helper_vsrarn_w_d)
3838TRANS(xvsrlrn_b_h, LASX, gen_xxx, gen_helper_vsrlrn_b_h)
3839TRANS(xvsrlrn_h_w, LASX, gen_xxx, gen_helper_vsrlrn_h_w)
3840TRANS(xvsrlrn_w_d, LASX, gen_xxx, gen_helper_vsrlrn_w_d)
3841TRANS(xvsrarn_b_h, LASX, gen_xxx, gen_helper_vsrarn_b_h)
3842TRANS(xvsrarn_h_w, LASX, gen_xxx, gen_helper_vsrarn_h_w)
3843TRANS(xvsrarn_w_d, LASX, gen_xxx, gen_helper_vsrarn_w_d)
3844
3845TRANS(vsrlrni_b_h, LSX, gen_vv_i, gen_helper_vsrlrni_b_h)
3846TRANS(vsrlrni_h_w, LSX, gen_vv_i, gen_helper_vsrlrni_h_w)
3847TRANS(vsrlrni_w_d, LSX, gen_vv_i, gen_helper_vsrlrni_w_d)
3848TRANS(vsrlrni_d_q, LSX, gen_vv_i, gen_helper_vsrlrni_d_q)
3849TRANS(vsrarni_b_h, LSX, gen_vv_i, gen_helper_vsrarni_b_h)
3850TRANS(vsrarni_h_w, LSX, gen_vv_i, gen_helper_vsrarni_h_w)
3851TRANS(vsrarni_w_d, LSX, gen_vv_i, gen_helper_vsrarni_w_d)
3852TRANS(vsrarni_d_q, LSX, gen_vv_i, gen_helper_vsrarni_d_q)
3853TRANS(xvsrlrni_b_h, LASX, gen_xx_i, gen_helper_vsrlrni_b_h)
3854TRANS(xvsrlrni_h_w, LASX, gen_xx_i, gen_helper_vsrlrni_h_w)
3855TRANS(xvsrlrni_w_d, LASX, gen_xx_i, gen_helper_vsrlrni_w_d)
3856TRANS(xvsrlrni_d_q, LASX, gen_xx_i, gen_helper_vsrlrni_d_q)
3857TRANS(xvsrarni_b_h, LASX, gen_xx_i, gen_helper_vsrarni_b_h)
3858TRANS(xvsrarni_h_w, LASX, gen_xx_i, gen_helper_vsrarni_h_w)
3859TRANS(xvsrarni_w_d, LASX, gen_xx_i, gen_helper_vsrarni_w_d)
3860TRANS(xvsrarni_d_q, LASX, gen_xx_i, gen_helper_vsrarni_d_q)
3861
3862TRANS(vssrln_b_h, LSX, gen_vvv, gen_helper_vssrln_b_h)
3863TRANS(vssrln_h_w, LSX, gen_vvv, gen_helper_vssrln_h_w)
3864TRANS(vssrln_w_d, LSX, gen_vvv, gen_helper_vssrln_w_d)
3865TRANS(vssran_b_h, LSX, gen_vvv, gen_helper_vssran_b_h)
3866TRANS(vssran_h_w, LSX, gen_vvv, gen_helper_vssran_h_w)
3867TRANS(vssran_w_d, LSX, gen_vvv, gen_helper_vssran_w_d)
3868TRANS(vssrln_bu_h, LSX, gen_vvv, gen_helper_vssrln_bu_h)
3869TRANS(vssrln_hu_w, LSX, gen_vvv, gen_helper_vssrln_hu_w)
3870TRANS(vssrln_wu_d, LSX, gen_vvv, gen_helper_vssrln_wu_d)
3871TRANS(vssran_bu_h, LSX, gen_vvv, gen_helper_vssran_bu_h)
3872TRANS(vssran_hu_w, LSX, gen_vvv, gen_helper_vssran_hu_w)
3873TRANS(vssran_wu_d, LSX, gen_vvv, gen_helper_vssran_wu_d)
3874TRANS(xvssrln_b_h, LASX, gen_xxx, gen_helper_vssrln_b_h)
3875TRANS(xvssrln_h_w, LASX, gen_xxx, gen_helper_vssrln_h_w)
3876TRANS(xvssrln_w_d, LASX, gen_xxx, gen_helper_vssrln_w_d)
3877TRANS(xvssran_b_h, LASX, gen_xxx, gen_helper_vssran_b_h)
3878TRANS(xvssran_h_w, LASX, gen_xxx, gen_helper_vssran_h_w)
3879TRANS(xvssran_w_d, LASX, gen_xxx, gen_helper_vssran_w_d)
3880TRANS(xvssrln_bu_h, LASX, gen_xxx, gen_helper_vssrln_bu_h)
3881TRANS(xvssrln_hu_w, LASX, gen_xxx, gen_helper_vssrln_hu_w)
3882TRANS(xvssrln_wu_d, LASX, gen_xxx, gen_helper_vssrln_wu_d)
3883TRANS(xvssran_bu_h, LASX, gen_xxx, gen_helper_vssran_bu_h)
3884TRANS(xvssran_hu_w, LASX, gen_xxx, gen_helper_vssran_hu_w)
3885TRANS(xvssran_wu_d, LASX, gen_xxx, gen_helper_vssran_wu_d)
3886
3887TRANS(vssrlni_b_h, LSX, gen_vv_i, gen_helper_vssrlni_b_h)
3888TRANS(vssrlni_h_w, LSX, gen_vv_i, gen_helper_vssrlni_h_w)
3889TRANS(vssrlni_w_d, LSX, gen_vv_i, gen_helper_vssrlni_w_d)
3890TRANS(vssrlni_d_q, LSX, gen_vv_i, gen_helper_vssrlni_d_q)
3891TRANS(vssrani_b_h, LSX, gen_vv_i, gen_helper_vssrani_b_h)
3892TRANS(vssrani_h_w, LSX, gen_vv_i, gen_helper_vssrani_h_w)
3893TRANS(vssrani_w_d, LSX, gen_vv_i, gen_helper_vssrani_w_d)
3894TRANS(vssrani_d_q, LSX, gen_vv_i, gen_helper_vssrani_d_q)
3895TRANS(vssrlni_bu_h, LSX, gen_vv_i, gen_helper_vssrlni_bu_h)
3896TRANS(vssrlni_hu_w, LSX, gen_vv_i, gen_helper_vssrlni_hu_w)
3897TRANS(vssrlni_wu_d, LSX, gen_vv_i, gen_helper_vssrlni_wu_d)
3898TRANS(vssrlni_du_q, LSX, gen_vv_i, gen_helper_vssrlni_du_q)
3899TRANS(vssrani_bu_h, LSX, gen_vv_i, gen_helper_vssrani_bu_h)
3900TRANS(vssrani_hu_w, LSX, gen_vv_i, gen_helper_vssrani_hu_w)
3901TRANS(vssrani_wu_d, LSX, gen_vv_i, gen_helper_vssrani_wu_d)
3902TRANS(vssrani_du_q, LSX, gen_vv_i, gen_helper_vssrani_du_q)
3903TRANS(xvssrlni_b_h, LASX, gen_xx_i, gen_helper_vssrlni_b_h)
3904TRANS(xvssrlni_h_w, LASX, gen_xx_i, gen_helper_vssrlni_h_w)
3905TRANS(xvssrlni_w_d, LASX, gen_xx_i, gen_helper_vssrlni_w_d)
3906TRANS(xvssrlni_d_q, LASX, gen_xx_i, gen_helper_vssrlni_d_q)
3907TRANS(xvssrani_b_h, LASX, gen_xx_i, gen_helper_vssrani_b_h)
3908TRANS(xvssrani_h_w, LASX, gen_xx_i, gen_helper_vssrani_h_w)
3909TRANS(xvssrani_w_d, LASX, gen_xx_i, gen_helper_vssrani_w_d)
3910TRANS(xvssrani_d_q, LASX, gen_xx_i, gen_helper_vssrani_d_q)
3911TRANS(xvssrlni_bu_h, LASX, gen_xx_i, gen_helper_vssrlni_bu_h)
3912TRANS(xvssrlni_hu_w, LASX, gen_xx_i, gen_helper_vssrlni_hu_w)
3913TRANS(xvssrlni_wu_d, LASX, gen_xx_i, gen_helper_vssrlni_wu_d)
3914TRANS(xvssrlni_du_q, LASX, gen_xx_i, gen_helper_vssrlni_du_q)
3915TRANS(xvssrani_bu_h, LASX, gen_xx_i, gen_helper_vssrani_bu_h)
3916TRANS(xvssrani_hu_w, LASX, gen_xx_i, gen_helper_vssrani_hu_w)
3917TRANS(xvssrani_wu_d, LASX, gen_xx_i, gen_helper_vssrani_wu_d)
3918TRANS(xvssrani_du_q, LASX, gen_xx_i, gen_helper_vssrani_du_q)
3919
3920TRANS(vssrlrn_b_h, LSX, gen_vvv, gen_helper_vssrlrn_b_h)
3921TRANS(vssrlrn_h_w, LSX, gen_vvv, gen_helper_vssrlrn_h_w)
3922TRANS(vssrlrn_w_d, LSX, gen_vvv, gen_helper_vssrlrn_w_d)
3923TRANS(vssrarn_b_h, LSX, gen_vvv, gen_helper_vssrarn_b_h)
3924TRANS(vssrarn_h_w, LSX, gen_vvv, gen_helper_vssrarn_h_w)
3925TRANS(vssrarn_w_d, LSX, gen_vvv, gen_helper_vssrarn_w_d)
3926TRANS(vssrlrn_bu_h, LSX, gen_vvv, gen_helper_vssrlrn_bu_h)
3927TRANS(vssrlrn_hu_w, LSX, gen_vvv, gen_helper_vssrlrn_hu_w)
3928TRANS(vssrlrn_wu_d, LSX, gen_vvv, gen_helper_vssrlrn_wu_d)
3929TRANS(vssrarn_bu_h, LSX, gen_vvv, gen_helper_vssrarn_bu_h)
3930TRANS(vssrarn_hu_w, LSX, gen_vvv, gen_helper_vssrarn_hu_w)
3931TRANS(vssrarn_wu_d, LSX, gen_vvv, gen_helper_vssrarn_wu_d)
3932TRANS(xvssrlrn_b_h, LASX, gen_xxx, gen_helper_vssrlrn_b_h)
3933TRANS(xvssrlrn_h_w, LASX, gen_xxx, gen_helper_vssrlrn_h_w)
3934TRANS(xvssrlrn_w_d, LASX, gen_xxx, gen_helper_vssrlrn_w_d)
3935TRANS(xvssrarn_b_h, LASX, gen_xxx, gen_helper_vssrarn_b_h)
3936TRANS(xvssrarn_h_w, LASX, gen_xxx, gen_helper_vssrarn_h_w)
3937TRANS(xvssrarn_w_d, LASX, gen_xxx, gen_helper_vssrarn_w_d)
3938TRANS(xvssrlrn_bu_h, LASX, gen_xxx, gen_helper_vssrlrn_bu_h)
3939TRANS(xvssrlrn_hu_w, LASX, gen_xxx, gen_helper_vssrlrn_hu_w)
3940TRANS(xvssrlrn_wu_d, LASX, gen_xxx, gen_helper_vssrlrn_wu_d)
3941TRANS(xvssrarn_bu_h, LASX, gen_xxx, gen_helper_vssrarn_bu_h)
3942TRANS(xvssrarn_hu_w, LASX, gen_xxx, gen_helper_vssrarn_hu_w)
3943TRANS(xvssrarn_wu_d, LASX, gen_xxx, gen_helper_vssrarn_wu_d)
3944
3945TRANS(vssrlrni_b_h, LSX, gen_vv_i, gen_helper_vssrlrni_b_h)
3946TRANS(vssrlrni_h_w, LSX, gen_vv_i, gen_helper_vssrlrni_h_w)
3947TRANS(vssrlrni_w_d, LSX, gen_vv_i, gen_helper_vssrlrni_w_d)
3948TRANS(vssrlrni_d_q, LSX, gen_vv_i, gen_helper_vssrlrni_d_q)
3949TRANS(vssrarni_b_h, LSX, gen_vv_i, gen_helper_vssrarni_b_h)
3950TRANS(vssrarni_h_w, LSX, gen_vv_i, gen_helper_vssrarni_h_w)
3951TRANS(vssrarni_w_d, LSX, gen_vv_i, gen_helper_vssrarni_w_d)
3952TRANS(vssrarni_d_q, LSX, gen_vv_i, gen_helper_vssrarni_d_q)
3953TRANS(vssrlrni_bu_h, LSX, gen_vv_i, gen_helper_vssrlrni_bu_h)
3954TRANS(vssrlrni_hu_w, LSX, gen_vv_i, gen_helper_vssrlrni_hu_w)
3955TRANS(vssrlrni_wu_d, LSX, gen_vv_i, gen_helper_vssrlrni_wu_d)
3956TRANS(vssrlrni_du_q, LSX, gen_vv_i, gen_helper_vssrlrni_du_q)
3957TRANS(vssrarni_bu_h, LSX, gen_vv_i, gen_helper_vssrarni_bu_h)
3958TRANS(vssrarni_hu_w, LSX, gen_vv_i, gen_helper_vssrarni_hu_w)
3959TRANS(vssrarni_wu_d, LSX, gen_vv_i, gen_helper_vssrarni_wu_d)
3960TRANS(vssrarni_du_q, LSX, gen_vv_i, gen_helper_vssrarni_du_q)
3961TRANS(xvssrlrni_b_h, LASX, gen_xx_i, gen_helper_vssrlrni_b_h)
3962TRANS(xvssrlrni_h_w, LASX, gen_xx_i, gen_helper_vssrlrni_h_w)
3963TRANS(xvssrlrni_w_d, LASX, gen_xx_i, gen_helper_vssrlrni_w_d)
3964TRANS(xvssrlrni_d_q, LASX, gen_xx_i, gen_helper_vssrlrni_d_q)
3965TRANS(xvssrarni_b_h, LASX, gen_xx_i, gen_helper_vssrarni_b_h)
3966TRANS(xvssrarni_h_w, LASX, gen_xx_i, gen_helper_vssrarni_h_w)
3967TRANS(xvssrarni_w_d, LASX, gen_xx_i, gen_helper_vssrarni_w_d)
3968TRANS(xvssrarni_d_q, LASX, gen_xx_i, gen_helper_vssrarni_d_q)
3969TRANS(xvssrlrni_bu_h, LASX, gen_xx_i, gen_helper_vssrlrni_bu_h)
3970TRANS(xvssrlrni_hu_w, LASX, gen_xx_i, gen_helper_vssrlrni_hu_w)
3971TRANS(xvssrlrni_wu_d, LASX, gen_xx_i, gen_helper_vssrlrni_wu_d)
3972TRANS(xvssrlrni_du_q, LASX, gen_xx_i, gen_helper_vssrlrni_du_q)
3973TRANS(xvssrarni_bu_h, LASX, gen_xx_i, gen_helper_vssrarni_bu_h)
3974TRANS(xvssrarni_hu_w, LASX, gen_xx_i, gen_helper_vssrarni_hu_w)
3975TRANS(xvssrarni_wu_d, LASX, gen_xx_i, gen_helper_vssrarni_wu_d)
3976TRANS(xvssrarni_du_q, LASX, gen_xx_i, gen_helper_vssrarni_du_q)
3977
3978TRANS(vclo_b, LSX, gen_vv, gen_helper_vclo_b)
3979TRANS(vclo_h, LSX, gen_vv, gen_helper_vclo_h)
3980TRANS(vclo_w, LSX, gen_vv, gen_helper_vclo_w)
3981TRANS(vclo_d, LSX, gen_vv, gen_helper_vclo_d)
3982TRANS(vclz_b, LSX, gen_vv, gen_helper_vclz_b)
3983TRANS(vclz_h, LSX, gen_vv, gen_helper_vclz_h)
3984TRANS(vclz_w, LSX, gen_vv, gen_helper_vclz_w)
3985TRANS(vclz_d, LSX, gen_vv, gen_helper_vclz_d)
3986TRANS(xvclo_b, LASX, gen_xx, gen_helper_vclo_b)
3987TRANS(xvclo_h, LASX, gen_xx, gen_helper_vclo_h)
3988TRANS(xvclo_w, LASX, gen_xx, gen_helper_vclo_w)
3989TRANS(xvclo_d, LASX, gen_xx, gen_helper_vclo_d)
3990TRANS(xvclz_b, LASX, gen_xx, gen_helper_vclz_b)
3991TRANS(xvclz_h, LASX, gen_xx, gen_helper_vclz_h)
3992TRANS(xvclz_w, LASX, gen_xx, gen_helper_vclz_w)
3993TRANS(xvclz_d, LASX, gen_xx, gen_helper_vclz_d)
3994
3995TRANS(vpcnt_b, LSX, gen_vv, gen_helper_vpcnt_b)
3996TRANS(vpcnt_h, LSX, gen_vv, gen_helper_vpcnt_h)
3997TRANS(vpcnt_w, LSX, gen_vv, gen_helper_vpcnt_w)
3998TRANS(vpcnt_d, LSX, gen_vv, gen_helper_vpcnt_d)
3999TRANS(xvpcnt_b, LASX, gen_xx, gen_helper_vpcnt_b)
4000TRANS(xvpcnt_h, LASX, gen_xx, gen_helper_vpcnt_h)
4001TRANS(xvpcnt_w, LASX, gen_xx, gen_helper_vpcnt_w)
4002TRANS(xvpcnt_d, LASX, gen_xx, gen_helper_vpcnt_d)
4003
4004static void do_vbit(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b,
4005                    void (*func)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
4006{
4007    TCGv_vec mask, lsh, t1, one;
4008
4009    lsh = tcg_temp_new_vec_matching(t);
4010    t1 = tcg_temp_new_vec_matching(t);
4011    mask = tcg_constant_vec_matching(t, vece, (8 << vece) - 1);
4012    one = tcg_constant_vec_matching(t, vece, 1);
4013
4014    tcg_gen_and_vec(vece, lsh, b, mask);
4015    tcg_gen_shlv_vec(vece, t1, one, lsh);
4016    func(vece, t, a, t1);
4017}
4018
4019static void gen_vbitclr(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
4020{
4021    do_vbit(vece, t, a, b, tcg_gen_andc_vec);
4022}
4023
4024static void gen_vbitset(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
4025{
4026    do_vbit(vece, t, a, b, tcg_gen_or_vec);
4027}
4028
4029static void gen_vbitrev(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
4030{
4031    do_vbit(vece, t, a, b, tcg_gen_xor_vec);
4032}
4033
4034static void do_vbitclr(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
4035                      uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
4036{
4037    static const TCGOpcode vecop_list[] = {
4038        INDEX_op_shlv_vec, INDEX_op_andc_vec, 0
4039        };
4040    static const GVecGen3 op[4] = {
4041        {
4042            .fniv = gen_vbitclr,
4043            .fno = gen_helper_vbitclr_b,
4044            .opt_opc = vecop_list,
4045            .vece = MO_8
4046        },
4047        {
4048            .fniv = gen_vbitclr,
4049            .fno = gen_helper_vbitclr_h,
4050            .opt_opc = vecop_list,
4051            .vece = MO_16
4052        },
4053        {
4054            .fniv = gen_vbitclr,
4055            .fno = gen_helper_vbitclr_w,
4056            .opt_opc = vecop_list,
4057            .vece = MO_32
4058        },
4059        {
4060            .fniv = gen_vbitclr,
4061            .fno = gen_helper_vbitclr_d,
4062            .opt_opc = vecop_list,
4063            .vece = MO_64
4064        },
4065    };
4066
4067    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
4068}
4069
4070TRANS(vbitclr_b, LSX, gvec_vvv, MO_8, do_vbitclr)
4071TRANS(vbitclr_h, LSX, gvec_vvv, MO_16, do_vbitclr)
4072TRANS(vbitclr_w, LSX, gvec_vvv, MO_32, do_vbitclr)
4073TRANS(vbitclr_d, LSX, gvec_vvv, MO_64, do_vbitclr)
4074TRANS(xvbitclr_b, LASX, gvec_xxx, MO_8, do_vbitclr)
4075TRANS(xvbitclr_h, LASX, gvec_xxx, MO_16, do_vbitclr)
4076TRANS(xvbitclr_w, LASX, gvec_xxx, MO_32, do_vbitclr)
4077TRANS(xvbitclr_d, LASX, gvec_xxx, MO_64, do_vbitclr)
4078
4079static void do_vbiti(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm,
4080                     void (*func)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
4081{
4082    int lsh;
4083    TCGv_vec t1, one;
4084
4085    lsh = imm & ((8 << vece) -1);
4086    t1 = tcg_temp_new_vec_matching(t);
4087    one = tcg_constant_vec_matching(t, vece, 1);
4088
4089    tcg_gen_shli_vec(vece, t1, one, lsh);
4090    func(vece, t, a, t1);
4091}
4092
4093static void gen_vbitclri(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
4094{
4095    do_vbiti(vece, t, a, imm, tcg_gen_andc_vec);
4096}
4097
4098static void gen_vbitseti(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
4099{
4100    do_vbiti(vece, t, a, imm, tcg_gen_or_vec);
4101}
4102
4103static void gen_vbitrevi(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
4104{
4105    do_vbiti(vece, t, a, imm, tcg_gen_xor_vec);
4106}
4107
4108static void do_vbitclri(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
4109                        int64_t imm, uint32_t oprsz, uint32_t maxsz)
4110{
4111    static const TCGOpcode vecop_list[] = {
4112        INDEX_op_shli_vec, INDEX_op_andc_vec, 0
4113        };
4114    static const GVecGen2i op[4] = {
4115        {
4116            .fniv = gen_vbitclri,
4117            .fnoi = gen_helper_vbitclri_b,
4118            .opt_opc = vecop_list,
4119            .vece = MO_8
4120        },
4121        {
4122            .fniv = gen_vbitclri,
4123            .fnoi = gen_helper_vbitclri_h,
4124            .opt_opc = vecop_list,
4125            .vece = MO_16
4126        },
4127        {
4128            .fniv = gen_vbitclri,
4129            .fnoi = gen_helper_vbitclri_w,
4130            .opt_opc = vecop_list,
4131            .vece = MO_32
4132        },
4133        {
4134            .fniv = gen_vbitclri,
4135            .fnoi = gen_helper_vbitclri_d,
4136            .opt_opc = vecop_list,
4137            .vece = MO_64
4138        },
4139    };
4140
4141    tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
4142}
4143
4144TRANS(vbitclri_b, LSX, gvec_vv_i, MO_8, do_vbitclri)
4145TRANS(vbitclri_h, LSX, gvec_vv_i, MO_16, do_vbitclri)
4146TRANS(vbitclri_w, LSX, gvec_vv_i, MO_32, do_vbitclri)
4147TRANS(vbitclri_d, LSX, gvec_vv_i, MO_64, do_vbitclri)
4148TRANS(xvbitclri_b, LASX, gvec_xx_i, MO_8, do_vbitclri)
4149TRANS(xvbitclri_h, LASX, gvec_xx_i, MO_16, do_vbitclri)
4150TRANS(xvbitclri_w, LASX, gvec_xx_i, MO_32, do_vbitclri)
4151TRANS(xvbitclri_d, LASX, gvec_xx_i, MO_64, do_vbitclri)
4152
4153static void do_vbitset(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
4154                       uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
4155{
4156    static const TCGOpcode vecop_list[] = {
4157        INDEX_op_shlv_vec, 0
4158        };
4159    static const GVecGen3 op[4] = {
4160        {
4161            .fniv = gen_vbitset,
4162            .fno = gen_helper_vbitset_b,
4163            .opt_opc = vecop_list,
4164            .vece = MO_8
4165        },
4166        {
4167            .fniv = gen_vbitset,
4168            .fno = gen_helper_vbitset_h,
4169            .opt_opc = vecop_list,
4170            .vece = MO_16
4171        },
4172        {
4173            .fniv = gen_vbitset,
4174            .fno = gen_helper_vbitset_w,
4175            .opt_opc = vecop_list,
4176            .vece = MO_32
4177        },
4178        {
4179            .fniv = gen_vbitset,
4180            .fno = gen_helper_vbitset_d,
4181            .opt_opc = vecop_list,
4182            .vece = MO_64
4183        },
4184    };
4185
4186    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
4187}
4188
4189TRANS(vbitset_b, LSX, gvec_vvv, MO_8, do_vbitset)
4190TRANS(vbitset_h, LSX, gvec_vvv, MO_16, do_vbitset)
4191TRANS(vbitset_w, LSX, gvec_vvv, MO_32, do_vbitset)
4192TRANS(vbitset_d, LSX, gvec_vvv, MO_64, do_vbitset)
4193TRANS(xvbitset_b, LASX, gvec_xxx, MO_8, do_vbitset)
4194TRANS(xvbitset_h, LASX, gvec_xxx, MO_16, do_vbitset)
4195TRANS(xvbitset_w, LASX, gvec_xxx, MO_32, do_vbitset)
4196TRANS(xvbitset_d, LASX, gvec_xxx, MO_64, do_vbitset)
4197
4198static void do_vbitseti(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
4199                        int64_t imm, uint32_t oprsz, uint32_t maxsz)
4200{
4201    static const TCGOpcode vecop_list[] = {
4202        INDEX_op_shli_vec, 0
4203        };
4204    static const GVecGen2i op[4] = {
4205        {
4206            .fniv = gen_vbitseti,
4207            .fnoi = gen_helper_vbitseti_b,
4208            .opt_opc = vecop_list,
4209            .vece = MO_8
4210        },
4211        {
4212            .fniv = gen_vbitseti,
4213            .fnoi = gen_helper_vbitseti_h,
4214            .opt_opc = vecop_list,
4215            .vece = MO_16
4216        },
4217        {
4218            .fniv = gen_vbitseti,
4219            .fnoi = gen_helper_vbitseti_w,
4220            .opt_opc = vecop_list,
4221            .vece = MO_32
4222        },
4223        {
4224            .fniv = gen_vbitseti,
4225            .fnoi = gen_helper_vbitseti_d,
4226            .opt_opc = vecop_list,
4227            .vece = MO_64
4228        },
4229    };
4230
4231    tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
4232}
4233
4234TRANS(vbitseti_b, LSX, gvec_vv_i, MO_8, do_vbitseti)
4235TRANS(vbitseti_h, LSX, gvec_vv_i, MO_16, do_vbitseti)
4236TRANS(vbitseti_w, LSX, gvec_vv_i, MO_32, do_vbitseti)
4237TRANS(vbitseti_d, LSX, gvec_vv_i, MO_64, do_vbitseti)
4238TRANS(xvbitseti_b, LASX, gvec_xx_i, MO_8, do_vbitseti)
4239TRANS(xvbitseti_h, LASX, gvec_xx_i, MO_16, do_vbitseti)
4240TRANS(xvbitseti_w, LASX, gvec_xx_i, MO_32, do_vbitseti)
4241TRANS(xvbitseti_d, LASX, gvec_xx_i, MO_64, do_vbitseti)
4242
4243static void do_vbitrev(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
4244                       uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
4245{
4246    static const TCGOpcode vecop_list[] = {
4247        INDEX_op_shlv_vec, 0
4248        };
4249    static const GVecGen3 op[4] = {
4250        {
4251            .fniv = gen_vbitrev,
4252            .fno = gen_helper_vbitrev_b,
4253            .opt_opc = vecop_list,
4254            .vece = MO_8
4255        },
4256        {
4257            .fniv = gen_vbitrev,
4258            .fno = gen_helper_vbitrev_h,
4259            .opt_opc = vecop_list,
4260            .vece = MO_16
4261        },
4262        {
4263            .fniv = gen_vbitrev,
4264            .fno = gen_helper_vbitrev_w,
4265            .opt_opc = vecop_list,
4266            .vece = MO_32
4267        },
4268        {
4269            .fniv = gen_vbitrev,
4270            .fno = gen_helper_vbitrev_d,
4271            .opt_opc = vecop_list,
4272            .vece = MO_64
4273        },
4274    };
4275
4276    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
4277}
4278
4279TRANS(vbitrev_b, LSX, gvec_vvv, MO_8, do_vbitrev)
4280TRANS(vbitrev_h, LSX, gvec_vvv, MO_16, do_vbitrev)
4281TRANS(vbitrev_w, LSX, gvec_vvv, MO_32, do_vbitrev)
4282TRANS(vbitrev_d, LSX, gvec_vvv, MO_64, do_vbitrev)
4283TRANS(xvbitrev_b, LASX, gvec_xxx, MO_8, do_vbitrev)
4284TRANS(xvbitrev_h, LASX, gvec_xxx, MO_16, do_vbitrev)
4285TRANS(xvbitrev_w, LASX, gvec_xxx, MO_32, do_vbitrev)
4286TRANS(xvbitrev_d, LASX, gvec_xxx, MO_64, do_vbitrev)
4287
4288static void do_vbitrevi(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
4289                        int64_t imm, uint32_t oprsz, uint32_t maxsz)
4290{
4291    static const TCGOpcode vecop_list[] = {
4292        INDEX_op_shli_vec, 0
4293        };
4294    static const GVecGen2i op[4] = {
4295        {
4296            .fniv = gen_vbitrevi,
4297            .fnoi = gen_helper_vbitrevi_b,
4298            .opt_opc = vecop_list,
4299            .vece = MO_8
4300        },
4301        {
4302            .fniv = gen_vbitrevi,
4303            .fnoi = gen_helper_vbitrevi_h,
4304            .opt_opc = vecop_list,
4305            .vece = MO_16
4306        },
4307        {
4308            .fniv = gen_vbitrevi,
4309            .fnoi = gen_helper_vbitrevi_w,
4310            .opt_opc = vecop_list,
4311            .vece = MO_32
4312        },
4313        {
4314            .fniv = gen_vbitrevi,
4315            .fnoi = gen_helper_vbitrevi_d,
4316            .opt_opc = vecop_list,
4317            .vece = MO_64
4318        },
4319    };
4320
4321    tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
4322}
4323
4324TRANS(vbitrevi_b, LSX, gvec_vv_i, MO_8, do_vbitrevi)
4325TRANS(vbitrevi_h, LSX, gvec_vv_i, MO_16, do_vbitrevi)
4326TRANS(vbitrevi_w, LSX, gvec_vv_i, MO_32, do_vbitrevi)
4327TRANS(vbitrevi_d, LSX, gvec_vv_i, MO_64, do_vbitrevi)
4328TRANS(xvbitrevi_b, LASX, gvec_xx_i, MO_8, do_vbitrevi)
4329TRANS(xvbitrevi_h, LASX, gvec_xx_i, MO_16, do_vbitrevi)
4330TRANS(xvbitrevi_w, LASX, gvec_xx_i, MO_32, do_vbitrevi)
4331TRANS(xvbitrevi_d, LASX, gvec_xx_i, MO_64, do_vbitrevi)
4332
4333TRANS(vfrstp_b, LSX, gen_vvv, gen_helper_vfrstp_b)
4334TRANS(vfrstp_h, LSX, gen_vvv, gen_helper_vfrstp_h)
4335TRANS(vfrstpi_b, LSX, gen_vv_i, gen_helper_vfrstpi_b)
4336TRANS(vfrstpi_h, LSX, gen_vv_i, gen_helper_vfrstpi_h)
4337TRANS(xvfrstp_b, LASX, gen_xxx, gen_helper_vfrstp_b)
4338TRANS(xvfrstp_h, LASX, gen_xxx, gen_helper_vfrstp_h)
4339TRANS(xvfrstpi_b, LASX, gen_xx_i, gen_helper_vfrstpi_b)
4340TRANS(xvfrstpi_h, LASX, gen_xx_i, gen_helper_vfrstpi_h)
4341
4342TRANS(vfadd_s, LSX, gen_vvv_ptr, gen_helper_vfadd_s)
4343TRANS(vfadd_d, LSX, gen_vvv_ptr, gen_helper_vfadd_d)
4344TRANS(vfsub_s, LSX, gen_vvv_ptr, gen_helper_vfsub_s)
4345TRANS(vfsub_d, LSX, gen_vvv_ptr, gen_helper_vfsub_d)
4346TRANS(vfmul_s, LSX, gen_vvv_ptr, gen_helper_vfmul_s)
4347TRANS(vfmul_d, LSX, gen_vvv_ptr, gen_helper_vfmul_d)
4348TRANS(vfdiv_s, LSX, gen_vvv_ptr, gen_helper_vfdiv_s)
4349TRANS(vfdiv_d, LSX, gen_vvv_ptr, gen_helper_vfdiv_d)
4350TRANS(xvfadd_s, LASX, gen_xxx_ptr, gen_helper_vfadd_s)
4351TRANS(xvfadd_d, LASX, gen_xxx_ptr, gen_helper_vfadd_d)
4352TRANS(xvfsub_s, LASX, gen_xxx_ptr, gen_helper_vfsub_s)
4353TRANS(xvfsub_d, LASX, gen_xxx_ptr, gen_helper_vfsub_d)
4354TRANS(xvfmul_s, LASX, gen_xxx_ptr, gen_helper_vfmul_s)
4355TRANS(xvfmul_d, LASX, gen_xxx_ptr, gen_helper_vfmul_d)
4356TRANS(xvfdiv_s, LASX, gen_xxx_ptr, gen_helper_vfdiv_s)
4357TRANS(xvfdiv_d, LASX, gen_xxx_ptr, gen_helper_vfdiv_d)
4358
4359TRANS(vfmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfmadd_s)
4360TRANS(vfmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfmadd_d)
4361TRANS(vfmsub_s, LSX, gen_vvvv_ptr, gen_helper_vfmsub_s)
4362TRANS(vfmsub_d, LSX, gen_vvvv_ptr, gen_helper_vfmsub_d)
4363TRANS(vfnmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_s)
4364TRANS(vfnmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_d)
4365TRANS(vfnmsub_s, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_s)
4366TRANS(vfnmsub_d, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_d)
4367TRANS(xvfmadd_s, LASX, gen_xxxx_ptr, gen_helper_vfmadd_s)
4368TRANS(xvfmadd_d, LASX, gen_xxxx_ptr, gen_helper_vfmadd_d)
4369TRANS(xvfmsub_s, LASX, gen_xxxx_ptr, gen_helper_vfmsub_s)
4370TRANS(xvfmsub_d, LASX, gen_xxxx_ptr, gen_helper_vfmsub_d)
4371TRANS(xvfnmadd_s, LASX, gen_xxxx_ptr, gen_helper_vfnmadd_s)
4372TRANS(xvfnmadd_d, LASX, gen_xxxx_ptr, gen_helper_vfnmadd_d)
4373TRANS(xvfnmsub_s, LASX, gen_xxxx_ptr, gen_helper_vfnmsub_s)
4374TRANS(xvfnmsub_d, LASX, gen_xxxx_ptr, gen_helper_vfnmsub_d)
4375
4376TRANS(vfmax_s, LSX, gen_vvv_ptr, gen_helper_vfmax_s)
4377TRANS(vfmax_d, LSX, gen_vvv_ptr, gen_helper_vfmax_d)
4378TRANS(vfmin_s, LSX, gen_vvv_ptr, gen_helper_vfmin_s)
4379TRANS(vfmin_d, LSX, gen_vvv_ptr, gen_helper_vfmin_d)
4380TRANS(xvfmax_s, LASX, gen_xxx_ptr, gen_helper_vfmax_s)
4381TRANS(xvfmax_d, LASX, gen_xxx_ptr, gen_helper_vfmax_d)
4382TRANS(xvfmin_s, LASX, gen_xxx_ptr, gen_helper_vfmin_s)
4383TRANS(xvfmin_d, LASX, gen_xxx_ptr, gen_helper_vfmin_d)
4384
4385TRANS(vfmaxa_s, LSX, gen_vvv_ptr, gen_helper_vfmaxa_s)
4386TRANS(vfmaxa_d, LSX, gen_vvv_ptr, gen_helper_vfmaxa_d)
4387TRANS(vfmina_s, LSX, gen_vvv_ptr, gen_helper_vfmina_s)
4388TRANS(vfmina_d, LSX, gen_vvv_ptr, gen_helper_vfmina_d)
4389TRANS(xvfmaxa_s, LASX, gen_xxx_ptr, gen_helper_vfmaxa_s)
4390TRANS(xvfmaxa_d, LASX, gen_xxx_ptr, gen_helper_vfmaxa_d)
4391TRANS(xvfmina_s, LASX, gen_xxx_ptr, gen_helper_vfmina_s)
4392TRANS(xvfmina_d, LASX, gen_xxx_ptr, gen_helper_vfmina_d)
4393
4394TRANS(vflogb_s, LSX, gen_vv_ptr, gen_helper_vflogb_s)
4395TRANS(vflogb_d, LSX, gen_vv_ptr, gen_helper_vflogb_d)
4396TRANS(xvflogb_s, LASX, gen_xx_ptr, gen_helper_vflogb_s)
4397TRANS(xvflogb_d, LASX, gen_xx_ptr, gen_helper_vflogb_d)
4398
4399TRANS(vfclass_s, LSX, gen_vv_ptr, gen_helper_vfclass_s)
4400TRANS(vfclass_d, LSX, gen_vv_ptr, gen_helper_vfclass_d)
4401TRANS(xvfclass_s, LASX, gen_xx_ptr, gen_helper_vfclass_s)
4402TRANS(xvfclass_d, LASX, gen_xx_ptr, gen_helper_vfclass_d)
4403
4404TRANS(vfsqrt_s, LSX, gen_vv_ptr, gen_helper_vfsqrt_s)
4405TRANS(vfsqrt_d, LSX, gen_vv_ptr, gen_helper_vfsqrt_d)
4406TRANS(vfrecip_s, LSX, gen_vv_ptr, gen_helper_vfrecip_s)
4407TRANS(vfrecip_d, LSX, gen_vv_ptr, gen_helper_vfrecip_d)
4408TRANS(vfrsqrt_s, LSX, gen_vv_ptr, gen_helper_vfrsqrt_s)
4409TRANS(vfrsqrt_d, LSX, gen_vv_ptr, gen_helper_vfrsqrt_d)
4410TRANS(xvfsqrt_s, LASX, gen_xx_ptr, gen_helper_vfsqrt_s)
4411TRANS(xvfsqrt_d, LASX, gen_xx_ptr, gen_helper_vfsqrt_d)
4412TRANS(xvfrecip_s, LASX, gen_xx_ptr, gen_helper_vfrecip_s)
4413TRANS(xvfrecip_d, LASX, gen_xx_ptr, gen_helper_vfrecip_d)
4414TRANS(xvfrsqrt_s, LASX, gen_xx_ptr, gen_helper_vfrsqrt_s)
4415TRANS(xvfrsqrt_d, LASX, gen_xx_ptr, gen_helper_vfrsqrt_d)
4416
4417TRANS(vfcvtl_s_h, LSX, gen_vv_ptr, gen_helper_vfcvtl_s_h)
4418TRANS(vfcvth_s_h, LSX, gen_vv_ptr, gen_helper_vfcvth_s_h)
4419TRANS(vfcvtl_d_s, LSX, gen_vv_ptr, gen_helper_vfcvtl_d_s)
4420TRANS(vfcvth_d_s, LSX, gen_vv_ptr, gen_helper_vfcvth_d_s)
4421TRANS(vfcvt_h_s, LSX, gen_vvv_ptr, gen_helper_vfcvt_h_s)
4422TRANS(vfcvt_s_d, LSX, gen_vvv_ptr, gen_helper_vfcvt_s_d)
4423TRANS(xvfcvtl_s_h, LASX, gen_xx_ptr, gen_helper_vfcvtl_s_h)
4424TRANS(xvfcvth_s_h, LASX, gen_xx_ptr, gen_helper_vfcvth_s_h)
4425TRANS(xvfcvtl_d_s, LASX, gen_xx_ptr, gen_helper_vfcvtl_d_s)
4426TRANS(xvfcvth_d_s, LASX, gen_xx_ptr, gen_helper_vfcvth_d_s)
4427TRANS(xvfcvt_h_s, LASX, gen_xxx_ptr, gen_helper_vfcvt_h_s)
4428TRANS(xvfcvt_s_d, LASX, gen_xxx_ptr, gen_helper_vfcvt_s_d)
4429
4430TRANS(vfrintrne_s, LSX, gen_vv_ptr, gen_helper_vfrintrne_s)
4431TRANS(vfrintrne_d, LSX, gen_vv_ptr, gen_helper_vfrintrne_d)
4432TRANS(vfrintrz_s, LSX, gen_vv_ptr, gen_helper_vfrintrz_s)
4433TRANS(vfrintrz_d, LSX, gen_vv_ptr, gen_helper_vfrintrz_d)
4434TRANS(vfrintrp_s, LSX, gen_vv_ptr, gen_helper_vfrintrp_s)
4435TRANS(vfrintrp_d, LSX, gen_vv_ptr, gen_helper_vfrintrp_d)
4436TRANS(vfrintrm_s, LSX, gen_vv_ptr, gen_helper_vfrintrm_s)
4437TRANS(vfrintrm_d, LSX, gen_vv_ptr, gen_helper_vfrintrm_d)
4438TRANS(vfrint_s, LSX, gen_vv_ptr, gen_helper_vfrint_s)
4439TRANS(vfrint_d, LSX, gen_vv_ptr, gen_helper_vfrint_d)
4440TRANS(xvfrintrne_s, LASX, gen_xx_ptr, gen_helper_vfrintrne_s)
4441TRANS(xvfrintrne_d, LASX, gen_xx_ptr, gen_helper_vfrintrne_d)
4442TRANS(xvfrintrz_s, LASX, gen_xx_ptr, gen_helper_vfrintrz_s)
4443TRANS(xvfrintrz_d, LASX, gen_xx_ptr, gen_helper_vfrintrz_d)
4444TRANS(xvfrintrp_s, LASX, gen_xx_ptr, gen_helper_vfrintrp_s)
4445TRANS(xvfrintrp_d, LASX, gen_xx_ptr, gen_helper_vfrintrp_d)
4446TRANS(xvfrintrm_s, LASX, gen_xx_ptr, gen_helper_vfrintrm_s)
4447TRANS(xvfrintrm_d, LASX, gen_xx_ptr, gen_helper_vfrintrm_d)
4448TRANS(xvfrint_s, LASX, gen_xx_ptr, gen_helper_vfrint_s)
4449TRANS(xvfrint_d, LASX, gen_xx_ptr, gen_helper_vfrint_d)
4450
4451TRANS(vftintrne_w_s, LSX, gen_vv_ptr, gen_helper_vftintrne_w_s)
4452TRANS(vftintrne_l_d, LSX, gen_vv_ptr, gen_helper_vftintrne_l_d)
4453TRANS(vftintrz_w_s, LSX, gen_vv_ptr, gen_helper_vftintrz_w_s)
4454TRANS(vftintrz_l_d, LSX, gen_vv_ptr, gen_helper_vftintrz_l_d)
4455TRANS(vftintrp_w_s, LSX, gen_vv_ptr, gen_helper_vftintrp_w_s)
4456TRANS(vftintrp_l_d, LSX, gen_vv_ptr, gen_helper_vftintrp_l_d)
4457TRANS(vftintrm_w_s, LSX, gen_vv_ptr, gen_helper_vftintrm_w_s)
4458TRANS(vftintrm_l_d, LSX, gen_vv_ptr, gen_helper_vftintrm_l_d)
4459TRANS(vftint_w_s, LSX, gen_vv_ptr, gen_helper_vftint_w_s)
4460TRANS(vftint_l_d, LSX, gen_vv_ptr, gen_helper_vftint_l_d)
4461TRANS(vftintrz_wu_s, LSX, gen_vv_ptr, gen_helper_vftintrz_wu_s)
4462TRANS(vftintrz_lu_d, LSX, gen_vv_ptr, gen_helper_vftintrz_lu_d)
4463TRANS(vftint_wu_s, LSX, gen_vv_ptr, gen_helper_vftint_wu_s)
4464TRANS(vftint_lu_d, LSX, gen_vv_ptr, gen_helper_vftint_lu_d)
4465TRANS(vftintrne_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrne_w_d)
4466TRANS(vftintrz_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrz_w_d)
4467TRANS(vftintrp_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrp_w_d)
4468TRANS(vftintrm_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrm_w_d)
4469TRANS(vftint_w_d, LSX, gen_vvv_ptr, gen_helper_vftint_w_d)
4470TRANS(vftintrnel_l_s, LSX, gen_vv_ptr, gen_helper_vftintrnel_l_s)
4471TRANS(vftintrneh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrneh_l_s)
4472TRANS(vftintrzl_l_s, LSX, gen_vv_ptr, gen_helper_vftintrzl_l_s)
4473TRANS(vftintrzh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrzh_l_s)
4474TRANS(vftintrpl_l_s, LSX, gen_vv_ptr, gen_helper_vftintrpl_l_s)
4475TRANS(vftintrph_l_s, LSX, gen_vv_ptr, gen_helper_vftintrph_l_s)
4476TRANS(vftintrml_l_s, LSX, gen_vv_ptr, gen_helper_vftintrml_l_s)
4477TRANS(vftintrmh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrmh_l_s)
4478TRANS(vftintl_l_s, LSX, gen_vv_ptr, gen_helper_vftintl_l_s)
4479TRANS(vftinth_l_s, LSX, gen_vv_ptr, gen_helper_vftinth_l_s)
4480TRANS(xvftintrne_w_s, LASX, gen_xx_ptr, gen_helper_vftintrne_w_s)
4481TRANS(xvftintrne_l_d, LASX, gen_xx_ptr, gen_helper_vftintrne_l_d)
4482TRANS(xvftintrz_w_s, LASX, gen_xx_ptr, gen_helper_vftintrz_w_s)
4483TRANS(xvftintrz_l_d, LASX, gen_xx_ptr, gen_helper_vftintrz_l_d)
4484TRANS(xvftintrp_w_s, LASX, gen_xx_ptr, gen_helper_vftintrp_w_s)
4485TRANS(xvftintrp_l_d, LASX, gen_xx_ptr, gen_helper_vftintrp_l_d)
4486TRANS(xvftintrm_w_s, LASX, gen_xx_ptr, gen_helper_vftintrm_w_s)
4487TRANS(xvftintrm_l_d, LASX, gen_xx_ptr, gen_helper_vftintrm_l_d)
4488TRANS(xvftint_w_s, LASX, gen_xx_ptr, gen_helper_vftint_w_s)
4489TRANS(xvftint_l_d, LASX, gen_xx_ptr, gen_helper_vftint_l_d)
4490TRANS(xvftintrz_wu_s, LASX, gen_xx_ptr, gen_helper_vftintrz_wu_s)
4491TRANS(xvftintrz_lu_d, LASX, gen_xx_ptr, gen_helper_vftintrz_lu_d)
4492TRANS(xvftint_wu_s, LASX, gen_xx_ptr, gen_helper_vftint_wu_s)
4493TRANS(xvftint_lu_d, LASX, gen_xx_ptr, gen_helper_vftint_lu_d)
4494TRANS(xvftintrne_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrne_w_d)
4495TRANS(xvftintrz_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrz_w_d)
4496TRANS(xvftintrp_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrp_w_d)
4497TRANS(xvftintrm_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrm_w_d)
4498TRANS(xvftint_w_d, LASX, gen_xxx_ptr, gen_helper_vftint_w_d)
4499TRANS(xvftintrnel_l_s, LASX, gen_xx_ptr, gen_helper_vftintrnel_l_s)
4500TRANS(xvftintrneh_l_s, LASX, gen_xx_ptr, gen_helper_vftintrneh_l_s)
4501TRANS(xvftintrzl_l_s, LASX, gen_xx_ptr, gen_helper_vftintrzl_l_s)
4502TRANS(xvftintrzh_l_s, LASX, gen_xx_ptr, gen_helper_vftintrzh_l_s)
4503TRANS(xvftintrpl_l_s, LASX, gen_xx_ptr, gen_helper_vftintrpl_l_s)
4504TRANS(xvftintrph_l_s, LASX, gen_xx_ptr, gen_helper_vftintrph_l_s)
4505TRANS(xvftintrml_l_s, LASX, gen_xx_ptr, gen_helper_vftintrml_l_s)
4506TRANS(xvftintrmh_l_s, LASX, gen_xx_ptr, gen_helper_vftintrmh_l_s)
4507TRANS(xvftintl_l_s, LASX, gen_xx_ptr, gen_helper_vftintl_l_s)
4508TRANS(xvftinth_l_s, LASX, gen_xx_ptr, gen_helper_vftinth_l_s)
4509
4510TRANS(vffint_s_w, LSX, gen_vv_ptr, gen_helper_vffint_s_w)
4511TRANS(vffint_d_l, LSX, gen_vv_ptr, gen_helper_vffint_d_l)
4512TRANS(vffint_s_wu, LSX, gen_vv_ptr, gen_helper_vffint_s_wu)
4513TRANS(vffint_d_lu, LSX, gen_vv_ptr, gen_helper_vffint_d_lu)
4514TRANS(vffintl_d_w, LSX, gen_vv_ptr, gen_helper_vffintl_d_w)
4515TRANS(vffinth_d_w, LSX, gen_vv_ptr, gen_helper_vffinth_d_w)
4516TRANS(vffint_s_l, LSX, gen_vvv_ptr, gen_helper_vffint_s_l)
4517TRANS(xvffint_s_w, LASX, gen_xx_ptr, gen_helper_vffint_s_w)
4518TRANS(xvffint_d_l, LASX, gen_xx_ptr, gen_helper_vffint_d_l)
4519TRANS(xvffint_s_wu, LASX, gen_xx_ptr, gen_helper_vffint_s_wu)
4520TRANS(xvffint_d_lu, LASX, gen_xx_ptr, gen_helper_vffint_d_lu)
4521TRANS(xvffintl_d_w, LASX, gen_xx_ptr, gen_helper_vffintl_d_w)
4522TRANS(xvffinth_d_w, LASX, gen_xx_ptr, gen_helper_vffinth_d_w)
4523TRANS(xvffint_s_l, LASX, gen_xxx_ptr, gen_helper_vffint_s_l)
4524
4525static bool do_cmp_vl(DisasContext *ctx, arg_vvv *a,
4526                      uint32_t oprsz, MemOp mop, TCGCond cond)
4527{
4528    uint32_t vd_ofs, vj_ofs, vk_ofs;
4529
4530    if (!check_vec(ctx, oprsz)) {
4531        return true;
4532    }
4533
4534    vd_ofs = vec_full_offset(a->vd);
4535    vj_ofs = vec_full_offset(a->vj);
4536    vk_ofs = vec_full_offset(a->vk);
4537
4538    tcg_gen_gvec_cmp(cond, mop, vd_ofs, vj_ofs, vk_ofs, oprsz, ctx->vl / 8);
4539    return true;
4540}
4541
4542static bool do_cmp(DisasContext *ctx, arg_vvv *a,
4543                   MemOp mop, TCGCond cond)
4544{
4545    return do_cmp_vl(ctx, a, 16, mop, cond);
4546}
4547
4548static bool do_xcmp(DisasContext *ctx, arg_vvv *a,
4549                    MemOp mop, TCGCond cond)
4550{
4551    return do_cmp_vl(ctx, a, 32, mop, cond);
4552}
4553
4554static bool do_cmpi_vl(DisasContext *ctx, arg_vv_i *a,
4555                       uint32_t oprsz, MemOp mop, TCGCond cond)
4556{
4557    uint32_t vd_ofs, vj_ofs;
4558
4559    if (!check_vec(ctx, oprsz)) {
4560        return true;
4561    }
4562
4563    vd_ofs = vec_full_offset(a->vd);
4564    vj_ofs = vec_full_offset(a->vj);
4565
4566    tcg_gen_gvec_cmpi(cond, mop, vd_ofs, vj_ofs, a->imm, oprsz, ctx->vl / 8);
4567    return true;
4568}
4569
4570static bool do_cmpi(DisasContext *ctx, arg_vv_i *a,
4571                    MemOp mop, TCGCond cond)
4572{
4573    return do_cmpi_vl(ctx, a, 16, mop, cond);
4574}
4575
4576static bool do_xcmpi(DisasContext *ctx, arg_vv_i *a,
4577                     MemOp mop, TCGCond cond)
4578{
4579    return do_cmpi_vl(ctx, a, 32, mop, cond);
4580}
4581
4582TRANS(vseq_b, LSX, do_cmp, MO_8, TCG_COND_EQ)
4583TRANS(vseq_h, LSX, do_cmp, MO_16, TCG_COND_EQ)
4584TRANS(vseq_w, LSX, do_cmp, MO_32, TCG_COND_EQ)
4585TRANS(vseq_d, LSX, do_cmp, MO_64, TCG_COND_EQ)
4586TRANS(vseqi_b, LSX, do_cmpi, MO_8, TCG_COND_EQ)
4587TRANS(vseqi_h, LSX, do_cmpi, MO_16, TCG_COND_EQ)
4588TRANS(vseqi_w, LSX, do_cmpi, MO_32, TCG_COND_EQ)
4589TRANS(vseqi_d, LSX, do_cmpi, MO_64, TCG_COND_EQ)
4590TRANS(xvseq_b, LASX, do_xcmp, MO_8, TCG_COND_EQ)
4591TRANS(xvseq_h, LASX, do_xcmp, MO_16, TCG_COND_EQ)
4592TRANS(xvseq_w, LASX, do_xcmp, MO_32, TCG_COND_EQ)
4593TRANS(xvseq_d, LASX, do_xcmp, MO_64, TCG_COND_EQ)
4594TRANS(xvseqi_b, LASX, do_xcmpi, MO_8, TCG_COND_EQ)
4595TRANS(xvseqi_h, LASX, do_xcmpi, MO_16, TCG_COND_EQ)
4596TRANS(xvseqi_w, LASX, do_xcmpi, MO_32, TCG_COND_EQ)
4597TRANS(xvseqi_d, LASX, do_xcmpi, MO_64, TCG_COND_EQ)
4598
4599TRANS(vsle_b, LSX, do_cmp, MO_8, TCG_COND_LE)
4600TRANS(vsle_h, LSX, do_cmp, MO_16, TCG_COND_LE)
4601TRANS(vsle_w, LSX, do_cmp, MO_32, TCG_COND_LE)
4602TRANS(vsle_d, LSX, do_cmp, MO_64, TCG_COND_LE)
4603TRANS(vslei_b, LSX, do_cmpi, MO_8, TCG_COND_LE)
4604TRANS(vslei_h, LSX, do_cmpi, MO_16, TCG_COND_LE)
4605TRANS(vslei_w, LSX, do_cmpi, MO_32, TCG_COND_LE)
4606TRANS(vslei_d, LSX, do_cmpi, MO_64, TCG_COND_LE)
4607TRANS(vsle_bu, LSX, do_cmp, MO_8, TCG_COND_LEU)
4608TRANS(vsle_hu, LSX, do_cmp, MO_16, TCG_COND_LEU)
4609TRANS(vsle_wu, LSX, do_cmp, MO_32, TCG_COND_LEU)
4610TRANS(vsle_du, LSX, do_cmp, MO_64, TCG_COND_LEU)
4611TRANS(vslei_bu, LSX, do_cmpi, MO_8, TCG_COND_LEU)
4612TRANS(vslei_hu, LSX, do_cmpi, MO_16, TCG_COND_LEU)
4613TRANS(vslei_wu, LSX, do_cmpi, MO_32, TCG_COND_LEU)
4614TRANS(vslei_du, LSX, do_cmpi, MO_64, TCG_COND_LEU)
4615TRANS(xvsle_b, LASX, do_xcmp, MO_8, TCG_COND_LE)
4616TRANS(xvsle_h, LASX, do_xcmp, MO_16, TCG_COND_LE)
4617TRANS(xvsle_w, LASX, do_xcmp, MO_32, TCG_COND_LE)
4618TRANS(xvsle_d, LASX, do_xcmp, MO_64, TCG_COND_LE)
4619TRANS(xvslei_b, LASX, do_xcmpi, MO_8, TCG_COND_LE)
4620TRANS(xvslei_h, LASX, do_xcmpi, MO_16, TCG_COND_LE)
4621TRANS(xvslei_w, LASX, do_xcmpi, MO_32, TCG_COND_LE)
4622TRANS(xvslei_d, LASX, do_xcmpi, MO_64, TCG_COND_LE)
4623TRANS(xvsle_bu, LASX, do_xcmp, MO_8, TCG_COND_LEU)
4624TRANS(xvsle_hu, LASX, do_xcmp, MO_16, TCG_COND_LEU)
4625TRANS(xvsle_wu, LASX, do_xcmp, MO_32, TCG_COND_LEU)
4626TRANS(xvsle_du, LASX, do_xcmp, MO_64, TCG_COND_LEU)
4627TRANS(xvslei_bu, LASX, do_xcmpi, MO_8, TCG_COND_LEU)
4628TRANS(xvslei_hu, LASX, do_xcmpi, MO_16, TCG_COND_LEU)
4629TRANS(xvslei_wu, LASX, do_xcmpi, MO_32, TCG_COND_LEU)
4630TRANS(xvslei_du, LASX, do_xcmpi, MO_64, TCG_COND_LEU)
4631
4632TRANS(vslt_b, LSX, do_cmp, MO_8, TCG_COND_LT)
4633TRANS(vslt_h, LSX, do_cmp, MO_16, TCG_COND_LT)
4634TRANS(vslt_w, LSX, do_cmp, MO_32, TCG_COND_LT)
4635TRANS(vslt_d, LSX, do_cmp, MO_64, TCG_COND_LT)
4636TRANS(vslti_b, LSX, do_cmpi, MO_8, TCG_COND_LT)
4637TRANS(vslti_h, LSX, do_cmpi, MO_16, TCG_COND_LT)
4638TRANS(vslti_w, LSX, do_cmpi, MO_32, TCG_COND_LT)
4639TRANS(vslti_d, LSX, do_cmpi, MO_64, TCG_COND_LT)
4640TRANS(vslt_bu, LSX, do_cmp, MO_8, TCG_COND_LTU)
4641TRANS(vslt_hu, LSX, do_cmp, MO_16, TCG_COND_LTU)
4642TRANS(vslt_wu, LSX, do_cmp, MO_32, TCG_COND_LTU)
4643TRANS(vslt_du, LSX, do_cmp, MO_64, TCG_COND_LTU)
4644TRANS(vslti_bu, LSX, do_cmpi, MO_8, TCG_COND_LTU)
4645TRANS(vslti_hu, LSX, do_cmpi, MO_16, TCG_COND_LTU)
4646TRANS(vslti_wu, LSX, do_cmpi, MO_32, TCG_COND_LTU)
4647TRANS(vslti_du, LSX, do_cmpi, MO_64, TCG_COND_LTU)
4648TRANS(xvslt_b, LASX, do_xcmp, MO_8, TCG_COND_LT)
4649TRANS(xvslt_h, LASX, do_xcmp, MO_16, TCG_COND_LT)
4650TRANS(xvslt_w, LASX, do_xcmp, MO_32, TCG_COND_LT)
4651TRANS(xvslt_d, LASX, do_xcmp, MO_64, TCG_COND_LT)
4652TRANS(xvslti_b, LASX, do_xcmpi, MO_8, TCG_COND_LT)
4653TRANS(xvslti_h, LASX, do_xcmpi, MO_16, TCG_COND_LT)
4654TRANS(xvslti_w, LASX, do_xcmpi, MO_32, TCG_COND_LT)
4655TRANS(xvslti_d, LASX, do_xcmpi, MO_64, TCG_COND_LT)
4656TRANS(xvslt_bu, LASX, do_xcmp, MO_8, TCG_COND_LTU)
4657TRANS(xvslt_hu, LASX, do_xcmp, MO_16, TCG_COND_LTU)
4658TRANS(xvslt_wu, LASX, do_xcmp, MO_32, TCG_COND_LTU)
4659TRANS(xvslt_du, LASX, do_xcmp, MO_64, TCG_COND_LTU)
4660TRANS(xvslti_bu, LASX, do_xcmpi, MO_8, TCG_COND_LTU)
4661TRANS(xvslti_hu, LASX, do_xcmpi, MO_16, TCG_COND_LTU)
4662TRANS(xvslti_wu, LASX, do_xcmpi, MO_32, TCG_COND_LTU)
4663TRANS(xvslti_du, LASX, do_xcmpi, MO_64, TCG_COND_LTU)
4664
4665static bool do_vfcmp_cond_s(DisasContext *ctx, arg_vvv_fcond *a, uint32_t sz)
4666{
4667    uint32_t flags = get_fcmp_flags(ctx, a->fcond >> 1);
4668    void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32);
4669    TCGv_i32 vd = tcg_constant_i32(a->vd);
4670    TCGv_i32 vj = tcg_constant_i32(a->vj);
4671    TCGv_i32 vk = tcg_constant_i32(a->vk);
4672    TCGv_i32 oprsz = tcg_constant_i32(sz);
4673
4674    if(flags == -1){
4675        generate_exception(ctx, EXCCODE_INE);
4676        return true;
4677    }
4678
4679    if (!check_vec(ctx, sz)) {
4680        return true;
4681    }
4682
4683    fn = (a->fcond & 1 ? gen_helper_vfcmp_s_s : gen_helper_vfcmp_c_s);
4684    fn(tcg_env, oprsz, vd, vj, vk, tcg_constant_i32(flags));
4685
4686    return true;
4687}
4688
4689static bool do_vfcmp_cond_d(DisasContext *ctx, arg_vvv_fcond *a, uint32_t sz)
4690{
4691    uint32_t flags = get_fcmp_flags(ctx, a->fcond >> 1);
4692    void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32);
4693    TCGv_i32 vd = tcg_constant_i32(a->vd);
4694    TCGv_i32 vj = tcg_constant_i32(a->vj);
4695    TCGv_i32 vk = tcg_constant_i32(a->vk);
4696    TCGv_i32 oprsz = tcg_constant_i32(sz);
4697
4698    if (flags == -1) {
4699        generate_exception(ctx, EXCCODE_INE);
4700        return true;
4701    }
4702
4703    if (!check_vec(ctx, sz)) {
4704        return true;
4705    }
4706
4707    fn = (a->fcond & 1 ? gen_helper_vfcmp_s_d : gen_helper_vfcmp_c_d);
4708    fn(tcg_env, oprsz, vd, vj, vk, tcg_constant_i32(flags));
4709
4710    return true;
4711}
4712
4713TRANS(vfcmp_cond_s, LSX, do_vfcmp_cond_s, 16)
4714TRANS(vfcmp_cond_d, LSX, do_vfcmp_cond_d, 16)
4715TRANS(xvfcmp_cond_s, LASX, do_vfcmp_cond_s, 32)
4716TRANS(xvfcmp_cond_d, LASX, do_vfcmp_cond_d, 32)
4717
4718static bool do_vbitsel_v(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz)
4719{
4720    if (!check_vec(ctx, oprsz)) {
4721        return true;
4722    }
4723
4724    tcg_gen_gvec_bitsel(MO_64, vec_full_offset(a->vd), vec_full_offset(a->va),
4725                        vec_full_offset(a->vk), vec_full_offset(a->vj),
4726                        oprsz, ctx->vl / 8);
4727    return true;
4728}
4729
4730TRANS(vbitsel_v, LSX, do_vbitsel_v, 16)
4731TRANS(xvbitsel_v, LASX, do_vbitsel_v, 32)
4732
4733static void gen_vbitseli(unsigned vece, TCGv_vec a, TCGv_vec b, int64_t imm)
4734{
4735    tcg_gen_bitsel_vec(vece, a, a, tcg_constant_vec_matching(a, vece, imm), b);
4736}
4737
4738static bool do_vbitseli_b(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz)
4739{
4740    static const GVecGen2i op = {
4741       .fniv = gen_vbitseli,
4742       .fnoi = gen_helper_vbitseli_b,
4743       .vece = MO_8,
4744       .load_dest = true
4745    };
4746
4747    if (!check_vec(ctx, oprsz)) {
4748        return true;
4749    }
4750
4751    tcg_gen_gvec_2i(vec_full_offset(a->vd), vec_full_offset(a->vj),
4752                    oprsz, ctx->vl / 8, a->imm , &op);
4753    return true;
4754}
4755
4756TRANS(vbitseli_b, LSX, do_vbitseli_b, 16)
4757TRANS(xvbitseli_b, LASX, do_vbitseli_b, 32)
4758
/*
 * VSET(NAME, COND) defines trans_NAME() for the 128-bit "set condition
 * flag from whole vector" instructions.
 *
 * The generated code ORs the two 64-bit halves of vj, compares the result
 * against zero using COND, and stores the 0/1 outcome as a byte into
 * cf[cd & 7].
 *
 * Returns false when LSX is not available, so the caller can treat the
 * encoding as unsupported.  Returns true otherwise, including when
 * check_vec() has raised the vector-disabled exception.
 *
 * Both guards run before any temporary is allocated or any register load
 * is emitted, so no dead TCG ops are produced on the failure paths.
 */
#define VSET(NAME, COND)                                                       \
static bool trans_## NAME (DisasContext *ctx, arg_cv *a)                       \
{                                                                              \
    TCGv_i64 t1, al, ah;                                                       \
                                                                               \
    if (!avail_LSX(ctx)) {                                                     \
        return false;                                                          \
    }                                                                          \
                                                                               \
    if (!check_vec(ctx, 16)) {                                                 \
        return true;                                                           \
    }                                                                          \
                                                                               \
    al = tcg_temp_new_i64();                                                   \
    ah = tcg_temp_new_i64();                                                   \
    t1 = tcg_temp_new_i64();                                                   \
                                                                               \
    get_vreg64(ah, a->vj, 1);                                                  \
    get_vreg64(al, a->vj, 0);                                                  \
                                                                               \
    tcg_gen_or_i64(t1, al, ah);                                                \
    tcg_gen_setcondi_i64(COND, t1, t1, 0);                                     \
    tcg_gen_st8_tl(t1, tcg_env, offsetof(CPULoongArchState, cf[a->cd & 0x7])); \
                                                                               \
    return true;                                                               \
}
4785
4786VSET(vseteqz_v, TCG_COND_EQ)
4787VSET(vsetnez_v, TCG_COND_NE)
4788
4789TRANS(vsetanyeqz_b, LSX, gen_cv, gen_helper_vsetanyeqz_b)
4790TRANS(vsetanyeqz_h, LSX, gen_cv, gen_helper_vsetanyeqz_h)
4791TRANS(vsetanyeqz_w, LSX, gen_cv, gen_helper_vsetanyeqz_w)
4792TRANS(vsetanyeqz_d, LSX, gen_cv, gen_helper_vsetanyeqz_d)
4793TRANS(vsetallnez_b, LSX, gen_cv, gen_helper_vsetallnez_b)
4794TRANS(vsetallnez_h, LSX, gen_cv, gen_helper_vsetallnez_h)
4795TRANS(vsetallnez_w, LSX, gen_cv, gen_helper_vsetallnez_w)
4796TRANS(vsetallnez_d, LSX, gen_cv, gen_helper_vsetallnez_d)
4797
/*
 * XVSET(NAME, COND) defines trans_NAME() for the 256-bit "set condition
 * flag from whole vector" instructions.
 *
 * The generated code ORs the four 64-bit parts of vj, compares the result
 * against zero using COND, and stores the 0/1 outcome as a byte into
 * cf[cd & 7].
 *
 * Returns false when LASX is not available, so the caller can treat the
 * encoding as unsupported.  Returns true otherwise, including when
 * check_vec() has raised the vector-disabled exception.
 *
 * Both guards run before any temporary is allocated or any register load
 * is emitted, so no dead TCG ops are produced on the failure paths.
 */
#define XVSET(NAME, COND)                                                      \
static bool trans_## NAME(DisasContext *ctx, arg_cv * a)                       \
{                                                                              \
    TCGv_i64 t1, t2, d[4];                                                     \
                                                                               \
    if (!avail_LASX(ctx)) {                                                    \
        return false;                                                          \
    }                                                                          \
                                                                               \
    if (!check_vec(ctx, 32)) {                                                 \
        return true;                                                           \
    }                                                                          \
                                                                               \
    d[0] = tcg_temp_new_i64();                                                 \
    d[1] = tcg_temp_new_i64();                                                 \
    d[2] = tcg_temp_new_i64();                                                 \
    d[3] = tcg_temp_new_i64();                                                 \
    t1 = tcg_temp_new_i64();                                                   \
    t2 = tcg_temp_new_i64();                                                   \
                                                                               \
    get_vreg64(d[0], a->vj, 0);                                                \
    get_vreg64(d[1], a->vj, 1);                                                \
    get_vreg64(d[2], a->vj, 2);                                                \
    get_vreg64(d[3], a->vj, 3);                                                \
                                                                               \
    tcg_gen_or_i64(t1, d[0], d[1]);                                            \
    tcg_gen_or_i64(t2, d[2], d[3]);                                            \
    tcg_gen_or_i64(t1, t2, t1);                                                \
    tcg_gen_setcondi_i64(COND, t1, t1, 0);                                     \
    tcg_gen_st8_tl(t1, tcg_env, offsetof(CPULoongArchState, cf[a->cd & 0x7])); \
                                                                               \
    return true;                                                               \
}
4831
4832XVSET(xvseteqz_v, TCG_COND_EQ)
4833XVSET(xvsetnez_v, TCG_COND_NE)
4834
4835TRANS(xvsetanyeqz_b, LASX, gen_cx, gen_helper_vsetanyeqz_b)
4836TRANS(xvsetanyeqz_h, LASX, gen_cx, gen_helper_vsetanyeqz_h)
4837TRANS(xvsetanyeqz_w, LASX, gen_cx, gen_helper_vsetanyeqz_w)
4838TRANS(xvsetanyeqz_d, LASX, gen_cx, gen_helper_vsetanyeqz_d)
4839TRANS(xvsetallnez_b, LASX, gen_cx, gen_helper_vsetallnez_b)
4840TRANS(xvsetallnez_h, LASX, gen_cx, gen_helper_vsetallnez_h)
4841TRANS(xvsetallnez_w, LASX, gen_cx, gen_helper_vsetallnez_w)
4842TRANS(xvsetallnez_d, LASX, gen_cx, gen_helper_vsetallnez_d)
4843
4844static bool gen_g2v_vl(DisasContext *ctx, arg_vr_i *a, uint32_t oprsz, MemOp mop,
4845                       void (*func)(TCGv, TCGv_ptr, tcg_target_long))
4846{
4847    TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
4848
4849    if (!check_vec(ctx, oprsz)) {
4850        return true;
4851    }
4852
4853    func(src, tcg_env, vec_reg_offset(a->vd, a->imm, mop));
4854
4855    return true;
4856}
4857
4858static bool gen_g2v(DisasContext *ctx, arg_vr_i *a, MemOp mop,
4859                    void (*func)(TCGv, TCGv_ptr, tcg_target_long))
4860{
4861    return gen_g2v_vl(ctx, a, 16, mop, func);
4862}
4863
4864static bool gen_g2x(DisasContext *ctx, arg_vr_i *a, MemOp mop,
4865                    void (*func)(TCGv, TCGv_ptr, tcg_target_long))
4866{
4867    return gen_g2v_vl(ctx, a, 32, mop, func);
4868}
4869
4870TRANS(vinsgr2vr_b, LSX, gen_g2v, MO_8, tcg_gen_st8_i64)
4871TRANS(vinsgr2vr_h, LSX, gen_g2v, MO_16, tcg_gen_st16_i64)
4872TRANS(vinsgr2vr_w, LSX, gen_g2v, MO_32, tcg_gen_st32_i64)
4873TRANS(vinsgr2vr_d, LSX, gen_g2v, MO_64, tcg_gen_st_i64)
4874TRANS(xvinsgr2vr_w, LASX, gen_g2x, MO_32, tcg_gen_st32_i64)
4875TRANS(xvinsgr2vr_d, LASX, gen_g2x, MO_64, tcg_gen_st_i64)
4876
4877static bool gen_v2g_vl(DisasContext *ctx, arg_rv_i *a, uint32_t oprsz, MemOp mop,
4878                       void (*func)(TCGv, TCGv_ptr, tcg_target_long))
4879{
4880    TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
4881
4882    if (!check_vec(ctx, oprsz)) {
4883        return true;
4884    }
4885
4886    func(dst, tcg_env, vec_reg_offset(a->vj, a->imm, mop));
4887
4888    return true;
4889}
4890
4891static bool gen_v2g(DisasContext *ctx, arg_rv_i *a, MemOp mop,
4892                    void (*func)(TCGv, TCGv_ptr, tcg_target_long))
4893{
4894    return gen_v2g_vl(ctx, a, 16, mop, func);
4895}
4896
4897static bool gen_x2g(DisasContext *ctx, arg_rv_i *a, MemOp mop,
4898                    void (*func)(TCGv, TCGv_ptr, tcg_target_long))
4899{
4900    return gen_v2g_vl(ctx, a, 32, mop, func);
4901}
4902
4903TRANS(vpickve2gr_b, LSX, gen_v2g, MO_8, tcg_gen_ld8s_i64)
4904TRANS(vpickve2gr_h, LSX, gen_v2g, MO_16, tcg_gen_ld16s_i64)
4905TRANS(vpickve2gr_w, LSX, gen_v2g, MO_32, tcg_gen_ld32s_i64)
4906TRANS(vpickve2gr_d, LSX, gen_v2g, MO_64, tcg_gen_ld_i64)
4907TRANS(vpickve2gr_bu, LSX, gen_v2g, MO_8, tcg_gen_ld8u_i64)
4908TRANS(vpickve2gr_hu, LSX, gen_v2g, MO_16, tcg_gen_ld16u_i64)
4909TRANS(vpickve2gr_wu, LSX, gen_v2g, MO_32, tcg_gen_ld32u_i64)
4910TRANS(vpickve2gr_du, LSX, gen_v2g, MO_64, tcg_gen_ld_i64)
4911TRANS(xvpickve2gr_w, LASX, gen_x2g, MO_32, tcg_gen_ld32s_i64)
4912TRANS(xvpickve2gr_d, LASX, gen_x2g, MO_64, tcg_gen_ld_i64)
4913TRANS(xvpickve2gr_wu, LASX, gen_x2g, MO_32, tcg_gen_ld32u_i64)
4914TRANS(xvpickve2gr_du, LASX, gen_x2g, MO_64, tcg_gen_ld_i64)
4915
4916static bool gvec_dup_vl(DisasContext *ctx, arg_vr *a,
4917                        uint32_t oprsz, MemOp mop)
4918{
4919    TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
4920
4921    if (!check_vec(ctx, oprsz)) {
4922        return true;
4923    }
4924
4925    tcg_gen_gvec_dup_i64(mop, vec_full_offset(a->vd),
4926                         oprsz, ctx->vl/8, src);
4927    return true;
4928}
4929
4930static bool gvec_dup(DisasContext *ctx, arg_vr *a, MemOp mop)
4931{
4932    return gvec_dup_vl(ctx, a, 16, mop);
4933}
4934
4935static bool gvec_dupx(DisasContext *ctx, arg_vr *a, MemOp mop)
4936{
4937    return gvec_dup_vl(ctx, a, 32, mop);
4938}
4939
4940TRANS(vreplgr2vr_b, LSX, gvec_dup, MO_8)
4941TRANS(vreplgr2vr_h, LSX, gvec_dup, MO_16)
4942TRANS(vreplgr2vr_w, LSX, gvec_dup, MO_32)
4943TRANS(vreplgr2vr_d, LSX, gvec_dup, MO_64)
4944TRANS(xvreplgr2vr_b, LASX, gvec_dupx, MO_8)
4945TRANS(xvreplgr2vr_h, LASX, gvec_dupx, MO_16)
4946TRANS(xvreplgr2vr_w, LASX, gvec_dupx, MO_32)
4947TRANS(xvreplgr2vr_d, LASX, gvec_dupx, MO_64)
4948
4949static bool trans_vreplvei_b(DisasContext *ctx, arg_vv_i *a)
4950{
4951    if (!avail_LSX(ctx)) {
4952        return false;
4953    }
4954
4955    if (!check_vec(ctx, 16)) {
4956        return true;
4957    }
4958
4959    tcg_gen_gvec_dup_mem(MO_8,vec_full_offset(a->vd),
4960                         offsetof(CPULoongArchState,
4961                                  fpr[a->vj].vreg.B((a->imm))),
4962                         16, ctx->vl/8);
4963    return true;
4964}
4965
4966static bool trans_vreplvei_h(DisasContext *ctx, arg_vv_i *a)
4967{
4968    if (!avail_LSX(ctx)) {
4969        return false;
4970    }
4971
4972    if (!check_vec(ctx, 16)) {
4973        return true;
4974    }
4975
4976    tcg_gen_gvec_dup_mem(MO_16, vec_full_offset(a->vd),
4977                         offsetof(CPULoongArchState,
4978                                  fpr[a->vj].vreg.H((a->imm))),
4979                         16, ctx->vl/8);
4980    return true;
4981}
4982static bool trans_vreplvei_w(DisasContext *ctx, arg_vv_i *a)
4983{
4984    if (!avail_LSX(ctx)) {
4985        return false;
4986    }
4987
4988    if (!check_vec(ctx, 16)) {
4989        return true;
4990    }
4991
4992    tcg_gen_gvec_dup_mem(MO_32, vec_full_offset(a->vd),
4993                         offsetof(CPULoongArchState,
4994                                  fpr[a->vj].vreg.W((a->imm))),
4995                        16, ctx->vl/8);
4996    return true;
4997}
4998static bool trans_vreplvei_d(DisasContext *ctx, arg_vv_i *a)
4999{
5000    if (!avail_LSX(ctx)) {
5001        return false;
5002    }
5003
5004    if (!check_vec(ctx, 16)) {
5005        return true;
5006    }
5007
5008    tcg_gen_gvec_dup_mem(MO_64, vec_full_offset(a->vd),
5009                         offsetof(CPULoongArchState,
5010                                  fpr[a->vj].vreg.D((a->imm))),
5011                         16, ctx->vl/8);
5012    return true;
5013}
5014
5015static bool gen_vreplve_vl(DisasContext *ctx, arg_vvr *a,
5016                           uint32_t oprsz, int vece, int bit,
5017                           void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
5018{
5019    int i;
5020    TCGv_i64 t0 = tcg_temp_new_i64();
5021    TCGv_ptr t1 = tcg_temp_new_ptr();
5022    TCGv_i64 t2 = tcg_temp_new_i64();
5023
5024    if (!check_vec(ctx, oprsz)) {
5025        return true;
5026    }
5027
5028    tcg_gen_andi_i64(t0, gpr_src(ctx, a->rk, EXT_NONE), (LSX_LEN / bit) - 1);
5029    tcg_gen_shli_i64(t0, t0, vece);
5030    if (HOST_BIG_ENDIAN) {
5031        tcg_gen_xori_i64(t0, t0, vece << ((LSX_LEN / bit) - 1));
5032    }
5033
5034    tcg_gen_trunc_i64_ptr(t1, t0);
5035    tcg_gen_add_ptr(t1, t1, tcg_env);
5036
5037    for (i = 0; i < oprsz; i += 16) {
5038        func(t2, t1, vec_full_offset(a->vj) + i);
5039        tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd) + i, 16, 16, t2);
5040    }
5041
5042    return true;
5043}
5044
5045static bool gen_vreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit,
5046                        void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
5047{
5048    return gen_vreplve_vl(ctx, a, 16, vece, bit, func);
5049}
5050
5051static bool gen_xvreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit,
5052                         void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
5053{
5054    return gen_vreplve_vl(ctx, a, 32, vece, bit, func);
5055}
5056
5057TRANS(vreplve_b, LSX, gen_vreplve, MO_8,  8, tcg_gen_ld8u_i64)
5058TRANS(vreplve_h, LSX, gen_vreplve, MO_16, 16, tcg_gen_ld16u_i64)
5059TRANS(vreplve_w, LSX, gen_vreplve, MO_32, 32, tcg_gen_ld32u_i64)
5060TRANS(vreplve_d, LSX, gen_vreplve, MO_64, 64, tcg_gen_ld_i64)
5061TRANS(xvreplve_b, LASX, gen_xvreplve, MO_8,  8, tcg_gen_ld8u_i64)
5062TRANS(xvreplve_h, LASX, gen_xvreplve, MO_16, 16, tcg_gen_ld16u_i64)
5063TRANS(xvreplve_w, LASX, gen_xvreplve, MO_32, 32, tcg_gen_ld32u_i64)
5064TRANS(xvreplve_d, LASX, gen_xvreplve, MO_64, 64, tcg_gen_ld_i64)
5065
5066static bool gen_xvrepl128(DisasContext *ctx, arg_vv_i *a, MemOp mop)
5067{
5068    int i;
5069
5070    if (!check_vec(ctx, 32)) {
5071        return true;
5072    }
5073
5074    for (i = 0; i < 32; i += 16) {
5075        tcg_gen_gvec_dup_mem(mop, vec_full_offset(a->vd) + i,
5076                             vec_reg_offset(a->vj, a->imm, mop) + i, 16, 16);
5077
5078    }
5079    return true;
5080}
5081
/* xvrepl128vei.{b,h,w,d}: all go through gen_xvrepl128 with the element MemOp. */
TRANS(xvrepl128vei_b, LASX, gen_xvrepl128, MO_8)
TRANS(xvrepl128vei_h, LASX, gen_xvrepl128, MO_16)
TRANS(xvrepl128vei_w, LASX, gen_xvrepl128, MO_32)
TRANS(xvrepl128vei_d, LASX, gen_xvrepl128, MO_64)
5086
5087static bool gen_xvreplve0(DisasContext *ctx, arg_vv *a, MemOp mop)
5088{
5089    if (!check_vec(ctx, 32)) {
5090        return true;
5091    }
5092
5093    tcg_gen_gvec_dup_mem(mop, vec_full_offset(a->vd),
5094                         vec_full_offset(a->vj), 32, 32);
5095    return true;
5096}
5097
/* xvreplve0.{b,h,w,d,q}: gen_xvreplve0 with element sizes from 8 to 128 bits. */
TRANS(xvreplve0_b, LASX, gen_xvreplve0, MO_8)
TRANS(xvreplve0_h, LASX, gen_xvreplve0, MO_16)
TRANS(xvreplve0_w, LASX, gen_xvreplve0, MO_32)
TRANS(xvreplve0_d, LASX, gen_xvreplve0, MO_64)
TRANS(xvreplve0_q, LASX, gen_xvreplve0, MO_128)

/* xvinsve0.{w,d}: dedicated out-of-line helpers, dispatched via gen_xx_i. */
TRANS(xvinsve0_w, LASX, gen_xx_i, gen_helper_xvinsve0_w)
TRANS(xvinsve0_d, LASX, gen_xx_i, gen_helper_xvinsve0_d)

/* xvpickve.{w,d}: dedicated out-of-line helpers, dispatched via gen_xx_i. */
TRANS(xvpickve_w, LASX, gen_xx_i, gen_helper_xvpickve_w)
TRANS(xvpickve_d, LASX, gen_xx_i, gen_helper_xvpickve_d)
5109
5110static bool do_vbsll_v(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz)
5111{
5112    int i, ofs;
5113
5114    if (!check_vec(ctx, oprsz)) {
5115        return true;
5116    }
5117
5118    for (i = 0; i < oprsz / 16; i++) {
5119        TCGv desthigh = tcg_temp_new_i64();
5120        TCGv destlow = tcg_temp_new_i64();
5121        TCGv high = tcg_temp_new_i64();
5122        TCGv low = tcg_temp_new_i64();
5123
5124        get_vreg64(low, a->vj, 2 * i);
5125
5126        ofs = ((a->imm) & 0xf) * 8;
5127        if (ofs < 64) {
5128            get_vreg64(high, a->vj, 2 * i + 1);
5129            tcg_gen_extract2_i64(desthigh, low, high, 64 - ofs);
5130            tcg_gen_shli_i64(destlow, low, ofs);
5131        } else {
5132            tcg_gen_shli_i64(desthigh, low, ofs - 64);
5133            destlow = tcg_constant_i64(0);
5134        }
5135        set_vreg64(desthigh, a->vd, 2 * i + 1);
5136        set_vreg64(destlow, a->vd, 2 * i);
5137    }
5138
5139    return true;
5140}
5141
5142static bool do_vbsrl_v(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz)
5143{
5144    int i, ofs;
5145
5146    if (!check_vec(ctx, oprsz)) {
5147        return true;
5148    }
5149
5150    for (i = 0; i < oprsz / 16; i++) {
5151        TCGv desthigh = tcg_temp_new_i64();
5152        TCGv destlow = tcg_temp_new_i64();
5153        TCGv high = tcg_temp_new_i64();
5154        TCGv low = tcg_temp_new_i64();
5155        get_vreg64(high, a->vj, 2 * i + 1);
5156
5157        ofs = ((a->imm) & 0xf) * 8;
5158        if (ofs < 64) {
5159            get_vreg64(low, a->vj, 2 * i);
5160            tcg_gen_extract2_i64(destlow, low, high, ofs);
5161            tcg_gen_shri_i64(desthigh, high, ofs);
5162        } else {
5163            tcg_gen_shri_i64(destlow, high, ofs - 64);
5164            desthigh = tcg_constant_i64(0);
5165        }
5166        set_vreg64(desthigh, a->vd, 2 * i + 1);
5167        set_vreg64(destlow, a->vd, 2 * i);
5168    }
5169
5170    return true;
5171}
5172
/*
 * vbsll.v / vbsrl.v and their xv* forms: the trailing argument is the
 * operation size in bytes handed to do_vbsll_v / do_vbsrl_v.
 */
TRANS(vbsll_v, LSX, do_vbsll_v, 16)
TRANS(vbsrl_v, LSX, do_vbsrl_v, 16)
TRANS(xvbsll_v, LASX, do_vbsll_v, 32)
TRANS(xvbsrl_v, LASX, do_vbsrl_v, 32)
5177
/*
 * The tables below bind instructions to out-of-line helpers.  Unless noted
 * otherwise the LASX (xv*) entry reuses the helper of the matching LSX
 * entry and differs only in the dispatcher (gen_xxx / gen_xxxx / gen_xx_i
 * instead of gen_vvv / gen_vvvv / gen_vv_i).
 */

/* vpackev / vpackod */
TRANS(vpackev_b, LSX, gen_vvv, gen_helper_vpackev_b)
TRANS(vpackev_h, LSX, gen_vvv, gen_helper_vpackev_h)
TRANS(vpackev_w, LSX, gen_vvv, gen_helper_vpackev_w)
TRANS(vpackev_d, LSX, gen_vvv, gen_helper_vpackev_d)
TRANS(vpackod_b, LSX, gen_vvv, gen_helper_vpackod_b)
TRANS(vpackod_h, LSX, gen_vvv, gen_helper_vpackod_h)
TRANS(vpackod_w, LSX, gen_vvv, gen_helper_vpackod_w)
TRANS(vpackod_d, LSX, gen_vvv, gen_helper_vpackod_d)
TRANS(xvpackev_b, LASX, gen_xxx, gen_helper_vpackev_b)
TRANS(xvpackev_h, LASX, gen_xxx, gen_helper_vpackev_h)
TRANS(xvpackev_w, LASX, gen_xxx, gen_helper_vpackev_w)
TRANS(xvpackev_d, LASX, gen_xxx, gen_helper_vpackev_d)
TRANS(xvpackod_b, LASX, gen_xxx, gen_helper_vpackod_b)
TRANS(xvpackod_h, LASX, gen_xxx, gen_helper_vpackod_h)
TRANS(xvpackod_w, LASX, gen_xxx, gen_helper_vpackod_w)
TRANS(xvpackod_d, LASX, gen_xxx, gen_helper_vpackod_d)

/* vpickev / vpickod */
TRANS(vpickev_b, LSX, gen_vvv, gen_helper_vpickev_b)
TRANS(vpickev_h, LSX, gen_vvv, gen_helper_vpickev_h)
TRANS(vpickev_w, LSX, gen_vvv, gen_helper_vpickev_w)
TRANS(vpickev_d, LSX, gen_vvv, gen_helper_vpickev_d)
TRANS(vpickod_b, LSX, gen_vvv, gen_helper_vpickod_b)
TRANS(vpickod_h, LSX, gen_vvv, gen_helper_vpickod_h)
TRANS(vpickod_w, LSX, gen_vvv, gen_helper_vpickod_w)
TRANS(vpickod_d, LSX, gen_vvv, gen_helper_vpickod_d)
TRANS(xvpickev_b, LASX, gen_xxx, gen_helper_vpickev_b)
TRANS(xvpickev_h, LASX, gen_xxx, gen_helper_vpickev_h)
TRANS(xvpickev_w, LASX, gen_xxx, gen_helper_vpickev_w)
TRANS(xvpickev_d, LASX, gen_xxx, gen_helper_vpickev_d)
TRANS(xvpickod_b, LASX, gen_xxx, gen_helper_vpickod_b)
TRANS(xvpickod_h, LASX, gen_xxx, gen_helper_vpickod_h)
TRANS(xvpickod_w, LASX, gen_xxx, gen_helper_vpickod_w)
TRANS(xvpickod_d, LASX, gen_xxx, gen_helper_vpickod_d)

/* vilvl / vilvh */
TRANS(vilvl_b, LSX, gen_vvv, gen_helper_vilvl_b)
TRANS(vilvl_h, LSX, gen_vvv, gen_helper_vilvl_h)
TRANS(vilvl_w, LSX, gen_vvv, gen_helper_vilvl_w)
TRANS(vilvl_d, LSX, gen_vvv, gen_helper_vilvl_d)
TRANS(vilvh_b, LSX, gen_vvv, gen_helper_vilvh_b)
TRANS(vilvh_h, LSX, gen_vvv, gen_helper_vilvh_h)
TRANS(vilvh_w, LSX, gen_vvv, gen_helper_vilvh_w)
TRANS(vilvh_d, LSX, gen_vvv, gen_helper_vilvh_d)
TRANS(xvilvl_b, LASX, gen_xxx, gen_helper_vilvl_b)
TRANS(xvilvl_h, LASX, gen_xxx, gen_helper_vilvl_h)
TRANS(xvilvl_w, LASX, gen_xxx, gen_helper_vilvl_w)
TRANS(xvilvl_d, LASX, gen_xxx, gen_helper_vilvl_d)
TRANS(xvilvh_b, LASX, gen_xxx, gen_helper_vilvh_b)
TRANS(xvilvh_h, LASX, gen_xxx, gen_helper_vilvh_h)
TRANS(xvilvh_w, LASX, gen_xxx, gen_helper_vilvh_w)
TRANS(xvilvh_d, LASX, gen_xxx, gen_helper_vilvh_d)

/*
 * vshuf / vshuf4i.  Note that the .b form of vshuf is dispatched through
 * the four-register gen_vvvv / gen_xxxx, while .h/.w/.d use the
 * three-register gen_vvv / gen_xxx.
 */
TRANS(vshuf_b, LSX, gen_vvvv, gen_helper_vshuf_b)
TRANS(vshuf_h, LSX, gen_vvv, gen_helper_vshuf_h)
TRANS(vshuf_w, LSX, gen_vvv, gen_helper_vshuf_w)
TRANS(vshuf_d, LSX, gen_vvv, gen_helper_vshuf_d)
TRANS(xvshuf_b, LASX, gen_xxxx, gen_helper_vshuf_b)
TRANS(xvshuf_h, LASX, gen_xxx, gen_helper_vshuf_h)
TRANS(xvshuf_w, LASX, gen_xxx, gen_helper_vshuf_w)
TRANS(xvshuf_d, LASX, gen_xxx, gen_helper_vshuf_d)
TRANS(vshuf4i_b, LSX, gen_vv_i, gen_helper_vshuf4i_b)
TRANS(vshuf4i_h, LSX, gen_vv_i, gen_helper_vshuf4i_h)
TRANS(vshuf4i_w, LSX, gen_vv_i, gen_helper_vshuf4i_w)
TRANS(vshuf4i_d, LSX, gen_vv_i, gen_helper_vshuf4i_d)
TRANS(xvshuf4i_b, LASX, gen_xx_i, gen_helper_vshuf4i_b)
TRANS(xvshuf4i_h, LASX, gen_xx_i, gen_helper_vshuf4i_h)
TRANS(xvshuf4i_w, LASX, gen_xx_i, gen_helper_vshuf4i_w)
TRANS(xvshuf4i_d, LASX, gen_xx_i, gen_helper_vshuf4i_d)

/*
 * vperm / vpermi.  xvperm_w, xvpermi_d and xvpermi_q have no LSX entry in
 * this table; vpermi_w and xvpermi_w share one helper.
 */
TRANS(xvperm_w, LASX, gen_xxx, gen_helper_vperm_w)
TRANS(vpermi_w, LSX, gen_vv_i, gen_helper_vpermi_w)
TRANS(xvpermi_w, LASX, gen_xx_i, gen_helper_vpermi_w)
TRANS(xvpermi_d, LASX, gen_xx_i, gen_helper_vpermi_d)
TRANS(xvpermi_q, LASX, gen_xx_i, gen_helper_vpermi_q)

/* vextrins */
TRANS(vextrins_b, LSX, gen_vv_i, gen_helper_vextrins_b)
TRANS(vextrins_h, LSX, gen_vv_i, gen_helper_vextrins_h)
TRANS(vextrins_w, LSX, gen_vv_i, gen_helper_vextrins_w)
TRANS(vextrins_d, LSX, gen_vv_i, gen_helper_vextrins_d)
TRANS(xvextrins_b, LASX, gen_xx_i, gen_helper_vextrins_b)
TRANS(xvextrins_h, LASX, gen_xx_i, gen_helper_vextrins_h)
TRANS(xvextrins_w, LASX, gen_xx_i, gen_helper_vextrins_w)
TRANS(xvextrins_d, LASX, gen_xx_i, gen_helper_vextrins_d)
5260
5261static bool trans_vld(DisasContext *ctx, arg_vr_i *a)
5262{
5263    TCGv addr;
5264    TCGv_i64 rl, rh;
5265    TCGv_i128 val;
5266
5267    if (!avail_LSX(ctx)) {
5268        return false;
5269    }
5270
5271    if (!check_vec(ctx, 16)) {
5272        return true;
5273    }
5274
5275    addr = gpr_src(ctx, a->rj, EXT_NONE);
5276    val = tcg_temp_new_i128();
5277    rl = tcg_temp_new_i64();
5278    rh = tcg_temp_new_i64();
5279
5280    addr = make_address_i(ctx, addr, a->imm);
5281
5282    tcg_gen_qemu_ld_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE);
5283    tcg_gen_extr_i128_i64(rl, rh, val);
5284    set_vreg64(rh, a->vd, 1);
5285    set_vreg64(rl, a->vd, 0);
5286
5287    return true;
5288}
5289
5290static bool trans_vst(DisasContext *ctx, arg_vr_i *a)
5291{
5292    TCGv addr;
5293    TCGv_i128 val;
5294    TCGv_i64 ah, al;
5295
5296    if (!avail_LSX(ctx)) {
5297        return false;
5298    }
5299
5300    if (!check_vec(ctx, 16)) {
5301        return true;
5302    }
5303
5304    addr = gpr_src(ctx, a->rj, EXT_NONE);
5305    val = tcg_temp_new_i128();
5306    ah = tcg_temp_new_i64();
5307    al = tcg_temp_new_i64();
5308
5309    addr = make_address_i(ctx, addr, a->imm);
5310
5311    get_vreg64(ah, a->vd, 1);
5312    get_vreg64(al, a->vd, 0);
5313    tcg_gen_concat_i64_i128(val, al, ah);
5314    tcg_gen_qemu_st_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE);
5315
5316    return true;
5317}
5318
5319static bool trans_vldx(DisasContext *ctx, arg_vrr *a)
5320{
5321    TCGv addr, src1, src2;
5322    TCGv_i64 rl, rh;
5323    TCGv_i128 val;
5324
5325    if (!avail_LSX(ctx)) {
5326        return false;
5327    }
5328
5329    if (!check_vec(ctx, 16)) {
5330        return true;
5331    }
5332
5333    src1 = gpr_src(ctx, a->rj, EXT_NONE);
5334    src2 = gpr_src(ctx, a->rk, EXT_NONE);
5335    val = tcg_temp_new_i128();
5336    rl = tcg_temp_new_i64();
5337    rh = tcg_temp_new_i64();
5338
5339    addr = make_address_x(ctx, src1, src2);
5340    tcg_gen_qemu_ld_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE);
5341    tcg_gen_extr_i128_i64(rl, rh, val);
5342    set_vreg64(rh, a->vd, 1);
5343    set_vreg64(rl, a->vd, 0);
5344
5345    return true;
5346}
5347
5348static bool trans_vstx(DisasContext *ctx, arg_vrr *a)
5349{
5350    TCGv addr, src1, src2;
5351    TCGv_i64 ah, al;
5352    TCGv_i128 val;
5353
5354    if (!avail_LSX(ctx)) {
5355        return false;
5356    }
5357
5358    if (!check_vec(ctx, 16)) {
5359        return true;
5360    }
5361
5362    src1 = gpr_src(ctx, a->rj, EXT_NONE);
5363    src2 = gpr_src(ctx, a->rk, EXT_NONE);
5364    val = tcg_temp_new_i128();
5365    ah = tcg_temp_new_i64();
5366    al = tcg_temp_new_i64();
5367
5368    addr = make_address_x(ctx, src1, src2);
5369    get_vreg64(ah, a->vd, 1);
5370    get_vreg64(al, a->vd, 0);
5371    tcg_gen_concat_i64_i128(val, al, ah);
5372    tcg_gen_qemu_st_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE);
5373
5374    return true;
5375}
5376
5377static bool do_vldrepl_vl(DisasContext *ctx, arg_vr_i *a,
5378                          uint32_t oprsz, MemOp mop)
5379{
5380    TCGv addr;
5381    TCGv_i64 val;
5382
5383    if (!check_vec(ctx, oprsz)) {
5384        return true;
5385    }
5386
5387    addr = gpr_src(ctx, a->rj, EXT_NONE);
5388    val = tcg_temp_new_i64();
5389
5390    addr = make_address_i(ctx, addr, a->imm);
5391
5392    tcg_gen_qemu_ld_i64(val, addr, ctx->mem_idx, mop);
5393    tcg_gen_gvec_dup_i64(mop, vec_full_offset(a->vd), oprsz, ctx->vl / 8, val);
5394
5395    return true;
5396}
5397
5398static bool do_vldrepl(DisasContext *ctx, arg_vr_i *a, MemOp mop)
5399{
5400    return do_vldrepl_vl(ctx, a, 16, mop);
5401}
5402
5403static bool do_xvldrepl(DisasContext *ctx, arg_vr_i *a, MemOp mop)
5404{
5405    return do_vldrepl_vl(ctx, a, 32, mop);
5406}
5407
/*
 * vldrepl.{b,h,w,d} / xvldrepl.{b,h,w,d}: the trailing argument is the
 * element MemOp used both for the guest load and for the replication.
 */
TRANS(vldrepl_b, LSX, do_vldrepl, MO_8)
TRANS(vldrepl_h, LSX, do_vldrepl, MO_16)
TRANS(vldrepl_w, LSX, do_vldrepl, MO_32)
TRANS(vldrepl_d, LSX, do_vldrepl, MO_64)
TRANS(xvldrepl_b, LASX, do_xvldrepl, MO_8)
TRANS(xvldrepl_h, LASX, do_xvldrepl, MO_16)
TRANS(xvldrepl_w, LASX, do_xvldrepl, MO_32)
TRANS(xvldrepl_d, LASX, do_xvldrepl, MO_64)
5416
5417static bool do_vstelm_vl(DisasContext *ctx,
5418                         arg_vr_ii *a, uint32_t oprsz, MemOp mop)
5419{
5420    TCGv addr;
5421    TCGv_i64 val;
5422
5423    if (!check_vec(ctx, oprsz)) {
5424        return true;
5425    }
5426
5427    addr = gpr_src(ctx, a->rj, EXT_NONE);
5428    val = tcg_temp_new_i64();
5429
5430    addr = make_address_i(ctx, addr, a->imm);
5431    tcg_gen_ld_i64(val, tcg_env, vec_reg_offset(a->vd, a->imm2, mop));
5432    tcg_gen_qemu_st_i64(val, addr, ctx->mem_idx, mop);
5433    return true;
5434}
5435
5436static bool do_vstelm(DisasContext *ctx, arg_vr_ii *a, MemOp mop)
5437{
5438    return do_vstelm_vl(ctx, a, 16, mop);
5439}
5440
5441static bool do_xvstelm(DisasContext *ctx, arg_vr_ii *a, MemOp mop)
5442{
5443    return do_vstelm_vl(ctx, a, 32, mop);
5444}
5445
/*
 * vstelm.{b,h,w,d} / xvstelm.{b,h,w,d}: the trailing argument is the
 * element MemOp of the element being stored.
 */
TRANS(vstelm_b, LSX, do_vstelm, MO_8)
TRANS(vstelm_h, LSX, do_vstelm, MO_16)
TRANS(vstelm_w, LSX, do_vstelm, MO_32)
TRANS(vstelm_d, LSX, do_vstelm, MO_64)
TRANS(xvstelm_b, LASX, do_xvstelm, MO_8)
TRANS(xvstelm_h, LASX, do_xvstelm, MO_16)
TRANS(xvstelm_w, LASX, do_xvstelm, MO_32)
TRANS(xvstelm_d, LASX, do_xvstelm, MO_64)
5454
5455static bool gen_lasx_memory(DisasContext *ctx, arg_vr_i *a,
5456                            void (*func)(DisasContext *, int, TCGv))
5457{
5458    TCGv addr = gpr_src(ctx, a->rj, EXT_NONE);
5459    TCGv temp = NULL;
5460
5461    if (!check_vec(ctx, 32)) {
5462        return true;
5463    }
5464
5465    if (a->imm) {
5466        temp = tcg_temp_new();
5467        tcg_gen_addi_tl(temp, addr, a->imm);
5468        addr = temp;
5469    }
5470
5471    func(ctx, a->vd, addr);
5472    return true;
5473}
5474
5475static void gen_xvld(DisasContext *ctx, int vreg, TCGv addr)
5476{
5477    int i;
5478    TCGv temp = tcg_temp_new();
5479    TCGv dest = tcg_temp_new();
5480
5481    tcg_gen_qemu_ld_i64(dest, addr, ctx->mem_idx, MO_TEUQ);
5482    set_vreg64(dest, vreg, 0);
5483
5484    for (i = 1; i < 4; i++) {
5485        tcg_gen_addi_tl(temp, addr, 8 * i);
5486        tcg_gen_qemu_ld_i64(dest, temp, ctx->mem_idx, MO_TEUQ);
5487        set_vreg64(dest, vreg, i);
5488    }
5489}
5490
5491static void gen_xvst(DisasContext * ctx, int vreg, TCGv addr)
5492{
5493    int i;
5494    TCGv temp = tcg_temp_new();
5495    TCGv dest = tcg_temp_new();
5496
5497    get_vreg64(dest, vreg, 0);
5498    tcg_gen_qemu_st_i64(dest, addr, ctx->mem_idx, MO_TEUQ);
5499
5500    for (i = 1; i < 4; i++) {
5501        tcg_gen_addi_tl(temp, addr, 8 * i);
5502        get_vreg64(dest, vreg, i);
5503        tcg_gen_qemu_st_i64(dest, temp, ctx->mem_idx, MO_TEUQ);
5504    }
5505}
5506
/* xvld / xvst: register + immediate addressing via gen_lasx_memory. */
TRANS(xvld, LASX, gen_lasx_memory, gen_xvld)
TRANS(xvst, LASX, gen_lasx_memory, gen_xvst)
5509
5510static bool gen_lasx_memoryx(DisasContext *ctx, arg_vrr *a,
5511                             void (*func)(DisasContext*, int, TCGv))
5512{
5513    TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
5514    TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
5515    TCGv addr = tcg_temp_new();
5516
5517    if (!check_vec(ctx, 32)) {
5518        return true;
5519    }
5520
5521    tcg_gen_add_tl(addr, src1, src2);
5522    func(ctx, a->vd, addr);
5523
5524    return true;
5525}
5526
/* xvldx / xvstx: register + register addressing via gen_lasx_memoryx. */
TRANS(xvldx, LASX, gen_lasx_memoryx, gen_xvld)
TRANS(xvstx, LASX, gen_lasx_memoryx, gen_xvst)
5529