xref: /openbmc/qemu/target/arm/tcg/gengvec.c (revision 09a52d85)
/*
 *  ARM generic vector expansion
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *  Copyright (c) 2005-2007 CodeSourcery
 *  Copyright (c) 2007 OpenedHand, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "translate.h"


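/*
 * Expand a three-operand operation via an out-of-line helper that
 * also takes a pointer to the QC (cumulative saturation) flag.
 */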
static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
                            uint32_t opr_sz, uint32_t max_sz,
                            gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr qc_ptr = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc));
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
                       opr_sz, max_sz, 0, fn);
}

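/*
 * SQRDMLAH/SQRDMLSH: signed saturating rounding doubling multiply
 * accumulate (or subtract) returning high half; saturation sets QC.
 */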
void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[2] = {
        gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
    };
    tcg_debug_assert(vece >= 1 && vece <= 2);
    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
}

void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[2] = {
        gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
    };
    tcg_debug_assert(vece >= 1 && vece <= 2);
    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
}

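/* Compare each element against zero: true yields all ones, false all zeros. */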
#define GEN_CMP0(NAME, COND)                              \
    void NAME(unsigned vece, uint32_t d, uint32_t m,      \
              uint32_t opr_sz, uint32_t max_sz)           \
    { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); }

GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ)
GEN_CMP0(gen_gvec_cle0, TCG_COND_LE)
GEN_CMP0(gen_gvec_cge0, TCG_COND_GE)
GEN_CMP0(gen_gvec_clt0, TCG_COND_LT)
GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT)

#undef GEN_CMP0

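/* SSRA: signed shift right and accumulate into the destination. */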
static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}

static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}

static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_sari_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}

static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_sari_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}

static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_sari_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}

void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_ssra8_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_ssra16_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_ssra32_i32,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_ssra64_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Signed results in all sign bits.
     */
    shift = MIN(shift, (8 << vece) - 1);
    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
}

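/* USRA: unsigned shift right and accumulate into the destination. */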
static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}

static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}

static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}

static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}

static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_shri_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}

void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_usra8_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8, },
        { .fni8 = gen_usra16_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16, },
        { .fni4 = gen_usra32_i32,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32, },
        { .fni8 = gen_usra64_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64, },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Unsigned results in all zeros as input to accumulate: nop.
     */
    if (shift < (8 << vece)) {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    } else {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    }
}

/*
 * Shift one less than the requested amount, and the low bit is
 * the rounding bit.  For the 8 and 16-bit operations, because we
 * mask the low bit, we can perform a normal integer shift instead
 * of a vector shift.
 */
static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_sar8i_i64(d, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_sar16i_i64(d, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t;

    /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
    if (sh == 32) {
        tcg_gen_movi_i32(d, 0);
        return;
    }
    t = tcg_temp_new_i32();
    tcg_gen_extract_i32(t, a, sh - 1, 1);
    tcg_gen_sari_i32(d, a, sh);
    tcg_gen_add_i32(d, d, t);
}

void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_extract_i64(t, a, sh - 1, 1);
    tcg_gen_sari_i64(d, a, sh);
    tcg_gen_add_i64(d, d, t);
}

static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec ones = tcg_temp_new_vec_matching(d);

    tcg_gen_shri_vec(vece, t, a, sh - 1);
    tcg_gen_dupi_vec(vece, ones, 1);
    tcg_gen_and_vec(vece, t, t, ones);
    tcg_gen_sari_vec(vece, d, a, sh);
    tcg_gen_add_vec(vece, d, d, t);
}

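/* SRSHR: signed rounding shift right. */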
void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_srshr8_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_srshr16_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_srshr32_i32,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_srshr64_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    if (shift == (8 << vece)) {
        /*
         * Shifts larger than the element size are architecturally valid.
         * Signed results in all sign bits.  With rounding, this produces
         *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
         * I.e. always zero.
         */
        tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

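/* SRSRA: signed rounding shift right and accumulate. */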
static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr8_i64(t, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr16_i64(t, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_srshr32_i32(t, a, sh);
    tcg_gen_add_i32(d, d, t);
}

static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr64_i64(t, a, sh);
    tcg_gen_add_i64(d, d, t);
}

static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    gen_srshr_vec(vece, t, a, sh);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_srsra8_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fni8 = gen_srsra16_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_srsra32_i32,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_srsra64_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Signed results in all sign bits.  With rounding, this produces
     *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
     * I.e. always zero.  With accumulation, this leaves D unchanged.
     */
    if (shift == (8 << vece)) {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

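/* URSHR: unsigned rounding shift right. */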
static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_shr8i_i64(d, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_shr16i_i64(d, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t;

    /* Handle shift by the input size for the benefit of trans_URSHR_ri */
    if (sh == 32) {
        tcg_gen_extract_i32(d, a, sh - 1, 1);
        return;
    }
    t = tcg_temp_new_i32();
    tcg_gen_extract_i32(t, a, sh - 1, 1);
    tcg_gen_shri_i32(d, a, sh);
    tcg_gen_add_i32(d, d, t);
}

void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_extract_i64(t, a, sh - 1, 1);
    tcg_gen_shri_i64(d, a, sh);
    tcg_gen_add_i64(d, d, t);
}

static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec ones = tcg_temp_new_vec_matching(d);

    tcg_gen_shri_vec(vece, t, a, shift - 1);
    tcg_gen_dupi_vec(vece, ones, 1);
    tcg_gen_and_vec(vece, t, t, ones);
    tcg_gen_shri_vec(vece, d, a, shift);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_urshr8_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_urshr16_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_urshr32_i32,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_urshr64_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    if (shift == (8 << vece)) {
        /*
         * Shifts larger than the element size are architecturally valid.
         * Unsigned results in zero.  With rounding, this produces a
         * copy of the most significant bit.
         */
        tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

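/* URSRA: unsigned rounding shift right and accumulate. */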
static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 8) {
        tcg_gen_vec_shr8i_i64(t, a, 7);
    } else {
        gen_urshr8_i64(t, a, sh);
    }
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 16) {
        tcg_gen_vec_shr16i_i64(t, a, 15);
    } else {
        gen_urshr16_i64(t, a, sh);
    }
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t = tcg_temp_new_i32();

    if (sh == 32) {
        tcg_gen_shri_i32(t, a, 31);
    } else {
        gen_urshr32_i32(t, a, sh);
    }
    tcg_gen_add_i32(d, d, t);
}

static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 64) {
        tcg_gen_shri_i64(t, a, 63);
    } else {
        gen_urshr64_i64(t, a, sh);
    }
    tcg_gen_add_i64(d, d, t);
}

static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    if (sh == (8 << vece)) {
        tcg_gen_shri_vec(vece, t, a, sh - 1);
    } else {
        gen_urshr_vec(vece, t, a, sh);
    }
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_ursra8_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fni8 = gen_ursra16_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_ursra32_i32,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_ursra64_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
}

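/*
 * SRI: shift right and insert.  The shifted source is merged into the
 * destination, so the top 'shift' bits of each element are preserved.
 */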
static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_8, 0xff >> shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_16, 0xffff >> shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
}

static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
}

static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec m = tcg_temp_new_vec_matching(d);

    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
    tcg_gen_shri_vec(vece, t, a, sh);
    tcg_gen_and_vec(vece, d, d, m);
    tcg_gen_or_vec(vece, d, d, t);
}

void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
    const GVecGen2i ops[4] = {
        { .fni8 = gen_shr8_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_shr16_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_shr32_ins_i32,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_shr64_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /* Shift of esize leaves destination unchanged. */
    if (shift < (8 << vece)) {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    } else {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    }
}

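/*
 * SLI: shift left and insert.  The shifted source is merged into the
 * destination, so the low 'shift' bits of each element are preserved.
 */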
static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_8, 0xff << shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shli_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_16, 0xffff << shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shli_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
}

static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
}

static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec m = tcg_temp_new_vec_matching(d);

    tcg_gen_shli_vec(vece, t, a, sh);
    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
    tcg_gen_and_vec(vece, d, d, m);
    tcg_gen_or_vec(vece, d, d, t);
}

void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
    const GVecGen2i ops[4] = {
        { .fni8 = gen_shl8_ins_i64,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_shl16_ins_i64,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_shl32_ins_i32,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_shl64_ins_i64,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [0..esize-1]. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift < (8 << vece));

    if (shift == 0) {
        tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

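/* Per-element multiply-add (MLA) and multiply-subtract (MLS) helpers. */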
static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u8(a, a, b);
    gen_helper_neon_add_u8(d, d, a);
}

static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u8(a, a, b);
    gen_helper_neon_sub_u8(d, d, a);
}

static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u16(a, a, b);
    gen_helper_neon_add_u16(d, d, a);
}

static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u16(a, a, b);
    gen_helper_neon_sub_u16(d, d, a);
}

static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mul_i32(a, a, b);
    tcg_gen_add_i32(d, d, a);
}

static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mul_i32(a, a, b);
    tcg_gen_sub_i32(d, d, a);
}

static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mul_i64(a, a, b);
    tcg_gen_add_i64(d, d, a);
}

static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mul_i64(a, a, b);
    tcg_gen_sub_i64(d, d, a);
}

static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_mul_vec(vece, a, a, b);
    tcg_gen_add_vec(vece, d, d, a);
}

static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_mul_vec(vece, a, a, b);
    tcg_gen_sub_vec(vece, d, d, a);
}

/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
 * these tables are shared with AArch64 which does support them.
 */
void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_mul_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_mla8_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_mla16_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_mla32_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_mla64_i64,
          .fniv = gen_mla_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_mul_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_mls8_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_mls16_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_mls32_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_mls64_i64,
          .fniv = gen_mls_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/* CMTST : test is "if ((X & Y) != 0)". */
934*09a52d85SRichard Henderson static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
935*09a52d85SRichard Henderson {
936*09a52d85SRichard Henderson     tcg_gen_and_i32(d, a, b);
937*09a52d85SRichard Henderson     tcg_gen_negsetcond_i32(TCG_COND_NE, d, d, tcg_constant_i32(0));
938*09a52d85SRichard Henderson }
939*09a52d85SRichard Henderson 
940*09a52d85SRichard Henderson void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
941*09a52d85SRichard Henderson {
942*09a52d85SRichard Henderson     tcg_gen_and_i64(d, a, b);
943*09a52d85SRichard Henderson     tcg_gen_negsetcond_i64(TCG_COND_NE, d, d, tcg_constant_i64(0));
944*09a52d85SRichard Henderson }
945*09a52d85SRichard Henderson 
946*09a52d85SRichard Henderson static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
947*09a52d85SRichard Henderson {
948*09a52d85SRichard Henderson     tcg_gen_and_vec(vece, d, a, b);
949*09a52d85SRichard Henderson     tcg_gen_dupi_vec(vece, a, 0);
950*09a52d85SRichard Henderson     tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
951*09a52d85SRichard Henderson }
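
/*
 * Illustrative scalar form of the comparison built above (hypothetical
 * name): an element becomes all ones when the two inputs share any set
 * bit, otherwise all zeros.
 */
static inline uint32_t cmtst32_sketch(uint32_t a, uint32_t b)
{
    return (a & b) != 0 ? UINT32_MAX : 0;
}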
952*09a52d85SRichard Henderson 
953*09a52d85SRichard Henderson void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
954*09a52d85SRichard Henderson                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
955*09a52d85SRichard Henderson {
956*09a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
957*09a52d85SRichard Henderson     static const GVecGen3 ops[4] = {
958*09a52d85SRichard Henderson         { .fni4 = gen_helper_neon_tst_u8,
959*09a52d85SRichard Henderson           .fniv = gen_cmtst_vec,
960*09a52d85SRichard Henderson           .opt_opc = vecop_list,
961*09a52d85SRichard Henderson           .vece = MO_8 },
962*09a52d85SRichard Henderson         { .fni4 = gen_helper_neon_tst_u16,
963*09a52d85SRichard Henderson           .fniv = gen_cmtst_vec,
964*09a52d85SRichard Henderson           .opt_opc = vecop_list,
965*09a52d85SRichard Henderson           .vece = MO_16 },
966*09a52d85SRichard Henderson         { .fni4 = gen_cmtst_i32,
967*09a52d85SRichard Henderson           .fniv = gen_cmtst_vec,
968*09a52d85SRichard Henderson           .opt_opc = vecop_list,
969*09a52d85SRichard Henderson           .vece = MO_32 },
970*09a52d85SRichard Henderson         { .fni8 = gen_cmtst_i64,
971*09a52d85SRichard Henderson           .fniv = gen_cmtst_vec,
972*09a52d85SRichard Henderson           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
973*09a52d85SRichard Henderson           .opt_opc = vecop_list,
974*09a52d85SRichard Henderson           .vece = MO_64 },
975*09a52d85SRichard Henderson     };
976*09a52d85SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
977*09a52d85SRichard Henderson }
978*09a52d85SRichard Henderson 
979*09a52d85SRichard Henderson void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
980*09a52d85SRichard Henderson {
981*09a52d85SRichard Henderson     TCGv_i32 lval = tcg_temp_new_i32();
982*09a52d85SRichard Henderson     TCGv_i32 rval = tcg_temp_new_i32();
983*09a52d85SRichard Henderson     TCGv_i32 lsh = tcg_temp_new_i32();
984*09a52d85SRichard Henderson     TCGv_i32 rsh = tcg_temp_new_i32();
985*09a52d85SRichard Henderson     TCGv_i32 zero = tcg_constant_i32(0);
986*09a52d85SRichard Henderson     TCGv_i32 max = tcg_constant_i32(32);
987*09a52d85SRichard Henderson 
988*09a52d85SRichard Henderson     /*
989*09a52d85SRichard Henderson      * Rely on the TCG guarantee that out of range shifts produce
990*09a52d85SRichard Henderson      * unspecified results, not undefined behaviour (i.e. no trap).
991*09a52d85SRichard Henderson      * Discard out-of-range results after the fact.
992*09a52d85SRichard Henderson      */
993*09a52d85SRichard Henderson     tcg_gen_ext8s_i32(lsh, shift);
994*09a52d85SRichard Henderson     tcg_gen_neg_i32(rsh, lsh);
995*09a52d85SRichard Henderson     tcg_gen_shl_i32(lval, src, lsh);
996*09a52d85SRichard Henderson     tcg_gen_shr_i32(rval, src, rsh);
997*09a52d85SRichard Henderson     tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
998*09a52d85SRichard Henderson     tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
999*09a52d85SRichard Henderson }
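
/*
 * Illustrative scalar sketch of the USHL semantics generated above
 * (hypothetical name): only the low byte of the shift operand is
 * significant, negative counts shift right logically, and a count of
 * 32 or more in either direction produces zero.
 */
static inline uint32_t ushl32_sketch(uint32_t src, int8_t shift)
{
    if (shift <= -32 || shift >= 32) {
        return 0;                   /* out of range in either direction */
    }
    return shift >= 0 ? src << shift : src >> -shift;
}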
1000*09a52d85SRichard Henderson 
1001*09a52d85SRichard Henderson void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
1002*09a52d85SRichard Henderson {
1003*09a52d85SRichard Henderson     TCGv_i64 lval = tcg_temp_new_i64();
1004*09a52d85SRichard Henderson     TCGv_i64 rval = tcg_temp_new_i64();
1005*09a52d85SRichard Henderson     TCGv_i64 lsh = tcg_temp_new_i64();
1006*09a52d85SRichard Henderson     TCGv_i64 rsh = tcg_temp_new_i64();
1007*09a52d85SRichard Henderson     TCGv_i64 zero = tcg_constant_i64(0);
1008*09a52d85SRichard Henderson     TCGv_i64 max = tcg_constant_i64(64);
1009*09a52d85SRichard Henderson 
1010*09a52d85SRichard Henderson     /*
1011*09a52d85SRichard Henderson      * Rely on the TCG guarantee that out of range shifts produce
1012*09a52d85SRichard Henderson      * unspecified results, not undefined behaviour (i.e. no trap).
1013*09a52d85SRichard Henderson      * Discard out-of-range results after the fact.
1014*09a52d85SRichard Henderson      */
1015*09a52d85SRichard Henderson     tcg_gen_ext8s_i64(lsh, shift);
1016*09a52d85SRichard Henderson     tcg_gen_neg_i64(rsh, lsh);
1017*09a52d85SRichard Henderson     tcg_gen_shl_i64(lval, src, lsh);
1018*09a52d85SRichard Henderson     tcg_gen_shr_i64(rval, src, rsh);
1019*09a52d85SRichard Henderson     tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
1020*09a52d85SRichard Henderson     tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
1021*09a52d85SRichard Henderson }
1022*09a52d85SRichard Henderson 
1023*09a52d85SRichard Henderson static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
1024*09a52d85SRichard Henderson                          TCGv_vec src, TCGv_vec shift)
1025*09a52d85SRichard Henderson {
1026*09a52d85SRichard Henderson     TCGv_vec lval = tcg_temp_new_vec_matching(dst);
1027*09a52d85SRichard Henderson     TCGv_vec rval = tcg_temp_new_vec_matching(dst);
1028*09a52d85SRichard Henderson     TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
1029*09a52d85SRichard Henderson     TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
1030*09a52d85SRichard Henderson     TCGv_vec msk, max;
1031*09a52d85SRichard Henderson 
1032*09a52d85SRichard Henderson     tcg_gen_neg_vec(vece, rsh, shift);
1033*09a52d85SRichard Henderson     if (vece == MO_8) {
1034*09a52d85SRichard Henderson         tcg_gen_mov_vec(lsh, shift);
1035*09a52d85SRichard Henderson     } else {
1036*09a52d85SRichard Henderson         msk = tcg_temp_new_vec_matching(dst);
1037*09a52d85SRichard Henderson         tcg_gen_dupi_vec(vece, msk, 0xff);
1038*09a52d85SRichard Henderson         tcg_gen_and_vec(vece, lsh, shift, msk);
1039*09a52d85SRichard Henderson         tcg_gen_and_vec(vece, rsh, rsh, msk);
1040*09a52d85SRichard Henderson     }
1041*09a52d85SRichard Henderson 
1042*09a52d85SRichard Henderson     /*
1043*09a52d85SRichard Henderson      * Rely on the TCG guarantee that out of range shifts produce
1044*09a52d85SRichard Henderson      * unspecified results, not undefined behaviour (i.e. no trap).
1045*09a52d85SRichard Henderson      * Discard out-of-range results after the fact.
1046*09a52d85SRichard Henderson      */
1047*09a52d85SRichard Henderson     tcg_gen_shlv_vec(vece, lval, src, lsh);
1048*09a52d85SRichard Henderson     tcg_gen_shrv_vec(vece, rval, src, rsh);
1049*09a52d85SRichard Henderson 
1050*09a52d85SRichard Henderson     max = tcg_temp_new_vec_matching(dst);
1051*09a52d85SRichard Henderson     tcg_gen_dupi_vec(vece, max, 8 << vece);
1052*09a52d85SRichard Henderson 
1053*09a52d85SRichard Henderson     /*
1054*09a52d85SRichard Henderson      * The choice of LT (signed) and GEU (unsigned) is biased toward
1055*09a52d85SRichard Henderson      * the instructions of the x86_64 host.  For MO_8, the whole byte
1056*09a52d85SRichard Henderson      * is significant so we must use an unsigned compare; otherwise we
1057*09a52d85SRichard Henderson      * have already masked to a byte and so a signed compare works.
1058*09a52d85SRichard Henderson      * Other tcg hosts have a full set of comparisons and do not care.
1059*09a52d85SRichard Henderson      */
1060*09a52d85SRichard Henderson     if (vece == MO_8) {
1061*09a52d85SRichard Henderson         tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
1062*09a52d85SRichard Henderson         tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
1063*09a52d85SRichard Henderson         tcg_gen_andc_vec(vece, lval, lval, lsh);
1064*09a52d85SRichard Henderson         tcg_gen_andc_vec(vece, rval, rval, rsh);
1065*09a52d85SRichard Henderson     } else {
1066*09a52d85SRichard Henderson         tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
1067*09a52d85SRichard Henderson         tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
1068*09a52d85SRichard Henderson         tcg_gen_and_vec(vece, lval, lval, lsh);
1069*09a52d85SRichard Henderson         tcg_gen_and_vec(vece, rval, rval, rsh);
1070*09a52d85SRichard Henderson     }
1071*09a52d85SRichard Henderson     tcg_gen_or_vec(vece, dst, lval, rval);
1072*09a52d85SRichard Henderson }
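
/*
 * Worked example for the comparison choice above, assuming vece == MO_8
 * and a shift byte of 0x80 (-128): negation wraps, so lsh and rsh both
 * hold 0x80.  The unsigned compare 0x80 >=u 8 correctly discards the
 * result, whereas a signed compare would see -128 < 8 and wrongly keep
 * it.  Wider elements have already been masked to [0, 0xff], so a signed
 * compare against the element size is sufficient there.
 */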
1073*09a52d85SRichard Henderson 
1074*09a52d85SRichard Henderson void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1075*09a52d85SRichard Henderson                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1076*09a52d85SRichard Henderson {
1077*09a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
1078*09a52d85SRichard Henderson         INDEX_op_neg_vec, INDEX_op_shlv_vec,
1079*09a52d85SRichard Henderson         INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
1080*09a52d85SRichard Henderson     };
1081*09a52d85SRichard Henderson     static const GVecGen3 ops[4] = {
1082*09a52d85SRichard Henderson         { .fniv = gen_ushl_vec,
1083*09a52d85SRichard Henderson           .fno = gen_helper_gvec_ushl_b,
1084*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1085*09a52d85SRichard Henderson           .vece = MO_8 },
1086*09a52d85SRichard Henderson         { .fniv = gen_ushl_vec,
1087*09a52d85SRichard Henderson           .fno = gen_helper_gvec_ushl_h,
1088*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1089*09a52d85SRichard Henderson           .vece = MO_16 },
1090*09a52d85SRichard Henderson         { .fni4 = gen_ushl_i32,
1091*09a52d85SRichard Henderson           .fniv = gen_ushl_vec,
1092*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1093*09a52d85SRichard Henderson           .vece = MO_32 },
1094*09a52d85SRichard Henderson         { .fni8 = gen_ushl_i64,
1095*09a52d85SRichard Henderson           .fniv = gen_ushl_vec,
1096*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1097*09a52d85SRichard Henderson           .vece = MO_64 },
1098*09a52d85SRichard Henderson     };
1099*09a52d85SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
1100*09a52d85SRichard Henderson }
1101*09a52d85SRichard Henderson 
1102*09a52d85SRichard Henderson void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
1103*09a52d85SRichard Henderson {
1104*09a52d85SRichard Henderson     TCGv_i32 lval = tcg_temp_new_i32();
1105*09a52d85SRichard Henderson     TCGv_i32 rval = tcg_temp_new_i32();
1106*09a52d85SRichard Henderson     TCGv_i32 lsh = tcg_temp_new_i32();
1107*09a52d85SRichard Henderson     TCGv_i32 rsh = tcg_temp_new_i32();
1108*09a52d85SRichard Henderson     TCGv_i32 zero = tcg_constant_i32(0);
1109*09a52d85SRichard Henderson     TCGv_i32 max = tcg_constant_i32(31);
1110*09a52d85SRichard Henderson 
1111*09a52d85SRichard Henderson     /*
1112*09a52d85SRichard Henderson      * Rely on the TCG guarantee that out of range shifts produce
1113*09a52d85SRichard Henderson      * unspecified results, not undefined behaviour (i.e. no trap).
1114*09a52d85SRichard Henderson      * Discard out-of-range results after the fact.
1115*09a52d85SRichard Henderson      */
1116*09a52d85SRichard Henderson     tcg_gen_ext8s_i32(lsh, shift);
1117*09a52d85SRichard Henderson     tcg_gen_neg_i32(rsh, lsh);
1118*09a52d85SRichard Henderson     tcg_gen_shl_i32(lval, src, lsh);
1119*09a52d85SRichard Henderson     tcg_gen_umin_i32(rsh, rsh, max);
1120*09a52d85SRichard Henderson     tcg_gen_sar_i32(rval, src, rsh);
1121*09a52d85SRichard Henderson     tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
1122*09a52d85SRichard Henderson     tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
1123*09a52d85SRichard Henderson }
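
/*
 * Illustrative scalar sketch of the SSHL semantics generated above
 * (hypothetical name; assumes the usual arithmetic ">>" on signed
 * values, as QEMU does): negative counts shift right arithmetically,
 * and an out-of-range right shift degenerates into a shift by 31,
 * i.e. pure sign replication.
 */
static inline int32_t sshl32_sketch(int32_t src, int8_t shift)
{
    if (shift >= 32) {
        return 0;                                  /* out-of-range left shift */
    }
    if (shift >= 0) {
        return (int32_t)((uint32_t)src << shift);  /* in-range left shift */
    }
    return src >> (-shift < 31 ? -shift : 31);     /* arithmetic right shift */
}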
1124*09a52d85SRichard Henderson 
1125*09a52d85SRichard Henderson void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
1126*09a52d85SRichard Henderson {
1127*09a52d85SRichard Henderson     TCGv_i64 lval = tcg_temp_new_i64();
1128*09a52d85SRichard Henderson     TCGv_i64 rval = tcg_temp_new_i64();
1129*09a52d85SRichard Henderson     TCGv_i64 lsh = tcg_temp_new_i64();
1130*09a52d85SRichard Henderson     TCGv_i64 rsh = tcg_temp_new_i64();
1131*09a52d85SRichard Henderson     TCGv_i64 zero = tcg_constant_i64(0);
1132*09a52d85SRichard Henderson     TCGv_i64 max = tcg_constant_i64(63);
1133*09a52d85SRichard Henderson 
1134*09a52d85SRichard Henderson     /*
1135*09a52d85SRichard Henderson      * Rely on the TCG guarantee that out of range shifts produce
1136*09a52d85SRichard Henderson      * unspecified results, not undefined behaviour (i.e. no trap).
1137*09a52d85SRichard Henderson      * Discard out-of-range results after the fact.
1138*09a52d85SRichard Henderson      */
1139*09a52d85SRichard Henderson     tcg_gen_ext8s_i64(lsh, shift);
1140*09a52d85SRichard Henderson     tcg_gen_neg_i64(rsh, lsh);
1141*09a52d85SRichard Henderson     tcg_gen_shl_i64(lval, src, lsh);
1142*09a52d85SRichard Henderson     tcg_gen_umin_i64(rsh, rsh, max);
1143*09a52d85SRichard Henderson     tcg_gen_sar_i64(rval, src, rsh);
1144*09a52d85SRichard Henderson     tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
1145*09a52d85SRichard Henderson     tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
1146*09a52d85SRichard Henderson }
1147*09a52d85SRichard Henderson 
1148*09a52d85SRichard Henderson static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
1149*09a52d85SRichard Henderson                          TCGv_vec src, TCGv_vec shift)
1150*09a52d85SRichard Henderson {
1151*09a52d85SRichard Henderson     TCGv_vec lval = tcg_temp_new_vec_matching(dst);
1152*09a52d85SRichard Henderson     TCGv_vec rval = tcg_temp_new_vec_matching(dst);
1153*09a52d85SRichard Henderson     TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
1154*09a52d85SRichard Henderson     TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
1155*09a52d85SRichard Henderson     TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
1156*09a52d85SRichard Henderson 
1157*09a52d85SRichard Henderson     /*
1158*09a52d85SRichard Henderson      * Rely on the TCG guarantee that out of range shifts produce
1159*09a52d85SRichard Henderson      * unspecified results, not undefined behaviour (i.e. no trap).
1160*09a52d85SRichard Henderson      * Discard out-of-range results after the fact.
1161*09a52d85SRichard Henderson      */
1162*09a52d85SRichard Henderson     tcg_gen_neg_vec(vece, rsh, shift);
1163*09a52d85SRichard Henderson     if (vece == MO_8) {
1164*09a52d85SRichard Henderson         tcg_gen_mov_vec(lsh, shift);
1165*09a52d85SRichard Henderson     } else {
1166*09a52d85SRichard Henderson         tcg_gen_dupi_vec(vece, tmp, 0xff);
1167*09a52d85SRichard Henderson         tcg_gen_and_vec(vece, lsh, shift, tmp);
1168*09a52d85SRichard Henderson         tcg_gen_and_vec(vece, rsh, rsh, tmp);
1169*09a52d85SRichard Henderson     }
1170*09a52d85SRichard Henderson 
1171*09a52d85SRichard Henderson     /* Bound rsh so an out-of-range right shift fills the element with sign bits.  */
1172*09a52d85SRichard Henderson     tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
1173*09a52d85SRichard Henderson     tcg_gen_umin_vec(vece, rsh, rsh, tmp);
1174*09a52d85SRichard Henderson     tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
1175*09a52d85SRichard Henderson 
1176*09a52d85SRichard Henderson     tcg_gen_shlv_vec(vece, lval, src, lsh);
1177*09a52d85SRichard Henderson     tcg_gen_sarv_vec(vece, rval, src, rsh);
1178*09a52d85SRichard Henderson 
1179*09a52d85SRichard Henderson     /* Keep the left-shift result only where the shift count is in range.  */
1180*09a52d85SRichard Henderson     tcg_gen_andc_vec(vece, lval, lval, tmp);
1181*09a52d85SRichard Henderson 
1182*09a52d85SRichard Henderson     /* Select between left and right shift.  */
1183*09a52d85SRichard Henderson     if (vece == MO_8) {
1184*09a52d85SRichard Henderson         tcg_gen_dupi_vec(vece, tmp, 0);
1185*09a52d85SRichard Henderson         tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
1186*09a52d85SRichard Henderson     } else {
1187*09a52d85SRichard Henderson         tcg_gen_dupi_vec(vece, tmp, 0x80);
1188*09a52d85SRichard Henderson         tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
1189*09a52d85SRichard Henderson     }
1190*09a52d85SRichard Henderson }
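
/*
 * Worked example for the final select above: for elements wider than a
 * byte the shift count was masked to [0, 0xff], so the sign of the
 * original byte survives as bit 7.  Comparing lsh <s 0x80 therefore
 * picks the left-shift result for counts 0..0x7f and the right-shift
 * result for negative counts 0x80..0xff; MO_8 keeps the whole byte and
 * can compare directly against zero instead.
 */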
1191*09a52d85SRichard Henderson 
1192*09a52d85SRichard Henderson void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1193*09a52d85SRichard Henderson                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1194*09a52d85SRichard Henderson {
1195*09a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
1196*09a52d85SRichard Henderson         INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
1197*09a52d85SRichard Henderson         INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
1198*09a52d85SRichard Henderson     };
1199*09a52d85SRichard Henderson     static const GVecGen3 ops[4] = {
1200*09a52d85SRichard Henderson         { .fniv = gen_sshl_vec,
1201*09a52d85SRichard Henderson           .fno = gen_helper_gvec_sshl_b,
1202*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1203*09a52d85SRichard Henderson           .vece = MO_8 },
1204*09a52d85SRichard Henderson         { .fniv = gen_sshl_vec,
1205*09a52d85SRichard Henderson           .fno = gen_helper_gvec_sshl_h,
1206*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1207*09a52d85SRichard Henderson           .vece = MO_16 },
1208*09a52d85SRichard Henderson         { .fni4 = gen_sshl_i32,
1209*09a52d85SRichard Henderson           .fniv = gen_sshl_vec,
1210*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1211*09a52d85SRichard Henderson           .vece = MO_32 },
1212*09a52d85SRichard Henderson         { .fni8 = gen_sshl_i64,
1213*09a52d85SRichard Henderson           .fniv = gen_sshl_vec,
1214*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1215*09a52d85SRichard Henderson           .vece = MO_64 },
1216*09a52d85SRichard Henderson     };
1217*09a52d85SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
1218*09a52d85SRichard Henderson }
1219*09a52d85SRichard Henderson 
1220*09a52d85SRichard Henderson static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
1221*09a52d85SRichard Henderson                           TCGv_vec a, TCGv_vec b)
1222*09a52d85SRichard Henderson {
1223*09a52d85SRichard Henderson     TCGv_vec x = tcg_temp_new_vec_matching(t);
1224*09a52d85SRichard Henderson     tcg_gen_add_vec(vece, x, a, b);
1225*09a52d85SRichard Henderson     tcg_gen_usadd_vec(vece, t, a, b);
1226*09a52d85SRichard Henderson     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
1227*09a52d85SRichard Henderson     tcg_gen_or_vec(vece, sat, sat, x);
1228*09a52d85SRichard Henderson }
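
/*
 * Scalar sketch of the saturation-detection idiom used here and in the
 * expanders below (hypothetical name): compute both the wrapping and the
 * saturating result; any lane where the two differ has saturated, and
 * that difference is folded into the QC flag.
 */
static inline uint32_t uqadd32_sketch(uint32_t a, uint32_t b, bool *qc)
{
    uint32_t wrapped = a + b;
    uint32_t saturated = wrapped < a ? UINT32_MAX : wrapped;

    *qc |= wrapped != saturated;
    return saturated;
}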
1229*09a52d85SRichard Henderson 
1230*09a52d85SRichard Henderson void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1231*09a52d85SRichard Henderson                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1232*09a52d85SRichard Henderson {
1233*09a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
1234*09a52d85SRichard Henderson         INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
1235*09a52d85SRichard Henderson     };
1236*09a52d85SRichard Henderson     static const GVecGen4 ops[4] = {
1237*09a52d85SRichard Henderson         { .fniv = gen_uqadd_vec,
1238*09a52d85SRichard Henderson           .fno = gen_helper_gvec_uqadd_b,
1239*09a52d85SRichard Henderson           .write_aofs = true,
1240*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1241*09a52d85SRichard Henderson           .vece = MO_8 },
1242*09a52d85SRichard Henderson         { .fniv = gen_uqadd_vec,
1243*09a52d85SRichard Henderson           .fno = gen_helper_gvec_uqadd_h,
1244*09a52d85SRichard Henderson           .write_aofs = true,
1245*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1246*09a52d85SRichard Henderson           .vece = MO_16 },
1247*09a52d85SRichard Henderson         { .fniv = gen_uqadd_vec,
1248*09a52d85SRichard Henderson           .fno = gen_helper_gvec_uqadd_s,
1249*09a52d85SRichard Henderson           .write_aofs = true,
1250*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1251*09a52d85SRichard Henderson           .vece = MO_32 },
1252*09a52d85SRichard Henderson         { .fniv = gen_uqadd_vec,
1253*09a52d85SRichard Henderson           .fno = gen_helper_gvec_uqadd_d,
1254*09a52d85SRichard Henderson           .write_aofs = true,
1255*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1256*09a52d85SRichard Henderson           .vece = MO_64 },
1257*09a52d85SRichard Henderson     };
1258*09a52d85SRichard Henderson     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
1259*09a52d85SRichard Henderson                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
1260*09a52d85SRichard Henderson }
1261*09a52d85SRichard Henderson 
1262*09a52d85SRichard Henderson static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
1263*09a52d85SRichard Henderson                           TCGv_vec a, TCGv_vec b)
1264*09a52d85SRichard Henderson {
1265*09a52d85SRichard Henderson     TCGv_vec x = tcg_temp_new_vec_matching(t);
1266*09a52d85SRichard Henderson     tcg_gen_add_vec(vece, x, a, b);
1267*09a52d85SRichard Henderson     tcg_gen_ssadd_vec(vece, t, a, b);
1268*09a52d85SRichard Henderson     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
1269*09a52d85SRichard Henderson     tcg_gen_or_vec(vece, sat, sat, x);
1270*09a52d85SRichard Henderson }
1271*09a52d85SRichard Henderson 
1272*09a52d85SRichard Henderson void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1273*09a52d85SRichard Henderson                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1274*09a52d85SRichard Henderson {
1275*09a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
1276*09a52d85SRichard Henderson         INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
1277*09a52d85SRichard Henderson     };
1278*09a52d85SRichard Henderson     static const GVecGen4 ops[4] = {
1279*09a52d85SRichard Henderson         { .fniv = gen_sqadd_vec,
1280*09a52d85SRichard Henderson           .fno = gen_helper_gvec_sqadd_b,
1281*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1282*09a52d85SRichard Henderson           .write_aofs = true,
1283*09a52d85SRichard Henderson           .vece = MO_8 },
1284*09a52d85SRichard Henderson         { .fniv = gen_sqadd_vec,
1285*09a52d85SRichard Henderson           .fno = gen_helper_gvec_sqadd_h,
1286*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1287*09a52d85SRichard Henderson           .write_aofs = true,
1288*09a52d85SRichard Henderson           .vece = MO_16 },
1289*09a52d85SRichard Henderson         { .fniv = gen_sqadd_vec,
1290*09a52d85SRichard Henderson           .fno = gen_helper_gvec_sqadd_s,
1291*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1292*09a52d85SRichard Henderson           .write_aofs = true,
1293*09a52d85SRichard Henderson           .vece = MO_32 },
1294*09a52d85SRichard Henderson         { .fniv = gen_sqadd_vec,
1295*09a52d85SRichard Henderson           .fno = gen_helper_gvec_sqadd_d,
1296*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1297*09a52d85SRichard Henderson           .write_aofs = true,
1298*09a52d85SRichard Henderson           .vece = MO_64 },
1299*09a52d85SRichard Henderson     };
1300*09a52d85SRichard Henderson     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
1301*09a52d85SRichard Henderson                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
1302*09a52d85SRichard Henderson }
1303*09a52d85SRichard Henderson 
1304*09a52d85SRichard Henderson static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
1305*09a52d85SRichard Henderson                           TCGv_vec a, TCGv_vec b)
1306*09a52d85SRichard Henderson {
1307*09a52d85SRichard Henderson     TCGv_vec x = tcg_temp_new_vec_matching(t);
1308*09a52d85SRichard Henderson     tcg_gen_sub_vec(vece, x, a, b);
1309*09a52d85SRichard Henderson     tcg_gen_ussub_vec(vece, t, a, b);
1310*09a52d85SRichard Henderson     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
1311*09a52d85SRichard Henderson     tcg_gen_or_vec(vece, sat, sat, x);
1312*09a52d85SRichard Henderson }
1313*09a52d85SRichard Henderson 
1314*09a52d85SRichard Henderson void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1315*09a52d85SRichard Henderson                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1316*09a52d85SRichard Henderson {
1317*09a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
1318*09a52d85SRichard Henderson         INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
1319*09a52d85SRichard Henderson     };
1320*09a52d85SRichard Henderson     static const GVecGen4 ops[4] = {
1321*09a52d85SRichard Henderson         { .fniv = gen_uqsub_vec,
1322*09a52d85SRichard Henderson           .fno = gen_helper_gvec_uqsub_b,
1323*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1324*09a52d85SRichard Henderson           .write_aofs = true,
1325*09a52d85SRichard Henderson           .vece = MO_8 },
1326*09a52d85SRichard Henderson         { .fniv = gen_uqsub_vec,
1327*09a52d85SRichard Henderson           .fno = gen_helper_gvec_uqsub_h,
1328*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1329*09a52d85SRichard Henderson           .write_aofs = true,
1330*09a52d85SRichard Henderson           .vece = MO_16 },
1331*09a52d85SRichard Henderson         { .fniv = gen_uqsub_vec,
1332*09a52d85SRichard Henderson           .fno = gen_helper_gvec_uqsub_s,
1333*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1334*09a52d85SRichard Henderson           .write_aofs = true,
1335*09a52d85SRichard Henderson           .vece = MO_32 },
1336*09a52d85SRichard Henderson         { .fniv = gen_uqsub_vec,
1337*09a52d85SRichard Henderson           .fno = gen_helper_gvec_uqsub_d,
1338*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1339*09a52d85SRichard Henderson           .write_aofs = true,
1340*09a52d85SRichard Henderson           .vece = MO_64 },
1341*09a52d85SRichard Henderson     };
1342*09a52d85SRichard Henderson     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
1343*09a52d85SRichard Henderson                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
1344*09a52d85SRichard Henderson }
1345*09a52d85SRichard Henderson 
1346*09a52d85SRichard Henderson static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
1347*09a52d85SRichard Henderson                           TCGv_vec a, TCGv_vec b)
1348*09a52d85SRichard Henderson {
1349*09a52d85SRichard Henderson     TCGv_vec x = tcg_temp_new_vec_matching(t);
1350*09a52d85SRichard Henderson     tcg_gen_sub_vec(vece, x, a, b);
1351*09a52d85SRichard Henderson     tcg_gen_sssub_vec(vece, t, a, b);
1352*09a52d85SRichard Henderson     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
1353*09a52d85SRichard Henderson     tcg_gen_or_vec(vece, sat, sat, x);
1354*09a52d85SRichard Henderson }
1355*09a52d85SRichard Henderson 
1356*09a52d85SRichard Henderson void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1357*09a52d85SRichard Henderson                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1358*09a52d85SRichard Henderson {
1359*09a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
1360*09a52d85SRichard Henderson         INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
1361*09a52d85SRichard Henderson     };
1362*09a52d85SRichard Henderson     static const GVecGen4 ops[4] = {
1363*09a52d85SRichard Henderson         { .fniv = gen_sqsub_vec,
1364*09a52d85SRichard Henderson           .fno = gen_helper_gvec_sqsub_b,
1365*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1366*09a52d85SRichard Henderson           .write_aofs = true,
1367*09a52d85SRichard Henderson           .vece = MO_8 },
1368*09a52d85SRichard Henderson         { .fniv = gen_sqsub_vec,
1369*09a52d85SRichard Henderson           .fno = gen_helper_gvec_sqsub_h,
1370*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1371*09a52d85SRichard Henderson           .write_aofs = true,
1372*09a52d85SRichard Henderson           .vece = MO_16 },
1373*09a52d85SRichard Henderson         { .fniv = gen_sqsub_vec,
1374*09a52d85SRichard Henderson           .fno = gen_helper_gvec_sqsub_s,
1375*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1376*09a52d85SRichard Henderson           .write_aofs = true,
1377*09a52d85SRichard Henderson           .vece = MO_32 },
1378*09a52d85SRichard Henderson         { .fniv = gen_sqsub_vec,
1379*09a52d85SRichard Henderson           .fno = gen_helper_gvec_sqsub_d,
1380*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1381*09a52d85SRichard Henderson           .write_aofs = true,
1382*09a52d85SRichard Henderson           .vece = MO_64 },
1383*09a52d85SRichard Henderson     };
1384*09a52d85SRichard Henderson     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
1385*09a52d85SRichard Henderson                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
1386*09a52d85SRichard Henderson }
1387*09a52d85SRichard Henderson 
1388*09a52d85SRichard Henderson static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
1389*09a52d85SRichard Henderson {
1390*09a52d85SRichard Henderson     TCGv_i32 t = tcg_temp_new_i32();
1391*09a52d85SRichard Henderson 
1392*09a52d85SRichard Henderson     tcg_gen_sub_i32(t, a, b);
1393*09a52d85SRichard Henderson     tcg_gen_sub_i32(d, b, a);
1394*09a52d85SRichard Henderson     tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
1395*09a52d85SRichard Henderson }
1396*09a52d85SRichard Henderson 
1397*09a52d85SRichard Henderson static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
1398*09a52d85SRichard Henderson {
1399*09a52d85SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
1400*09a52d85SRichard Henderson 
1401*09a52d85SRichard Henderson     tcg_gen_sub_i64(t, a, b);
1402*09a52d85SRichard Henderson     tcg_gen_sub_i64(d, b, a);
1403*09a52d85SRichard Henderson     tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
1404*09a52d85SRichard Henderson }
1405*09a52d85SRichard Henderson 
1406*09a52d85SRichard Henderson static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
1407*09a52d85SRichard Henderson {
1408*09a52d85SRichard Henderson     TCGv_vec t = tcg_temp_new_vec_matching(d);
1409*09a52d85SRichard Henderson 
1410*09a52d85SRichard Henderson     tcg_gen_smin_vec(vece, t, a, b);
1411*09a52d85SRichard Henderson     tcg_gen_smax_vec(vece, d, a, b);
1412*09a52d85SRichard Henderson     tcg_gen_sub_vec(vece, d, d, t);
1413*09a52d85SRichard Henderson }
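
/*
 * Illustrative scalar form of the signed absolute difference computed
 * above (hypothetical name): max(a, b) - min(a, b), i.e. |a - b| taken
 * modulo 2^32, exactly as the vector smin/smax/sub sequence produces it.
 */
static inline uint32_t sabd32_sketch(int32_t a, int32_t b)
{
    return a > b ? (uint32_t)a - (uint32_t)b : (uint32_t)b - (uint32_t)a;
}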
1414*09a52d85SRichard Henderson 
1415*09a52d85SRichard Henderson void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1416*09a52d85SRichard Henderson                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1417*09a52d85SRichard Henderson {
1418*09a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
1419*09a52d85SRichard Henderson         INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
1420*09a52d85SRichard Henderson     };
1421*09a52d85SRichard Henderson     static const GVecGen3 ops[4] = {
1422*09a52d85SRichard Henderson         { .fniv = gen_sabd_vec,
1423*09a52d85SRichard Henderson           .fno = gen_helper_gvec_sabd_b,
1424*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1425*09a52d85SRichard Henderson           .vece = MO_8 },
1426*09a52d85SRichard Henderson         { .fniv = gen_sabd_vec,
1427*09a52d85SRichard Henderson           .fno = gen_helper_gvec_sabd_h,
1428*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1429*09a52d85SRichard Henderson           .vece = MO_16 },
1430*09a52d85SRichard Henderson         { .fni4 = gen_sabd_i32,
1431*09a52d85SRichard Henderson           .fniv = gen_sabd_vec,
1432*09a52d85SRichard Henderson           .fno = gen_helper_gvec_sabd_s,
1433*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1434*09a52d85SRichard Henderson           .vece = MO_32 },
1435*09a52d85SRichard Henderson         { .fni8 = gen_sabd_i64,
1436*09a52d85SRichard Henderson           .fniv = gen_sabd_vec,
1437*09a52d85SRichard Henderson           .fno = gen_helper_gvec_sabd_d,
1438*09a52d85SRichard Henderson           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1439*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1440*09a52d85SRichard Henderson           .vece = MO_64 },
1441*09a52d85SRichard Henderson     };
1442*09a52d85SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
1443*09a52d85SRichard Henderson }
1444*09a52d85SRichard Henderson 
1445*09a52d85SRichard Henderson static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
1446*09a52d85SRichard Henderson {
1447*09a52d85SRichard Henderson     TCGv_i32 t = tcg_temp_new_i32();
1448*09a52d85SRichard Henderson 
1449*09a52d85SRichard Henderson     tcg_gen_sub_i32(t, a, b);
1450*09a52d85SRichard Henderson     tcg_gen_sub_i32(d, b, a);
1451*09a52d85SRichard Henderson     tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
1452*09a52d85SRichard Henderson }
1453*09a52d85SRichard Henderson 
1454*09a52d85SRichard Henderson static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
1455*09a52d85SRichard Henderson {
1456*09a52d85SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
1457*09a52d85SRichard Henderson 
1458*09a52d85SRichard Henderson     tcg_gen_sub_i64(t, a, b);
1459*09a52d85SRichard Henderson     tcg_gen_sub_i64(d, b, a);
1460*09a52d85SRichard Henderson     tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
1461*09a52d85SRichard Henderson }
1462*09a52d85SRichard Henderson 
1463*09a52d85SRichard Henderson static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
1464*09a52d85SRichard Henderson {
1465*09a52d85SRichard Henderson     TCGv_vec t = tcg_temp_new_vec_matching(d);
1466*09a52d85SRichard Henderson 
1467*09a52d85SRichard Henderson     tcg_gen_umin_vec(vece, t, a, b);
1468*09a52d85SRichard Henderson     tcg_gen_umax_vec(vece, d, a, b);
1469*09a52d85SRichard Henderson     tcg_gen_sub_vec(vece, d, d, t);
1470*09a52d85SRichard Henderson }
1471*09a52d85SRichard Henderson 
1472*09a52d85SRichard Henderson void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1473*09a52d85SRichard Henderson                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1474*09a52d85SRichard Henderson {
1475*09a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
1476*09a52d85SRichard Henderson         INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
1477*09a52d85SRichard Henderson     };
1478*09a52d85SRichard Henderson     static const GVecGen3 ops[4] = {
1479*09a52d85SRichard Henderson         { .fniv = gen_uabd_vec,
1480*09a52d85SRichard Henderson           .fno = gen_helper_gvec_uabd_b,
1481*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1482*09a52d85SRichard Henderson           .vece = MO_8 },
1483*09a52d85SRichard Henderson         { .fniv = gen_uabd_vec,
1484*09a52d85SRichard Henderson           .fno = gen_helper_gvec_uabd_h,
1485*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1486*09a52d85SRichard Henderson           .vece = MO_16 },
1487*09a52d85SRichard Henderson         { .fni4 = gen_uabd_i32,
1488*09a52d85SRichard Henderson           .fniv = gen_uabd_vec,
1489*09a52d85SRichard Henderson           .fno = gen_helper_gvec_uabd_s,
1490*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1491*09a52d85SRichard Henderson           .vece = MO_32 },
1492*09a52d85SRichard Henderson         { .fni8 = gen_uabd_i64,
1493*09a52d85SRichard Henderson           .fniv = gen_uabd_vec,
1494*09a52d85SRichard Henderson           .fno = gen_helper_gvec_uabd_d,
1495*09a52d85SRichard Henderson           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1496*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1497*09a52d85SRichard Henderson           .vece = MO_64 },
1498*09a52d85SRichard Henderson     };
1499*09a52d85SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
1500*09a52d85SRichard Henderson }
1501*09a52d85SRichard Henderson 
1502*09a52d85SRichard Henderson static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
1503*09a52d85SRichard Henderson {
1504*09a52d85SRichard Henderson     TCGv_i32 t = tcg_temp_new_i32();
1505*09a52d85SRichard Henderson     gen_sabd_i32(t, a, b);
1506*09a52d85SRichard Henderson     tcg_gen_add_i32(d, d, t);
1507*09a52d85SRichard Henderson }
1508*09a52d85SRichard Henderson 
1509*09a52d85SRichard Henderson static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
1510*09a52d85SRichard Henderson {
1511*09a52d85SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
1512*09a52d85SRichard Henderson     gen_sabd_i64(t, a, b);
1513*09a52d85SRichard Henderson     tcg_gen_add_i64(d, d, t);
1514*09a52d85SRichard Henderson }
1515*09a52d85SRichard Henderson 
1516*09a52d85SRichard Henderson static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
1517*09a52d85SRichard Henderson {
1518*09a52d85SRichard Henderson     TCGv_vec t = tcg_temp_new_vec_matching(d);
1519*09a52d85SRichard Henderson     gen_sabd_vec(vece, t, a, b);
1520*09a52d85SRichard Henderson     tcg_gen_add_vec(vece, d, d, t);
1521*09a52d85SRichard Henderson }
1522*09a52d85SRichard Henderson 
1523*09a52d85SRichard Henderson void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1524*09a52d85SRichard Henderson                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1525*09a52d85SRichard Henderson {
1526*09a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
1527*09a52d85SRichard Henderson         INDEX_op_sub_vec, INDEX_op_add_vec,
1528*09a52d85SRichard Henderson         INDEX_op_smin_vec, INDEX_op_smax_vec, 0
1529*09a52d85SRichard Henderson     };
1530*09a52d85SRichard Henderson     static const GVecGen3 ops[4] = {
1531*09a52d85SRichard Henderson         { .fniv = gen_saba_vec,
1532*09a52d85SRichard Henderson           .fno = gen_helper_gvec_saba_b,
1533*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1534*09a52d85SRichard Henderson           .load_dest = true,
1535*09a52d85SRichard Henderson           .vece = MO_8 },
1536*09a52d85SRichard Henderson         { .fniv = gen_saba_vec,
1537*09a52d85SRichard Henderson           .fno = gen_helper_gvec_saba_h,
1538*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1539*09a52d85SRichard Henderson           .load_dest = true,
1540*09a52d85SRichard Henderson           .vece = MO_16 },
1541*09a52d85SRichard Henderson         { .fni4 = gen_saba_i32,
1542*09a52d85SRichard Henderson           .fniv = gen_saba_vec,
1543*09a52d85SRichard Henderson           .fno = gen_helper_gvec_saba_s,
1544*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1545*09a52d85SRichard Henderson           .load_dest = true,
1546*09a52d85SRichard Henderson           .vece = MO_32 },
1547*09a52d85SRichard Henderson         { .fni8 = gen_saba_i64,
1548*09a52d85SRichard Henderson           .fniv = gen_saba_vec,
1549*09a52d85SRichard Henderson           .fno = gen_helper_gvec_saba_d,
1550*09a52d85SRichard Henderson           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1551*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1552*09a52d85SRichard Henderson           .load_dest = true,
1553*09a52d85SRichard Henderson           .vece = MO_64 },
1554*09a52d85SRichard Henderson     };
1555*09a52d85SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
1556*09a52d85SRichard Henderson }
1557*09a52d85SRichard Henderson 
1558*09a52d85SRichard Henderson static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
1559*09a52d85SRichard Henderson {
1560*09a52d85SRichard Henderson     TCGv_i32 t = tcg_temp_new_i32();
1561*09a52d85SRichard Henderson     gen_uabd_i32(t, a, b);
1562*09a52d85SRichard Henderson     tcg_gen_add_i32(d, d, t);
1563*09a52d85SRichard Henderson }
1564*09a52d85SRichard Henderson 
1565*09a52d85SRichard Henderson static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
1566*09a52d85SRichard Henderson {
1567*09a52d85SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
1568*09a52d85SRichard Henderson     gen_uabd_i64(t, a, b);
1569*09a52d85SRichard Henderson     tcg_gen_add_i64(d, d, t);
1570*09a52d85SRichard Henderson }
1571*09a52d85SRichard Henderson 
1572*09a52d85SRichard Henderson static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
1573*09a52d85SRichard Henderson {
1574*09a52d85SRichard Henderson     TCGv_vec t = tcg_temp_new_vec_matching(d);
1575*09a52d85SRichard Henderson     gen_uabd_vec(vece, t, a, b);
1576*09a52d85SRichard Henderson     tcg_gen_add_vec(vece, d, d, t);
1577*09a52d85SRichard Henderson }
1578*09a52d85SRichard Henderson 
1579*09a52d85SRichard Henderson void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1580*09a52d85SRichard Henderson                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1581*09a52d85SRichard Henderson {
1582*09a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
1583*09a52d85SRichard Henderson         INDEX_op_sub_vec, INDEX_op_add_vec,
1584*09a52d85SRichard Henderson         INDEX_op_umin_vec, INDEX_op_umax_vec, 0
1585*09a52d85SRichard Henderson     };
1586*09a52d85SRichard Henderson     static const GVecGen3 ops[4] = {
1587*09a52d85SRichard Henderson         { .fniv = gen_uaba_vec,
1588*09a52d85SRichard Henderson           .fno = gen_helper_gvec_uaba_b,
1589*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1590*09a52d85SRichard Henderson           .load_dest = true,
1591*09a52d85SRichard Henderson           .vece = MO_8 },
1592*09a52d85SRichard Henderson         { .fniv = gen_uaba_vec,
1593*09a52d85SRichard Henderson           .fno = gen_helper_gvec_uaba_h,
1594*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1595*09a52d85SRichard Henderson           .load_dest = true,
1596*09a52d85SRichard Henderson           .vece = MO_16 },
1597*09a52d85SRichard Henderson         { .fni4 = gen_uaba_i32,
1598*09a52d85SRichard Henderson           .fniv = gen_uaba_vec,
1599*09a52d85SRichard Henderson           .fno = gen_helper_gvec_uaba_s,
1600*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1601*09a52d85SRichard Henderson           .load_dest = true,
1602*09a52d85SRichard Henderson           .vece = MO_32 },
1603*09a52d85SRichard Henderson         { .fni8 = gen_uaba_i64,
1604*09a52d85SRichard Henderson           .fniv = gen_uaba_vec,
1605*09a52d85SRichard Henderson           .fno = gen_helper_gvec_uaba_d,
1606*09a52d85SRichard Henderson           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1607*09a52d85SRichard Henderson           .opt_opc = vecop_list,
1608*09a52d85SRichard Henderson           .load_dest = true,
1609*09a52d85SRichard Henderson           .vece = MO_64 },
1610*09a52d85SRichard Henderson     };
1611*09a52d85SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
1612*09a52d85SRichard Henderson }
1613