1 /*
2 * ARM generic vector expansion
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 * Copyright (c) 2005-2007 CodeSourcery
6 * Copyright (c) 2007 OpenedHand, Ltd.
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 */
21
22 #include "qemu/osdep.h"
23 #include "translate.h"
24
25
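/*
 * Note: the helpers invoked below take a fourth pointer argument; here it
 * is pointed at vfp.qc so that saturating operations can record the
 * cumulative QC (saturation) flag while operating on the three vector
 * operands.
 */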
26 static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
27 uint32_t opr_sz, uint32_t max_sz,
28 gen_helper_gvec_3_ptr *fn)
29 {
30 TCGv_ptr qc_ptr = tcg_temp_new_ptr();
31
32 tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
33 tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc));
34 tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
35 opr_sz, max_sz, 0, fn);
36 }
37
38 void gen_gvec_sqdmulh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
39 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
40 {
41 static gen_helper_gvec_3_ptr * const fns[2] = {
42 gen_helper_neon_sqdmulh_h, gen_helper_neon_sqdmulh_s
43 };
44 tcg_debug_assert(vece >= 1 && vece <= 2);
45 gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
46 }
47
48 void gen_gvec_sqrdmulh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
49 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
50 {
51 static gen_helper_gvec_3_ptr * const fns[2] = {
52 gen_helper_neon_sqrdmulh_h, gen_helper_neon_sqrdmulh_s
53 };
54 tcg_debug_assert(vece >= 1 && vece <= 2);
55 gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
56 }
57
58 void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
59 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
60 {
61 static gen_helper_gvec_3_ptr * const fns[2] = {
62 gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
63 };
64 tcg_debug_assert(vece >= 1 && vece <= 2);
65 gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
66 }
67
68 void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
69 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
70 {
71 static gen_helper_gvec_3_ptr * const fns[2] = {
72 gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
73 };
74 tcg_debug_assert(vece >= 1 && vece <= 2);
75 gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
76 }
77
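/*
 * Expand the "compare against zero" operations: each instance below emits
 * a gvec compare-with-immediate-zero, writing all ones into a destination
 * element when the condition holds and all zeros otherwise.
 */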
78 #define GEN_CMP0(NAME, COND) \
79 void NAME(unsigned vece, uint32_t d, uint32_t m, \
80 uint32_t opr_sz, uint32_t max_sz) \
81 { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); }
82
83 GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ)
84 GEN_CMP0(gen_gvec_cle0, TCG_COND_LE)
85 GEN_CMP0(gen_gvec_cge0, TCG_COND_GE)
86 GEN_CMP0(gen_gvec_clt0, TCG_COND_LT)
87 GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT)
88
89 #undef GEN_CMP0
90
91 void gen_gvec_sshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
92 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
93 {
94 /* Signed shift out of range results in all-sign-bits */
95 shift = MIN(shift, (8 << vece) - 1);
96 tcg_gen_gvec_sari(vece, rd_ofs, rm_ofs, shift, opr_sz, max_sz);
97 }
98
99 void gen_gvec_ushr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
100 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
101 {
102 /* Unsigned shift out of range results in all-zero-bits */
103 if (shift >= (8 << vece)) {
104 tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
105 } else {
106 tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift, opr_sz, max_sz);
107 }
108 }
109
110 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
111 {
112 tcg_gen_vec_sar8i_i64(a, a, shift);
113 tcg_gen_vec_add8_i64(d, d, a);
114 }
115
116 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
117 {
118 tcg_gen_vec_sar16i_i64(a, a, shift);
119 tcg_gen_vec_add16_i64(d, d, a);
120 }
121
122 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
123 {
124 tcg_gen_sari_i32(a, a, shift);
125 tcg_gen_add_i32(d, d, a);
126 }
127
128 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
129 {
130 tcg_gen_sari_i64(a, a, shift);
131 tcg_gen_add_i64(d, d, a);
132 }
133
134 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
135 {
136 tcg_gen_sari_vec(vece, a, a, sh);
137 tcg_gen_add_vec(vece, d, d, a);
138 }
139
140 void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
141 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
142 {
143 static const TCGOpcode vecop_list[] = {
144 INDEX_op_sari_vec, INDEX_op_add_vec, 0
145 };
146 static const GVecGen2i ops[4] = {
147 { .fni8 = gen_ssra8_i64,
148 .fniv = gen_ssra_vec,
149 .fno = gen_helper_gvec_ssra_b,
150 .load_dest = true,
151 .opt_opc = vecop_list,
152 .vece = MO_8 },
153 { .fni8 = gen_ssra16_i64,
154 .fniv = gen_ssra_vec,
155 .fno = gen_helper_gvec_ssra_h,
156 .load_dest = true,
157 .opt_opc = vecop_list,
158 .vece = MO_16 },
159 { .fni4 = gen_ssra32_i32,
160 .fniv = gen_ssra_vec,
161 .fno = gen_helper_gvec_ssra_s,
162 .load_dest = true,
163 .opt_opc = vecop_list,
164 .vece = MO_32 },
165 { .fni8 = gen_ssra64_i64,
166 .fniv = gen_ssra_vec,
167 .fno = gen_helper_gvec_ssra_d,
168 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
169 .opt_opc = vecop_list,
170 .load_dest = true,
171 .vece = MO_64 },
172 };
173
174 /* tszimm encoding produces immediates in the range [1..esize]. */
175 tcg_debug_assert(shift > 0);
176 tcg_debug_assert(shift <= (8 << vece));
177
178 /*
179 * Shifts larger than the element size are architecturally valid.
180 * Signed results in all sign bits.
181 */
182 shift = MIN(shift, (8 << vece) - 1);
183 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
184 }
185
186 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
187 {
188 tcg_gen_vec_shr8i_i64(a, a, shift);
189 tcg_gen_vec_add8_i64(d, d, a);
190 }
191
192 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
193 {
194 tcg_gen_vec_shr16i_i64(a, a, shift);
195 tcg_gen_vec_add16_i64(d, d, a);
196 }
197
198 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
199 {
200 tcg_gen_shri_i32(a, a, shift);
201 tcg_gen_add_i32(d, d, a);
202 }
203
204 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
205 {
206 tcg_gen_shri_i64(a, a, shift);
207 tcg_gen_add_i64(d, d, a);
208 }
209
210 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
211 {
212 tcg_gen_shri_vec(vece, a, a, sh);
213 tcg_gen_add_vec(vece, d, d, a);
214 }
215
216 void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
217 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
218 {
219 static const TCGOpcode vecop_list[] = {
220 INDEX_op_shri_vec, INDEX_op_add_vec, 0
221 };
222 static const GVecGen2i ops[4] = {
223 { .fni8 = gen_usra8_i64,
224 .fniv = gen_usra_vec,
225 .fno = gen_helper_gvec_usra_b,
226 .load_dest = true,
227 .opt_opc = vecop_list,
228 .vece = MO_8, },
229 { .fni8 = gen_usra16_i64,
230 .fniv = gen_usra_vec,
231 .fno = gen_helper_gvec_usra_h,
232 .load_dest = true,
233 .opt_opc = vecop_list,
234 .vece = MO_16, },
235 { .fni4 = gen_usra32_i32,
236 .fniv = gen_usra_vec,
237 .fno = gen_helper_gvec_usra_s,
238 .load_dest = true,
239 .opt_opc = vecop_list,
240 .vece = MO_32, },
241 { .fni8 = gen_usra64_i64,
242 .fniv = gen_usra_vec,
243 .fno = gen_helper_gvec_usra_d,
244 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
245 .load_dest = true,
246 .opt_opc = vecop_list,
247 .vece = MO_64, },
248 };
249
250 /* tszimm encoding produces immediates in the range [1..esize]. */
251 tcg_debug_assert(shift > 0);
252 tcg_debug_assert(shift <= (8 << vece));
253
254 /*
255 * Shifts larger than the element size are architecturally valid.
256 * Unsigned results in all zeros as input to accumulate: nop.
257 */
258 if (shift < (8 << vece)) {
259 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
260 } else {
261 /* Nop, but we do need to clear the tail. */
262 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
263 }
264 }
265
266 /*
267 * Shift one less than the requested amount, and the low bit is
268 * the rounding bit. For the 8 and 16-bit operations, because we
269 * mask the low bit, we can perform a normal integer shift instead
270 * of a vector shift.
271 */
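/*
 * For example, a rounding right shift by 2 of 7 (0b0111) computes
 * (7 >> 1) & 1 == 1 as the rounding bit and 7 >> 2 == 1 as the shifted
 * value, giving 2; this matches (x + (1 << (sh - 1))) >> sh without
 * needing a wider intermediate.
 */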
272 static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
273 {
274 TCGv_i64 t = tcg_temp_new_i64();
275
276 tcg_gen_shri_i64(t, a, sh - 1);
277 tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
278 tcg_gen_vec_sar8i_i64(d, a, sh);
279 tcg_gen_vec_add8_i64(d, d, t);
280 }
281
282 static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
283 {
284 TCGv_i64 t = tcg_temp_new_i64();
285
286 tcg_gen_shri_i64(t, a, sh - 1);
287 tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
288 tcg_gen_vec_sar16i_i64(d, a, sh);
289 tcg_gen_vec_add16_i64(d, d, t);
290 }
291
292 void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
293 {
294 TCGv_i32 t;
295
296 /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
297 if (sh == 32) {
298 tcg_gen_movi_i32(d, 0);
299 return;
300 }
301 t = tcg_temp_new_i32();
302 tcg_gen_extract_i32(t, a, sh - 1, 1);
303 tcg_gen_sari_i32(d, a, sh);
304 tcg_gen_add_i32(d, d, t);
305 }
306
307 void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
308 {
309 TCGv_i64 t = tcg_temp_new_i64();
310
311 tcg_gen_extract_i64(t, a, sh - 1, 1);
312 tcg_gen_sari_i64(d, a, sh);
313 tcg_gen_add_i64(d, d, t);
314 }
315
316 static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
317 {
318 TCGv_vec t = tcg_temp_new_vec_matching(d);
319 TCGv_vec ones = tcg_constant_vec_matching(d, vece, 1);
320
321 tcg_gen_shri_vec(vece, t, a, sh - 1);
322 tcg_gen_and_vec(vece, t, t, ones);
323 tcg_gen_sari_vec(vece, d, a, sh);
324 tcg_gen_add_vec(vece, d, d, t);
325 }
326
327 void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
328 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
329 {
330 static const TCGOpcode vecop_list[] = {
331 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
332 };
333 static const GVecGen2i ops[4] = {
334 { .fni8 = gen_srshr8_i64,
335 .fniv = gen_srshr_vec,
336 .fno = gen_helper_gvec_srshr_b,
337 .opt_opc = vecop_list,
338 .vece = MO_8 },
339 { .fni8 = gen_srshr16_i64,
340 .fniv = gen_srshr_vec,
341 .fno = gen_helper_gvec_srshr_h,
342 .opt_opc = vecop_list,
343 .vece = MO_16 },
344 { .fni4 = gen_srshr32_i32,
345 .fniv = gen_srshr_vec,
346 .fno = gen_helper_gvec_srshr_s,
347 .opt_opc = vecop_list,
348 .vece = MO_32 },
349 { .fni8 = gen_srshr64_i64,
350 .fniv = gen_srshr_vec,
351 .fno = gen_helper_gvec_srshr_d,
352 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
353 .opt_opc = vecop_list,
354 .vece = MO_64 },
355 };
356
357 /* tszimm encoding produces immediates in the range [1..esize] */
358 tcg_debug_assert(shift > 0);
359 tcg_debug_assert(shift <= (8 << vece));
360
361 if (shift == (8 << vece)) {
362 /*
363 * Shifts larger than the element size are architecturally valid.
364 * Signed results in all sign bits. With rounding, this produces
365 * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
366 * I.e. always zero.
367 */
368 tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
369 } else {
370 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
371 }
372 }
373
374 static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
375 {
376 TCGv_i64 t = tcg_temp_new_i64();
377
378 gen_srshr8_i64(t, a, sh);
379 tcg_gen_vec_add8_i64(d, d, t);
380 }
381
382 static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
383 {
384 TCGv_i64 t = tcg_temp_new_i64();
385
386 gen_srshr16_i64(t, a, sh);
387 tcg_gen_vec_add16_i64(d, d, t);
388 }
389
390 static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
391 {
392 TCGv_i32 t = tcg_temp_new_i32();
393
394 gen_srshr32_i32(t, a, sh);
395 tcg_gen_add_i32(d, d, t);
396 }
397
398 static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
399 {
400 TCGv_i64 t = tcg_temp_new_i64();
401
402 gen_srshr64_i64(t, a, sh);
403 tcg_gen_add_i64(d, d, t);
404 }
405
406 static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
407 {
408 TCGv_vec t = tcg_temp_new_vec_matching(d);
409
410 gen_srshr_vec(vece, t, a, sh);
411 tcg_gen_add_vec(vece, d, d, t);
412 }
413
414 void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
415 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
416 {
417 static const TCGOpcode vecop_list[] = {
418 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
419 };
420 static const GVecGen2i ops[4] = {
421 { .fni8 = gen_srsra8_i64,
422 .fniv = gen_srsra_vec,
423 .fno = gen_helper_gvec_srsra_b,
424 .opt_opc = vecop_list,
425 .load_dest = true,
426 .vece = MO_8 },
427 { .fni8 = gen_srsra16_i64,
428 .fniv = gen_srsra_vec,
429 .fno = gen_helper_gvec_srsra_h,
430 .opt_opc = vecop_list,
431 .load_dest = true,
432 .vece = MO_16 },
433 { .fni4 = gen_srsra32_i32,
434 .fniv = gen_srsra_vec,
435 .fno = gen_helper_gvec_srsra_s,
436 .opt_opc = vecop_list,
437 .load_dest = true,
438 .vece = MO_32 },
439 { .fni8 = gen_srsra64_i64,
440 .fniv = gen_srsra_vec,
441 .fno = gen_helper_gvec_srsra_d,
442 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
443 .opt_opc = vecop_list,
444 .load_dest = true,
445 .vece = MO_64 },
446 };
447
448 /* tszimm encoding produces immediates in the range [1..esize] */
449 tcg_debug_assert(shift > 0);
450 tcg_debug_assert(shift <= (8 << vece));
451
452 /*
453 * Shifts larger than the element size are architecturally valid.
454 * Signed results in all sign bits. With rounding, this produces
455 * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
456 * I.e. always zero. With accumulation, this leaves D unchanged.
457 */
458 if (shift == (8 << vece)) {
459 /* Nop, but we do need to clear the tail. */
460 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
461 } else {
462 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
463 }
464 }
465
466 static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
467 {
468 TCGv_i64 t = tcg_temp_new_i64();
469
470 tcg_gen_shri_i64(t, a, sh - 1);
471 tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
472 tcg_gen_vec_shr8i_i64(d, a, sh);
473 tcg_gen_vec_add8_i64(d, d, t);
474 }
475
476 static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
477 {
478 TCGv_i64 t = tcg_temp_new_i64();
479
480 tcg_gen_shri_i64(t, a, sh - 1);
481 tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
482 tcg_gen_vec_shr16i_i64(d, a, sh);
483 tcg_gen_vec_add16_i64(d, d, t);
484 }
485
486 void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
487 {
488 TCGv_i32 t;
489
490 /* Handle shift by the input size for the benefit of trans_URSHR_ri */
491 if (sh == 32) {
492 tcg_gen_extract_i32(d, a, sh - 1, 1);
493 return;
494 }
495 t = tcg_temp_new_i32();
496 tcg_gen_extract_i32(t, a, sh - 1, 1);
497 tcg_gen_shri_i32(d, a, sh);
498 tcg_gen_add_i32(d, d, t);
499 }
500
501 void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
502 {
503 TCGv_i64 t = tcg_temp_new_i64();
504
505 tcg_gen_extract_i64(t, a, sh - 1, 1);
506 tcg_gen_shri_i64(d, a, sh);
507 tcg_gen_add_i64(d, d, t);
508 }
509
510 static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
511 {
512 TCGv_vec t = tcg_temp_new_vec_matching(d);
513 TCGv_vec ones = tcg_constant_vec_matching(d, vece, 1);
514
515 tcg_gen_shri_vec(vece, t, a, shift - 1);
516 tcg_gen_and_vec(vece, t, t, ones);
517 tcg_gen_shri_vec(vece, d, a, shift);
518 tcg_gen_add_vec(vece, d, d, t);
519 }
520
521 void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
522 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
523 {
524 static const TCGOpcode vecop_list[] = {
525 INDEX_op_shri_vec, INDEX_op_add_vec, 0
526 };
527 static const GVecGen2i ops[4] = {
528 { .fni8 = gen_urshr8_i64,
529 .fniv = gen_urshr_vec,
530 .fno = gen_helper_gvec_urshr_b,
531 .opt_opc = vecop_list,
532 .vece = MO_8 },
533 { .fni8 = gen_urshr16_i64,
534 .fniv = gen_urshr_vec,
535 .fno = gen_helper_gvec_urshr_h,
536 .opt_opc = vecop_list,
537 .vece = MO_16 },
538 { .fni4 = gen_urshr32_i32,
539 .fniv = gen_urshr_vec,
540 .fno = gen_helper_gvec_urshr_s,
541 .opt_opc = vecop_list,
542 .vece = MO_32 },
543 { .fni8 = gen_urshr64_i64,
544 .fniv = gen_urshr_vec,
545 .fno = gen_helper_gvec_urshr_d,
546 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
547 .opt_opc = vecop_list,
548 .vece = MO_64 },
549 };
550
551 /* tszimm encoding produces immediates in the range [1..esize] */
552 tcg_debug_assert(shift > 0);
553 tcg_debug_assert(shift <= (8 << vece));
554
555 if (shift == (8 << vece)) {
556 /*
557 * Shifts larger than the element size are architecturally valid.
558 * Unsigned results in zero. With rounding, this produces a
559 * copy of the most significant bit.
560 */
561 tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
562 } else {
563 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
564 }
565 }
566
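/*
 * When the shift equals the element size, the rounded shift reduces to
 * just the rounding bit, i.e. bit (esize - 1) of the input, which the
 * special cases below compute directly before accumulating into D.
 */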
567 static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
568 {
569 TCGv_i64 t = tcg_temp_new_i64();
570
571 if (sh == 8) {
572 tcg_gen_vec_shr8i_i64(t, a, 7);
573 } else {
574 gen_urshr8_i64(t, a, sh);
575 }
576 tcg_gen_vec_add8_i64(d, d, t);
577 }
578
579 static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
580 {
581 TCGv_i64 t = tcg_temp_new_i64();
582
583 if (sh == 16) {
584 tcg_gen_vec_shr16i_i64(t, a, 15);
585 } else {
586 gen_urshr16_i64(t, a, sh);
587 }
588 tcg_gen_vec_add16_i64(d, d, t);
589 }
590
591 static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
592 {
593 TCGv_i32 t = tcg_temp_new_i32();
594
595 if (sh == 32) {
596 tcg_gen_shri_i32(t, a, 31);
597 } else {
598 gen_urshr32_i32(t, a, sh);
599 }
600 tcg_gen_add_i32(d, d, t);
601 }
602
603 static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
604 {
605 TCGv_i64 t = tcg_temp_new_i64();
606
607 if (sh == 64) {
608 tcg_gen_shri_i64(t, a, 63);
609 } else {
610 gen_urshr64_i64(t, a, sh);
611 }
612 tcg_gen_add_i64(d, d, t);
613 }
614
615 static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
616 {
617 TCGv_vec t = tcg_temp_new_vec_matching(d);
618
619 if (sh == (8 << vece)) {
620 tcg_gen_shri_vec(vece, t, a, sh - 1);
621 } else {
622 gen_urshr_vec(vece, t, a, sh);
623 }
624 tcg_gen_add_vec(vece, d, d, t);
625 }
626
627 void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
628 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
629 {
630 static const TCGOpcode vecop_list[] = {
631 INDEX_op_shri_vec, INDEX_op_add_vec, 0
632 };
633 static const GVecGen2i ops[4] = {
634 { .fni8 = gen_ursra8_i64,
635 .fniv = gen_ursra_vec,
636 .fno = gen_helper_gvec_ursra_b,
637 .opt_opc = vecop_list,
638 .load_dest = true,
639 .vece = MO_8 },
640 { .fni8 = gen_ursra16_i64,
641 .fniv = gen_ursra_vec,
642 .fno = gen_helper_gvec_ursra_h,
643 .opt_opc = vecop_list,
644 .load_dest = true,
645 .vece = MO_16 },
646 { .fni4 = gen_ursra32_i32,
647 .fniv = gen_ursra_vec,
648 .fno = gen_helper_gvec_ursra_s,
649 .opt_opc = vecop_list,
650 .load_dest = true,
651 .vece = MO_32 },
652 { .fni8 = gen_ursra64_i64,
653 .fniv = gen_ursra_vec,
654 .fno = gen_helper_gvec_ursra_d,
655 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
656 .opt_opc = vecop_list,
657 .load_dest = true,
658 .vece = MO_64 },
659 };
660
661 /* tszimm encoding produces immediates in the range [1..esize] */
662 tcg_debug_assert(shift > 0);
663 tcg_debug_assert(shift <= (8 << vece));
664
665 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
666 }
667
668 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
669 {
670 uint64_t mask = dup_const(MO_8, 0xff >> shift);
671 TCGv_i64 t = tcg_temp_new_i64();
672
673 tcg_gen_shri_i64(t, a, shift);
674 tcg_gen_andi_i64(t, t, mask);
675 tcg_gen_andi_i64(d, d, ~mask);
676 tcg_gen_or_i64(d, d, t);
677 }
678
679 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
680 {
681 uint64_t mask = dup_const(MO_16, 0xffff >> shift);
682 TCGv_i64 t = tcg_temp_new_i64();
683
684 tcg_gen_shri_i64(t, a, shift);
685 tcg_gen_andi_i64(t, t, mask);
686 tcg_gen_andi_i64(d, d, ~mask);
687 tcg_gen_or_i64(d, d, t);
688 }
689
690 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
691 {
692 tcg_gen_shri_i32(a, a, shift);
693 tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
694 }
695
696 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
697 {
698 tcg_gen_shri_i64(a, a, shift);
699 tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
700 }
701
702 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
703 {
704 TCGv_vec t = tcg_temp_new_vec_matching(d);
705 int64_t mi = MAKE_64BIT_MASK((8 << vece) - sh, sh);
706 TCGv_vec m = tcg_constant_vec_matching(d, vece, mi);
707
708 tcg_gen_shri_vec(vece, t, a, sh);
709 tcg_gen_and_vec(vece, d, d, m);
710 tcg_gen_or_vec(vece, d, d, t);
711 }
712
713 void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
714 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
715 {
716 static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
717 const GVecGen2i ops[4] = {
718 { .fni8 = gen_shr8_ins_i64,
719 .fniv = gen_shr_ins_vec,
720 .fno = gen_helper_gvec_sri_b,
721 .load_dest = true,
722 .opt_opc = vecop_list,
723 .vece = MO_8 },
724 { .fni8 = gen_shr16_ins_i64,
725 .fniv = gen_shr_ins_vec,
726 .fno = gen_helper_gvec_sri_h,
727 .load_dest = true,
728 .opt_opc = vecop_list,
729 .vece = MO_16 },
730 { .fni4 = gen_shr32_ins_i32,
731 .fniv = gen_shr_ins_vec,
732 .fno = gen_helper_gvec_sri_s,
733 .load_dest = true,
734 .opt_opc = vecop_list,
735 .vece = MO_32 },
736 { .fni8 = gen_shr64_ins_i64,
737 .fniv = gen_shr_ins_vec,
738 .fno = gen_helper_gvec_sri_d,
739 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
740 .load_dest = true,
741 .opt_opc = vecop_list,
742 .vece = MO_64 },
743 };
744
745 /* tszimm encoding produces immediates in the range [1..esize]. */
746 tcg_debug_assert(shift > 0);
747 tcg_debug_assert(shift <= (8 << vece));
748
749 /* Shift of esize leaves destination unchanged. */
750 if (shift < (8 << vece)) {
751 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
752 } else {
753 /* Nop, but we do need to clear the tail. */
754 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
755 }
756 }
757
758 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
759 {
760 uint64_t mask = dup_const(MO_8, 0xff << shift);
761 TCGv_i64 t = tcg_temp_new_i64();
762
763 tcg_gen_shli_i64(t, a, shift);
764 tcg_gen_andi_i64(t, t, mask);
765 tcg_gen_andi_i64(d, d, ~mask);
766 tcg_gen_or_i64(d, d, t);
767 }
768
769 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
770 {
771 uint64_t mask = dup_const(MO_16, 0xffff << shift);
772 TCGv_i64 t = tcg_temp_new_i64();
773
774 tcg_gen_shli_i64(t, a, shift);
775 tcg_gen_andi_i64(t, t, mask);
776 tcg_gen_andi_i64(d, d, ~mask);
777 tcg_gen_or_i64(d, d, t);
778 }
779
780 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
781 {
782 tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
783 }
784
785 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
786 {
787 tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
788 }
789
790 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
791 {
792 TCGv_vec t = tcg_temp_new_vec_matching(d);
793 TCGv_vec m = tcg_constant_vec_matching(d, vece, MAKE_64BIT_MASK(0, sh));
794
795 tcg_gen_shli_vec(vece, t, a, sh);
796 tcg_gen_and_vec(vece, d, d, m);
797 tcg_gen_or_vec(vece, d, d, t);
798 }
799
800 void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
801 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
802 {
803 static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
804 const GVecGen2i ops[4] = {
805 { .fni8 = gen_shl8_ins_i64,
806 .fniv = gen_shl_ins_vec,
807 .fno = gen_helper_gvec_sli_b,
808 .load_dest = true,
809 .opt_opc = vecop_list,
810 .vece = MO_8 },
811 { .fni8 = gen_shl16_ins_i64,
812 .fniv = gen_shl_ins_vec,
813 .fno = gen_helper_gvec_sli_h,
814 .load_dest = true,
815 .opt_opc = vecop_list,
816 .vece = MO_16 },
817 { .fni4 = gen_shl32_ins_i32,
818 .fniv = gen_shl_ins_vec,
819 .fno = gen_helper_gvec_sli_s,
820 .load_dest = true,
821 .opt_opc = vecop_list,
822 .vece = MO_32 },
823 { .fni8 = gen_shl64_ins_i64,
824 .fniv = gen_shl_ins_vec,
825 .fno = gen_helper_gvec_sli_d,
826 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
827 .load_dest = true,
828 .opt_opc = vecop_list,
829 .vece = MO_64 },
830 };
831
832 /* tszimm encoding produces immediates in the range [0..esize-1]. */
833 tcg_debug_assert(shift >= 0);
834 tcg_debug_assert(shift < (8 << vece));
835
836 if (shift == 0) {
837 tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
838 } else {
839 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
840 }
841 }
842
843 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
844 {
845 gen_helper_neon_mul_u8(a, a, b);
846 gen_helper_neon_add_u8(d, d, a);
847 }
848
849 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
850 {
851 gen_helper_neon_mul_u8(a, a, b);
852 gen_helper_neon_sub_u8(d, d, a);
853 }
854
855 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
856 {
857 gen_helper_neon_mul_u16(a, a, b);
858 gen_helper_neon_add_u16(d, d, a);
859 }
860
861 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
862 {
863 gen_helper_neon_mul_u16(a, a, b);
864 gen_helper_neon_sub_u16(d, d, a);
865 }
866
867 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
868 {
869 tcg_gen_mul_i32(a, a, b);
870 tcg_gen_add_i32(d, d, a);
871 }
872
873 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
874 {
875 tcg_gen_mul_i32(a, a, b);
876 tcg_gen_sub_i32(d, d, a);
877 }
878
879 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
880 {
881 tcg_gen_mul_i64(a, a, b);
882 tcg_gen_add_i64(d, d, a);
883 }
884
885 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
886 {
887 tcg_gen_mul_i64(a, a, b);
888 tcg_gen_sub_i64(d, d, a);
889 }
890
891 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
892 {
893 tcg_gen_mul_vec(vece, a, a, b);
894 tcg_gen_add_vec(vece, d, d, a);
895 }
896
897 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
898 {
899 tcg_gen_mul_vec(vece, a, a, b);
900 tcg_gen_sub_vec(vece, d, d, a);
901 }
902
903 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
904 * these tables are shared with AArch64 which does support them.
905 */
906 void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
907 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
908 {
909 static const TCGOpcode vecop_list[] = {
910 INDEX_op_mul_vec, INDEX_op_add_vec, 0
911 };
912 static const GVecGen3 ops[4] = {
913 { .fni4 = gen_mla8_i32,
914 .fniv = gen_mla_vec,
915 .load_dest = true,
916 .opt_opc = vecop_list,
917 .vece = MO_8 },
918 { .fni4 = gen_mla16_i32,
919 .fniv = gen_mla_vec,
920 .load_dest = true,
921 .opt_opc = vecop_list,
922 .vece = MO_16 },
923 { .fni4 = gen_mla32_i32,
924 .fniv = gen_mla_vec,
925 .load_dest = true,
926 .opt_opc = vecop_list,
927 .vece = MO_32 },
928 { .fni8 = gen_mla64_i64,
929 .fniv = gen_mla_vec,
930 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
931 .load_dest = true,
932 .opt_opc = vecop_list,
933 .vece = MO_64 },
934 };
935 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
936 }
937
938 void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
939 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
940 {
941 static const TCGOpcode vecop_list[] = {
942 INDEX_op_mul_vec, INDEX_op_sub_vec, 0
943 };
944 static const GVecGen3 ops[4] = {
945 { .fni4 = gen_mls8_i32,
946 .fniv = gen_mls_vec,
947 .load_dest = true,
948 .opt_opc = vecop_list,
949 .vece = MO_8 },
950 { .fni4 = gen_mls16_i32,
951 .fniv = gen_mls_vec,
952 .load_dest = true,
953 .opt_opc = vecop_list,
954 .vece = MO_16 },
955 { .fni4 = gen_mls32_i32,
956 .fniv = gen_mls_vec,
957 .load_dest = true,
958 .opt_opc = vecop_list,
959 .vece = MO_32 },
960 { .fni8 = gen_mls64_i64,
961 .fniv = gen_mls_vec,
962 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
963 .load_dest = true,
964 .opt_opc = vecop_list,
965 .vece = MO_64 },
966 };
967 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
968 }
969
970 /* CMTST : test is "if (X & Y != 0)". */
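/*
 * Each element becomes all ones when (X & Y) != 0 and zero otherwise;
 * e.g. for bytes, CMTST(0x0f, 0x10) == 0x00 and CMTST(0x0f, 0x11) == 0xff.
 */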
971 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
972 {
973 tcg_gen_negsetcond_i32(TCG_COND_TSTNE, d, a, b);
974 }
975
976 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
977 {
978 tcg_gen_negsetcond_i64(TCG_COND_TSTNE, d, a, b);
979 }
980
981 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
982 {
983 tcg_gen_cmp_vec(TCG_COND_TSTNE, vece, d, a, b);
984 }
985
986 void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
987 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
988 {
989 static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
990 static const GVecGen3 ops[4] = {
991 { .fni4 = gen_helper_neon_tst_u8,
992 .fniv = gen_cmtst_vec,
993 .opt_opc = vecop_list,
994 .vece = MO_8 },
995 { .fni4 = gen_helper_neon_tst_u16,
996 .fniv = gen_cmtst_vec,
997 .opt_opc = vecop_list,
998 .vece = MO_16 },
999 { .fni4 = gen_cmtst_i32,
1000 .fniv = gen_cmtst_vec,
1001 .opt_opc = vecop_list,
1002 .vece = MO_32 },
1003 { .fni8 = gen_cmtst_i64,
1004 .fniv = gen_cmtst_vec,
1005 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1006 .opt_opc = vecop_list,
1007 .vece = MO_64 },
1008 };
1009 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
1010 }
1011
1012 void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
1013 {
1014 TCGv_i32 lval = tcg_temp_new_i32();
1015 TCGv_i32 rval = tcg_temp_new_i32();
1016 TCGv_i32 lsh = tcg_temp_new_i32();
1017 TCGv_i32 rsh = tcg_temp_new_i32();
1018 TCGv_i32 zero = tcg_constant_i32(0);
1019 TCGv_i32 max = tcg_constant_i32(32);
1020
1021 /*
1022 * Rely on the TCG guarantee that out of range shifts produce
1023 * unspecified results, not undefined behaviour (i.e. no trap).
1024 * Discard out-of-range results after the fact.
1025 */
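/*
 * E.g. a shift byte of 0xf8 (-8) selects the right-shift path; for a
 * shift byte of 40, both lsh and (as unsigned) rsh are >= 32, so both
 * movconds select zero, matching the architectural result of shifting a
 * 32-bit element by more than its width.
 */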
1026 tcg_gen_ext8s_i32(lsh, shift);
1027 tcg_gen_neg_i32(rsh, lsh);
1028 tcg_gen_shl_i32(lval, src, lsh);
1029 tcg_gen_shr_i32(rval, src, rsh);
1030 tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
1031 tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
1032 }
1033
1034 void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
1035 {
1036 TCGv_i64 lval = tcg_temp_new_i64();
1037 TCGv_i64 rval = tcg_temp_new_i64();
1038 TCGv_i64 lsh = tcg_temp_new_i64();
1039 TCGv_i64 rsh = tcg_temp_new_i64();
1040 TCGv_i64 zero = tcg_constant_i64(0);
1041 TCGv_i64 max = tcg_constant_i64(64);
1042
1043 /*
1044 * Rely on the TCG guarantee that out of range shifts produce
1045 * unspecified results, not undefined behaviour (i.e. no trap).
1046 * Discard out-of-range results after the fact.
1047 */
1048 tcg_gen_ext8s_i64(lsh, shift);
1049 tcg_gen_neg_i64(rsh, lsh);
1050 tcg_gen_shl_i64(lval, src, lsh);
1051 tcg_gen_shr_i64(rval, src, rsh);
1052 tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
1053 tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
1054 }
1055
1056 static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
1057 TCGv_vec src, TCGv_vec shift)
1058 {
1059 TCGv_vec lval = tcg_temp_new_vec_matching(dst);
1060 TCGv_vec rval = tcg_temp_new_vec_matching(dst);
1061 TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
1062 TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
1063 TCGv_vec max, zero;
1064
1065 tcg_gen_neg_vec(vece, rsh, shift);
1066 if (vece == MO_8) {
1067 tcg_gen_mov_vec(lsh, shift);
1068 } else {
1069 TCGv_vec msk = tcg_constant_vec_matching(dst, vece, 0xff);
1070 tcg_gen_and_vec(vece, lsh, shift, msk);
1071 tcg_gen_and_vec(vece, rsh, rsh, msk);
1072 }
1073
1074 /*
1075 * Rely on the TCG guarantee that out of range shifts produce
1076 * unspecified results, not undefined behaviour (i.e. no trap).
1077 * Discard out-of-range results after the fact.
1078 */
1079 tcg_gen_shlv_vec(vece, lval, src, lsh);
1080 tcg_gen_shrv_vec(vece, rval, src, rsh);
1081
1082 /*
1083 * The choice of GE (signed) and GEU (unsigned) is biased toward
1084 * the instructions of the x86_64 host. For MO_8, the whole byte
1085 * is significant so we must use an unsigned compare; otherwise we
1086 * have already masked to a byte and so a signed compare works.
1087 * Other tcg hosts have a full set of comparisons and do not care.
1088 */
1089 zero = tcg_constant_vec_matching(dst, vece, 0);
1090 max = tcg_constant_vec_matching(dst, vece, 8 << vece);
1091 if (vece == MO_8) {
1092 tcg_gen_cmpsel_vec(TCG_COND_GEU, vece, lval, lsh, max, zero, lval);
1093 tcg_gen_cmpsel_vec(TCG_COND_GEU, vece, rval, rsh, max, zero, rval);
1094 } else {
1095 tcg_gen_cmpsel_vec(TCG_COND_GE, vece, lval, lsh, max, zero, lval);
1096 tcg_gen_cmpsel_vec(TCG_COND_GE, vece, rval, rsh, max, zero, rval);
1097 }
1098 tcg_gen_or_vec(vece, dst, lval, rval);
1099 }
1100
1101 void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1102 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1103 {
1104 static const TCGOpcode vecop_list[] = {
1105 INDEX_op_neg_vec, INDEX_op_shlv_vec,
1106 INDEX_op_shrv_vec, INDEX_op_cmpsel_vec, 0
1107 };
1108 static const GVecGen3 ops[4] = {
1109 { .fniv = gen_ushl_vec,
1110 .fno = gen_helper_gvec_ushl_b,
1111 .opt_opc = vecop_list,
1112 .vece = MO_8 },
1113 { .fniv = gen_ushl_vec,
1114 .fno = gen_helper_gvec_ushl_h,
1115 .opt_opc = vecop_list,
1116 .vece = MO_16 },
1117 { .fni4 = gen_ushl_i32,
1118 .fniv = gen_ushl_vec,
1119 .opt_opc = vecop_list,
1120 .vece = MO_32 },
1121 { .fni8 = gen_ushl_i64,
1122 .fniv = gen_ushl_vec,
1123 .opt_opc = vecop_list,
1124 .vece = MO_64 },
1125 };
1126 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
1127 }
1128
1129 void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
1130 {
1131 TCGv_i32 lval = tcg_temp_new_i32();
1132 TCGv_i32 rval = tcg_temp_new_i32();
1133 TCGv_i32 lsh = tcg_temp_new_i32();
1134 TCGv_i32 rsh = tcg_temp_new_i32();
1135 TCGv_i32 zero = tcg_constant_i32(0);
1136 TCGv_i32 max = tcg_constant_i32(31);
1137
1138 /*
1139 * Rely on the TCG guarantee that out of range shifts produce
1140 * unspecified results, not undefined behaviour (i.e. no trap).
1141 * Discard out-of-range results after the fact.
1142 */
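/*
 * A negative shift byte selects the arithmetic-right-shift path; the
 * umin clamps the right-shift count to 31 so that shifts of 32 or more
 * collapse to sign replication (0 or -1), while left shifts of 32 or
 * more are forced to zero by the first movcond.
 */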
1143 tcg_gen_ext8s_i32(lsh, shift);
1144 tcg_gen_neg_i32(rsh, lsh);
1145 tcg_gen_shl_i32(lval, src, lsh);
1146 tcg_gen_umin_i32(rsh, rsh, max);
1147 tcg_gen_sar_i32(rval, src, rsh);
1148 tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
1149 tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
1150 }
1151
1152 void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
1153 {
1154 TCGv_i64 lval = tcg_temp_new_i64();
1155 TCGv_i64 rval = tcg_temp_new_i64();
1156 TCGv_i64 lsh = tcg_temp_new_i64();
1157 TCGv_i64 rsh = tcg_temp_new_i64();
1158 TCGv_i64 zero = tcg_constant_i64(0);
1159 TCGv_i64 max = tcg_constant_i64(63);
1160
1161 /*
1162 * Rely on the TCG guarantee that out of range shifts produce
1163 * unspecified results, not undefined behaviour (i.e. no trap).
1164 * Discard out-of-range results after the fact.
1165 */
1166 tcg_gen_ext8s_i64(lsh, shift);
1167 tcg_gen_neg_i64(rsh, lsh);
1168 tcg_gen_shl_i64(lval, src, lsh);
1169 tcg_gen_umin_i64(rsh, rsh, max);
1170 tcg_gen_sar_i64(rval, src, rsh);
1171 tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
1172 tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
1173 }
1174
1175 static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
1176 TCGv_vec src, TCGv_vec shift)
1177 {
1178 TCGv_vec lval = tcg_temp_new_vec_matching(dst);
1179 TCGv_vec rval = tcg_temp_new_vec_matching(dst);
1180 TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
1181 TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
1182 TCGv_vec max, zero;
1183
1184 /*
1185 * Rely on the TCG guarantee that out of range shifts produce
1186 * unspecified results, not undefined behaviour (i.e. no trap).
1187 * Discard out-of-range results after the fact.
1188 */
1189 tcg_gen_neg_vec(vece, rsh, shift);
1190 if (vece == MO_8) {
1191 tcg_gen_mov_vec(lsh, shift);
1192 } else {
1193 TCGv_vec msk = tcg_constant_vec_matching(dst, vece, 0xff);
1194 tcg_gen_and_vec(vece, lsh, shift, msk);
1195 tcg_gen_and_vec(vece, rsh, rsh, msk);
1196 }
1197
1198 /* Bound rsh so out of bound right shift gets -1. */
1199 max = tcg_constant_vec_matching(dst, vece, (8 << vece) - 1);
1200 tcg_gen_umin_vec(vece, rsh, rsh, max);
1201
1202 tcg_gen_shlv_vec(vece, lval, src, lsh);
1203 tcg_gen_sarv_vec(vece, rval, src, rsh);
1204
1205 /* Select in-bound left shift. */
1206 zero = tcg_constant_vec_matching(dst, vece, 0);
1207 tcg_gen_cmpsel_vec(TCG_COND_GT, vece, lval, lsh, max, zero, lval);
1208
1209 /* Select between left and right shift. */
1210 if (vece == MO_8) {
1211 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, zero, rval, lval);
1212 } else {
1213 TCGv_vec sgn = tcg_constant_vec_matching(dst, vece, 0x80);
1214 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, sgn, lval, rval);
1215 }
1216 }
1217
1218 void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1219 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1220 {
1221 static const TCGOpcode vecop_list[] = {
1222 INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
1223 INDEX_op_sarv_vec, INDEX_op_cmpsel_vec, 0
1224 };
1225 static const GVecGen3 ops[4] = {
1226 { .fniv = gen_sshl_vec,
1227 .fno = gen_helper_gvec_sshl_b,
1228 .opt_opc = vecop_list,
1229 .vece = MO_8 },
1230 { .fniv = gen_sshl_vec,
1231 .fno = gen_helper_gvec_sshl_h,
1232 .opt_opc = vecop_list,
1233 .vece = MO_16 },
1234 { .fni4 = gen_sshl_i32,
1235 .fniv = gen_sshl_vec,
1236 .opt_opc = vecop_list,
1237 .vece = MO_32 },
1238 { .fni8 = gen_sshl_i64,
1239 .fniv = gen_sshl_vec,
1240 .opt_opc = vecop_list,
1241 .vece = MO_64 },
1242 };
1243 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
1244 }
1245
1246 void gen_gvec_srshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1247 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1248 {
1249 static gen_helper_gvec_3 * const fns[] = {
1250 gen_helper_gvec_srshl_b, gen_helper_gvec_srshl_h,
1251 gen_helper_gvec_srshl_s, gen_helper_gvec_srshl_d,
1252 };
1253 tcg_debug_assert(vece <= MO_64);
1254 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
1255 }
1256
1257 void gen_gvec_urshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1258 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1259 {
1260 static gen_helper_gvec_3 * const fns[] = {
1261 gen_helper_gvec_urshl_b, gen_helper_gvec_urshl_h,
1262 gen_helper_gvec_urshl_s, gen_helper_gvec_urshl_d,
1263 };
1264 tcg_debug_assert(vece <= MO_64);
1265 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
1266 }
1267
1268 void gen_neon_sqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1269 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1270 {
1271 static gen_helper_gvec_3_ptr * const fns[] = {
1272 gen_helper_neon_sqshl_b, gen_helper_neon_sqshl_h,
1273 gen_helper_neon_sqshl_s, gen_helper_neon_sqshl_d,
1274 };
1275 tcg_debug_assert(vece <= MO_64);
1276 tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
1277 opr_sz, max_sz, 0, fns[vece]);
1278 }
1279
1280 void gen_neon_uqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1281 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1282 {
1283 static gen_helper_gvec_3_ptr * const fns[] = {
1284 gen_helper_neon_uqshl_b, gen_helper_neon_uqshl_h,
1285 gen_helper_neon_uqshl_s, gen_helper_neon_uqshl_d,
1286 };
1287 tcg_debug_assert(vece <= MO_64);
1288 tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
1289 opr_sz, max_sz, 0, fns[vece]);
1290 }
1291
1292 void gen_neon_sqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1293 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1294 {
1295 static gen_helper_gvec_3_ptr * const fns[] = {
1296 gen_helper_neon_sqrshl_b, gen_helper_neon_sqrshl_h,
1297 gen_helper_neon_sqrshl_s, gen_helper_neon_sqrshl_d,
1298 };
1299 tcg_debug_assert(vece <= MO_64);
1300 tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
1301 opr_sz, max_sz, 0, fns[vece]);
1302 }
1303
1304 void gen_neon_uqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1305 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1306 {
1307 static gen_helper_gvec_3_ptr * const fns[] = {
1308 gen_helper_neon_uqrshl_b, gen_helper_neon_uqrshl_h,
1309 gen_helper_neon_uqrshl_s, gen_helper_neon_uqrshl_d,
1310 };
1311 tcg_debug_assert(vece <= MO_64);
1312 tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
1313 opr_sz, max_sz, 0, fns[vece]);
1314 }
1315
1316 void gen_neon_sqshli(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1317 int64_t c, uint32_t opr_sz, uint32_t max_sz)
1318 {
1319 static gen_helper_gvec_2_ptr * const fns[] = {
1320 gen_helper_neon_sqshli_b, gen_helper_neon_sqshli_h,
1321 gen_helper_neon_sqshli_s, gen_helper_neon_sqshli_d,
1322 };
1323 tcg_debug_assert(vece <= MO_64);
1324 tcg_debug_assert(c >= 0 && c <= (8 << vece));
1325 tcg_gen_gvec_2_ptr(rd_ofs, rn_ofs, tcg_env, opr_sz, max_sz, c, fns[vece]);
1326 }
1327
1328 void gen_neon_uqshli(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1329 int64_t c, uint32_t opr_sz, uint32_t max_sz)
1330 {
1331 static gen_helper_gvec_2_ptr * const fns[] = {
1332 gen_helper_neon_uqshli_b, gen_helper_neon_uqshli_h,
1333 gen_helper_neon_uqshli_s, gen_helper_neon_uqshli_d,
1334 };
1335 tcg_debug_assert(vece <= MO_64);
1336 tcg_debug_assert(c >= 0 && c <= (8 << vece));
1337 tcg_gen_gvec_2_ptr(rd_ofs, rn_ofs, tcg_env, opr_sz, max_sz, c, fns[vece]);
1338 }
1339
1340 void gen_neon_sqshlui(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1341 int64_t c, uint32_t opr_sz, uint32_t max_sz)
1342 {
1343 static gen_helper_gvec_2_ptr * const fns[] = {
1344 gen_helper_neon_sqshlui_b, gen_helper_neon_sqshlui_h,
1345 gen_helper_neon_sqshlui_s, gen_helper_neon_sqshlui_d,
1346 };
1347 tcg_debug_assert(vece <= MO_64);
1348 tcg_debug_assert(c >= 0 && c <= (8 << vece));
1349 tcg_gen_gvec_2_ptr(rd_ofs, rn_ofs, tcg_env, opr_sz, max_sz, c, fns[vece]);
1350 }
1351
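/*
 * Unsigned saturating add for 8/16/32-bit elements held zero-extended in
 * 64-bit temporaries: the sum cannot wrap, so clamping against the element
 * maximum yields the saturated result, and the XOR of the raw and clamped
 * sums is nonzero exactly when saturation occurred, accumulating into QC.
 */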
1352 void gen_uqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
1353 {
1354 uint64_t max = MAKE_64BIT_MASK(0, 8 << esz);
1355 TCGv_i64 tmp = tcg_temp_new_i64();
1356
1357 tcg_gen_add_i64(tmp, a, b);
1358 tcg_gen_umin_i64(res, tmp, tcg_constant_i64(max));
1359 tcg_gen_xor_i64(tmp, tmp, res);
1360 tcg_gen_or_i64(qc, qc, tmp);
1361 }
1362
1363 void gen_uqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
1364 {
1365 TCGv_i64 t = tcg_temp_new_i64();
1366
1367 tcg_gen_add_i64(t, a, b);
1368 tcg_gen_movcond_i64(TCG_COND_LTU, res, t, a,
1369 tcg_constant_i64(UINT64_MAX), t);
1370 tcg_gen_xor_i64(t, t, res);
1371 tcg_gen_or_i64(qc, qc, t);
1372 }
1373
1374 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
1375 TCGv_vec a, TCGv_vec b)
1376 {
1377 TCGv_vec x = tcg_temp_new_vec_matching(t);
1378 tcg_gen_add_vec(vece, x, a, b);
1379 tcg_gen_usadd_vec(vece, t, a, b);
1380 tcg_gen_xor_vec(vece, x, x, t);
1381 tcg_gen_or_vec(vece, qc, qc, x);
1382 }
1383
1384 void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1385 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1386 {
1387 static const TCGOpcode vecop_list[] = {
1388 INDEX_op_usadd_vec, INDEX_op_add_vec, 0
1389 };
1390 static const GVecGen4 ops[4] = {
1391 { .fniv = gen_uqadd_vec,
1392 .fno = gen_helper_gvec_uqadd_b,
1393 .write_aofs = true,
1394 .opt_opc = vecop_list,
1395 .vece = MO_8 },
1396 { .fniv = gen_uqadd_vec,
1397 .fno = gen_helper_gvec_uqadd_h,
1398 .write_aofs = true,
1399 .opt_opc = vecop_list,
1400 .vece = MO_16 },
1401 { .fniv = gen_uqadd_vec,
1402 .fno = gen_helper_gvec_uqadd_s,
1403 .write_aofs = true,
1404 .opt_opc = vecop_list,
1405 .vece = MO_32 },
1406 { .fniv = gen_uqadd_vec,
1407 .fni8 = gen_uqadd_d,
1408 .fno = gen_helper_gvec_uqadd_d,
1409 .write_aofs = true,
1410 .opt_opc = vecop_list,
1411 .vece = MO_64 },
1412 };
1413
1414 tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
1415 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
1416 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
1417 }
1418
1419 void gen_sqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
1420 {
1421 int64_t max = MAKE_64BIT_MASK(0, (8 << esz) - 1);
1422 int64_t min = -1ll - max;
1423 TCGv_i64 tmp = tcg_temp_new_i64();
1424
1425 tcg_gen_add_i64(tmp, a, b);
1426 tcg_gen_smin_i64(res, tmp, tcg_constant_i64(max));
1427 tcg_gen_smax_i64(res, res, tcg_constant_i64(min));
1428 tcg_gen_xor_i64(tmp, tmp, res);
1429 tcg_gen_or_i64(qc, qc, tmp);
1430 }
1431
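/*
 * 64-bit signed saturating add: overflow occurs iff A and B have the same
 * sign and the sum's sign differs from A, which leaves bit 63 of T1 set.
 * The saturated value is INT64_MAX for non-negative A and INT64_MIN for
 * negative A, computed as (A >> 63) ^ INT64_MAX.
 */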
1432 void gen_sqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
1433 {
1434 TCGv_i64 t0 = tcg_temp_new_i64();
1435 TCGv_i64 t1 = tcg_temp_new_i64();
1436 TCGv_i64 t2 = tcg_temp_new_i64();
1437
1438 tcg_gen_add_i64(t0, a, b);
1439
1440 /* Compute signed overflow indication into T1 */
1441 tcg_gen_xor_i64(t1, a, b);
1442 tcg_gen_xor_i64(t2, t0, a);
1443 tcg_gen_andc_i64(t1, t2, t1);
1444
1445 /* Compute saturated value into T2 */
1446 tcg_gen_sari_i64(t2, a, 63);
1447 tcg_gen_xori_i64(t2, t2, INT64_MAX);
1448
1449 tcg_gen_movcond_i64(TCG_COND_LT, res, t1, tcg_constant_i64(0), t2, t0);
1450 tcg_gen_xor_i64(t0, t0, res);
1451 tcg_gen_or_i64(qc, qc, t0);
1452 }
1453
1454 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
1455 TCGv_vec a, TCGv_vec b)
1456 {
1457 TCGv_vec x = tcg_temp_new_vec_matching(t);
1458 tcg_gen_add_vec(vece, x, a, b);
1459 tcg_gen_ssadd_vec(vece, t, a, b);
1460 tcg_gen_xor_vec(vece, x, x, t);
1461 tcg_gen_or_vec(vece, qc, qc, x);
1462 }
1463
1464 void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1465 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1466 {
1467 static const TCGOpcode vecop_list[] = {
1468 INDEX_op_ssadd_vec, INDEX_op_add_vec, 0
1469 };
1470 static const GVecGen4 ops[4] = {
1471 { .fniv = gen_sqadd_vec,
1472 .fno = gen_helper_gvec_sqadd_b,
1473 .opt_opc = vecop_list,
1474 .write_aofs = true,
1475 .vece = MO_8 },
1476 { .fniv = gen_sqadd_vec,
1477 .fno = gen_helper_gvec_sqadd_h,
1478 .opt_opc = vecop_list,
1479 .write_aofs = true,
1480 .vece = MO_16 },
1481 { .fniv = gen_sqadd_vec,
1482 .fno = gen_helper_gvec_sqadd_s,
1483 .opt_opc = vecop_list,
1484 .write_aofs = true,
1485 .vece = MO_32 },
1486 { .fniv = gen_sqadd_vec,
1487 .fni8 = gen_sqadd_d,
1488 .fno = gen_helper_gvec_sqadd_d,
1489 .opt_opc = vecop_list,
1490 .write_aofs = true,
1491 .vece = MO_64 },
1492 };
1493
1494 tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
1495 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
1496 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
1497 }
1498
1499 void gen_uqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
1500 {
1501 TCGv_i64 tmp = tcg_temp_new_i64();
1502
1503 tcg_gen_sub_i64(tmp, a, b);
1504 tcg_gen_smax_i64(res, tmp, tcg_constant_i64(0));
1505 tcg_gen_xor_i64(tmp, tmp, res);
1506 tcg_gen_or_i64(qc, qc, tmp);
1507 }
1508
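/*
 * 64-bit unsigned saturating subtract: A < B means the subtraction
 * borrows, so the movcond selects the saturated floor of zero; the XOR
 * of the raw difference and the result feeds the QC accumulator.
 */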
1509 void gen_uqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
1510 {
1511 TCGv_i64 t = tcg_temp_new_i64();
1512
1513 tcg_gen_sub_i64(t, a, b);
1514 tcg_gen_movcond_i64(TCG_COND_LTU, res, a, b, tcg_constant_i64(0), t);
1515 tcg_gen_xor_i64(t, t, res);
1516 tcg_gen_or_i64(qc, qc, t);
1517 }
1518
1519 static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
1520 TCGv_vec a, TCGv_vec b)
1521 {
1522 TCGv_vec x = tcg_temp_new_vec_matching(t);
1523 tcg_gen_sub_vec(vece, x, a, b);
1524 tcg_gen_ussub_vec(vece, t, a, b);
1525 tcg_gen_xor_vec(vece, x, x, t);
1526 tcg_gen_or_vec(vece, qc, qc, x);
1527 }
1528
1529 void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1530 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1531 {
1532 static const TCGOpcode vecop_list[] = {
1533 INDEX_op_ussub_vec, INDEX_op_sub_vec, 0
1534 };
1535 static const GVecGen4 ops[4] = {
1536 { .fniv = gen_uqsub_vec,
1537 .fno = gen_helper_gvec_uqsub_b,
1538 .opt_opc = vecop_list,
1539 .write_aofs = true,
1540 .vece = MO_8 },
1541 { .fniv = gen_uqsub_vec,
1542 .fno = gen_helper_gvec_uqsub_h,
1543 .opt_opc = vecop_list,
1544 .write_aofs = true,
1545 .vece = MO_16 },
1546 { .fniv = gen_uqsub_vec,
1547 .fno = gen_helper_gvec_uqsub_s,
1548 .opt_opc = vecop_list,
1549 .write_aofs = true,
1550 .vece = MO_32 },
1551 { .fniv = gen_uqsub_vec,
1552 .fni8 = gen_uqsub_d,
1553 .fno = gen_helper_gvec_uqsub_d,
1554 .opt_opc = vecop_list,
1555 .write_aofs = true,
1556 .vece = MO_64 },
1557 };
1558
1559 tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
1560 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
1561 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
1562 }
1563
1564 void gen_sqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
1565 {
1566 int64_t max = MAKE_64BIT_MASK(0, (8 << esz) - 1);
1567 int64_t min = -1ll - max;
1568 TCGv_i64 tmp = tcg_temp_new_i64();
1569
1570 tcg_gen_sub_i64(tmp, a, b);
1571 tcg_gen_smin_i64(res, tmp, tcg_constant_i64(max));
1572 tcg_gen_smax_i64(res, res, tcg_constant_i64(min));
1573 tcg_gen_xor_i64(tmp, tmp, res);
1574 tcg_gen_or_i64(qc, qc, tmp);
1575 }
1576
1577 void gen_sqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
1578 {
1579 TCGv_i64 t0 = tcg_temp_new_i64();
1580 TCGv_i64 t1 = tcg_temp_new_i64();
1581 TCGv_i64 t2 = tcg_temp_new_i64();
1582
1583 tcg_gen_sub_i64(t0, a, b);
1584
1585 /* Compute signed overflow indication into T1 */
1586 tcg_gen_xor_i64(t1, a, b);
1587 tcg_gen_xor_i64(t2, t0, a);
1588 tcg_gen_and_i64(t1, t1, t2);
1589
1590 /* Compute saturated value into T2 */
1591 tcg_gen_sari_i64(t2, a, 63);
1592 tcg_gen_xori_i64(t2, t2, INT64_MAX);
1593
1594 tcg_gen_movcond_i64(TCG_COND_LT, res, t1, tcg_constant_i64(0), t2, t0);
1595 tcg_gen_xor_i64(t0, t0, res);
1596 tcg_gen_or_i64(qc, qc, t0);
1597 }
1598
static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_sub_vec(vece, x, a, b);
    tcg_gen_sssub_vec(vece, t, a, b);
    tcg_gen_xor_vec(vece, x, x, t);
    tcg_gen_or_vec(vece, qc, qc, x);
}

void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sssub_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_sqsub_vec,
          .fni8 = gen_sqsub_d,
          .fno = gen_helper_gvec_sqsub_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

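/*
 * Signed absolute difference: compute both a - b and b - a and select
 * the non-negative one with a signed comparison.  The vector form uses
 * smax(a, b) - smin(a, b) instead.
 */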
static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_sub_i32(t, a, b);
    tcg_gen_sub_i32(d, b, a);
    tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
}

static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_sub_i64(t, a, b);
    tcg_gen_sub_i64(d, b, a);
    tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
}

static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_smin_vec(vece, t, a, b);
    tcg_gen_smax_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, t);
}

void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_sabd_i32,
          .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_sabd_i64,
          .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

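/*
 * Unsigned absolute difference: same structure as SABD, but using an
 * unsigned comparison (and umin/umax for the vector form).
 */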
static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_sub_i32(t, a, b);
    tcg_gen_sub_i32(d, b, a);
    tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
}

static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_sub_i64(t, a, b);
    tcg_gen_sub_i64(d, b, a);
    tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
}

static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_umin_vec(vece, t, a, b);
    tcg_gen_umax_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, t);
}

void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_uabd_i32,
          .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_uabd_i64,
          .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

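/*
 * Signed absolute difference and accumulate: the absolute difference
 * is added into the destination, so load_dest is set in the GVecGen3
 * descriptors below.
 */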
static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();
    gen_sabd_i32(t, a, b);
    tcg_gen_add_i32(d, d, t);
}

static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();
    gen_sabd_i64(t, a, b);
    tcg_gen_add_i64(d, d, t);
}

static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    gen_sabd_vec(vece, t, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_add_vec,
        INDEX_op_smin_vec, INDEX_op_smax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_saba_i32,
          .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_saba_i64,
          .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

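/* Unsigned absolute difference and accumulate.  */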
static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();
    gen_uabd_i32(t, a, b);
    tcg_gen_add_i32(d, d, t);
}

static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();
    gen_uabd_i64(t, a, b);
    tcg_gen_add_i64(d, d, t);
}

static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    gen_uabd_vec(vece, t, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_add_vec,
        INDEX_op_umin_vec, INDEX_op_umax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_uaba_i32,
          .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_uaba_i64,
          .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

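/*
 * Pairwise operations are not expanded inline; they always go through
 * out-of-line helpers.  The min/max pairwise helpers provide only
 * byte, half and word element sizes, hence the asserts below.
 */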
void gen_gvec_addp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_addp_b,
        gen_helper_gvec_addp_h,
        gen_helper_gvec_addp_s,
        gen_helper_gvec_addp_d,
    };
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_smaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_smaxp_b,
        gen_helper_gvec_smaxp_h,
        gen_helper_gvec_smaxp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_sminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_sminp_b,
        gen_helper_gvec_sminp_h,
        gen_helper_gvec_sminp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_umaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_umaxp_b,
        gen_helper_gvec_umaxp_h,
        gen_helper_gvec_umaxp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_uminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_uminp_b,
        gen_helper_gvec_uminp_h,
        gen_helper_gvec_uminp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

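/*
 * Signed halving add: (a + b) >> 1 computed without intermediate
 * overflow as (a >> 1) + (b >> 1) + (a & b & 1).  The 8- and 16-bit
 * versions operate on all elements packed into one 64-bit value via
 * the tcg_gen_vec_* helpers.
 */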
static void gen_shadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_and_i64(t, a, b);
    tcg_gen_vec_sar8i_i64(a, a, 1);
    tcg_gen_vec_sar8i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_add8_i64(d, a, b);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_shadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_and_i64(t, a, b);
    tcg_gen_vec_sar16i_i64(a, a, 1);
    tcg_gen_vec_sar16i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_add16_i64(d, a, b);
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_shadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_and_i32(t, a, b);
    tcg_gen_sari_i32(a, a, 1);
    tcg_gen_sari_i32(b, b, 1);
    tcg_gen_andi_i32(t, t, 1);
    tcg_gen_add_i32(d, a, b);
    tcg_gen_add_i32(d, d, t);
}

static void gen_shadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_and_vec(vece, t, a, b);
    tcg_gen_sari_vec(vece, a, a, 1);
    tcg_gen_sari_vec(vece, b, b, 1);
    tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1));
    tcg_gen_add_vec(vece, d, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_shadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 g[] = {
        { .fni8 = gen_shadd8_i64,
          .fniv = gen_shadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_shadd16_i64,
          .fniv = gen_shadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_shadd_i32,
          .fniv = gen_shadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]);
}

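/* Unsigned halving add: as SHADD, but with logical shifts.  */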
static void gen_uhadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_and_i64(t, a, b);
    tcg_gen_vec_shr8i_i64(a, a, 1);
    tcg_gen_vec_shr8i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_add8_i64(d, a, b);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_uhadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_and_i64(t, a, b);
    tcg_gen_vec_shr16i_i64(a, a, 1);
    tcg_gen_vec_shr16i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_add16_i64(d, a, b);
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_uhadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_and_i32(t, a, b);
    tcg_gen_shri_i32(a, a, 1);
    tcg_gen_shri_i32(b, b, 1);
    tcg_gen_andi_i32(t, t, 1);
    tcg_gen_add_i32(d, a, b);
    tcg_gen_add_i32(d, d, t);
}

static void gen_uhadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_and_vec(vece, t, a, b);
    tcg_gen_shri_vec(vece, a, a, 1);
    tcg_gen_shri_vec(vece, b, b, 1);
    tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1));
    tcg_gen_add_vec(vece, d, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_uhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 g[] = {
        { .fni8 = gen_uhadd8_i64,
          .fniv = gen_uhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_uhadd16_i64,
          .fniv = gen_uhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_uhadd_i32,
          .fniv = gen_uhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]);
}

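/*
 * Signed halving subtract: (a - b) >> 1 computed without intermediate
 * overflow as (a >> 1) - (b >> 1) - (~a & b & 1); the last term is the
 * borrow out of the low bit.
 */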
static void gen_shsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_andc_i64(t, b, a);
    tcg_gen_vec_sar8i_i64(a, a, 1);
    tcg_gen_vec_sar8i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_sub8_i64(d, a, b);
    tcg_gen_vec_sub8_i64(d, d, t);
}

static void gen_shsub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_andc_i64(t, b, a);
    tcg_gen_vec_sar16i_i64(a, a, 1);
    tcg_gen_vec_sar16i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_sub16_i64(d, a, b);
    tcg_gen_vec_sub16_i64(d, d, t);
}

static void gen_shsub_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_andc_i32(t, b, a);
    tcg_gen_sari_i32(a, a, 1);
    tcg_gen_sari_i32(b, b, 1);
    tcg_gen_andi_i32(t, t, 1);
    tcg_gen_sub_i32(d, a, b);
    tcg_gen_sub_i32(d, d, t);
}

static void gen_shsub_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_andc_vec(vece, t, b, a);
    tcg_gen_sari_vec(vece, a, a, 1);
    tcg_gen_sari_vec(vece, b, b, 1);
    tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1));
    tcg_gen_sub_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, t);
}

void gen_gvec_shsub(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sari_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen3 g[4] = {
        { .fni8 = gen_shsub8_i64,
          .fniv = gen_shsub_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_shsub16_i64,
          .fniv = gen_shsub_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_shsub_i32,
          .fniv = gen_shsub_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
    };
    assert(vece <= MO_32);
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]);
}

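/* Unsigned halving subtract: as SHSUB, but with logical shifts.  */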
static void gen_uhsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_andc_i64(t, b, a);
    tcg_gen_vec_shr8i_i64(a, a, 1);
    tcg_gen_vec_shr8i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_sub8_i64(d, a, b);
    tcg_gen_vec_sub8_i64(d, d, t);
}

static void gen_uhsub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_andc_i64(t, b, a);
    tcg_gen_vec_shr16i_i64(a, a, 1);
    tcg_gen_vec_shr16i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_sub16_i64(d, a, b);
    tcg_gen_vec_sub16_i64(d, d, t);
}

static void gen_uhsub_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_andc_i32(t, b, a);
    tcg_gen_shri_i32(a, a, 1);
    tcg_gen_shri_i32(b, b, 1);
    tcg_gen_andi_i32(t, t, 1);
    tcg_gen_sub_i32(d, a, b);
    tcg_gen_sub_i32(d, d, t);
}

static void gen_uhsub_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_andc_vec(vece, t, b, a);
    tcg_gen_shri_vec(vece, a, a, 1);
    tcg_gen_shri_vec(vece, b, b, 1);
    tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1));
    tcg_gen_sub_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, t);
}

void gen_gvec_uhsub(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen3 g[4] = {
        { .fni8 = gen_uhsub8_i64,
          .fniv = gen_uhsub_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_uhsub16_i64,
          .fniv = gen_uhsub_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_uhsub_i32,
          .fniv = gen_uhsub_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
    };
    assert(vece <= MO_32);
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]);
}

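/*
 * Signed rounding halving add: (a + b + 1) >> 1 computed without
 * intermediate overflow as (a >> 1) + (b >> 1) + ((a | b) & 1).
 */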
static void gen_srhadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_or_i64(t, a, b);
    tcg_gen_vec_sar8i_i64(a, a, 1);
    tcg_gen_vec_sar8i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_add8_i64(d, a, b);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_srhadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_or_i64(t, a, b);
    tcg_gen_vec_sar16i_i64(a, a, 1);
    tcg_gen_vec_sar16i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_add16_i64(d, a, b);
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_srhadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_or_i32(t, a, b);
    tcg_gen_sari_i32(a, a, 1);
    tcg_gen_sari_i32(b, b, 1);
    tcg_gen_andi_i32(t, t, 1);
    tcg_gen_add_i32(d, a, b);
    tcg_gen_add_i32(d, d, t);
}

static void gen_srhadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_or_vec(vece, t, a, b);
    tcg_gen_sari_vec(vece, a, a, 1);
    tcg_gen_sari_vec(vece, b, b, 1);
    tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1));
    tcg_gen_add_vec(vece, d, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_srhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 g[] = {
        { .fni8 = gen_srhadd8_i64,
          .fniv = gen_srhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_srhadd16_i64,
          .fniv = gen_srhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_srhadd_i32,
          .fniv = gen_srhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
    };
    assert(vece <= MO_32);
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]);
}

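/* Unsigned rounding halving add: as SRHADD, but with logical shifts.  */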
static void gen_urhadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_or_i64(t, a, b);
    tcg_gen_vec_shr8i_i64(a, a, 1);
    tcg_gen_vec_shr8i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_add8_i64(d, a, b);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_urhadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_or_i64(t, a, b);
    tcg_gen_vec_shr16i_i64(a, a, 1);
    tcg_gen_vec_shr16i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_add16_i64(d, a, b);
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_urhadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_or_i32(t, a, b);
    tcg_gen_shri_i32(a, a, 1);
    tcg_gen_shri_i32(b, b, 1);
    tcg_gen_andi_i32(t, t, 1);
    tcg_gen_add_i32(d, a, b);
    tcg_gen_add_i32(d, d, t);
}

static void gen_urhadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_or_vec(vece, t, a, b);
    tcg_gen_shri_vec(vece, a, a, 1);
    tcg_gen_shri_vec(vece, b, b, 1);
    tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1));
    tcg_gen_add_vec(vece, d, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_urhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 g[] = {
        { .fni8 = gen_urhadd8_i64,
          .fniv = gen_urhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_urhadd16_i64,
          .fniv = gen_urhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_urhadd_i32,
          .fniv = gen_urhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
    };
    assert(vece <= MO_32);
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]);
}
