/*
 * AArch64 generic vector expansion
 *
 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

20 #include "qemu/osdep.h"
21 #include "translate.h"
22 #include "translate-a64.h"
23
24
/* RAX1 expansion on one 64-bit lane: d = n ^ rol64(m, 1). */
static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    tcg_gen_rotli_i64(d, m, 1);
    tcg_gen_xor_i64(d, d, n);
}

/* Vector form of RAX1: per-lane d = n ^ rol(m, 1). */
static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m)
{
    tcg_gen_rotli_vec(vece, d, m, 1);
    tcg_gen_xor_vec(vece, d, d, n);
}

/*
 * Expand RAX1 across a vector register region, preferring the inline
 * i64/vec implementations and falling back to the out-of-line helper.
 */
void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3 op = {
        .fni8 = gen_rax1_i64,
        .fniv = gen_rax1_vec,
        .opt_opc = vecop_list,
        .fno = gen_helper_crypto_rax1,
        .vece = MO_64,
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op);
}

/*
 * XAR on 8-bit lanes packed into a 64-bit value: per-lane ror8(n ^ m, sh).
 * There is no sub-word rotate on an i64, so build it from a right shift,
 * a left shift, and per-lane masks that discard bits crossing lanes.
 */
static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_8, 0xff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 8 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
}

/*
 * XAR on 16-bit lanes packed into a 64-bit value: per-lane ror16(n ^ m, sh),
 * built from shifts and lane masks as in gen_xar8_i64.
 */
static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_16, 0xffff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 16 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
}

/* XAR on a single 32-bit lane: d = ror32(n ^ m, sh). */
static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
{
    tcg_gen_xor_i32(d, n, m);
    tcg_gen_rotri_i32(d, d, sh);
}

/* XAR on a single 64-bit lane: d = ror64(n ^ m, sh). */
static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_rotri_i64(d, d, sh);
}

/* Vector form of XAR: per-lane d = ror(n ^ m, sh). */
static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                        TCGv_vec m, int64_t sh)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_rotri_vec(vece, d, d, sh);
}

/*
 * Expand XAR (xor + per-element rotate right) across a vector register
 * region.  @shift follows either encoding's convention (see below); a
 * rotate of 0 or esize reduces to a plain XOR.
 */
void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, int64_t shift,
                  uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3i ops[4] = {
        { .fni8 = gen_xar8_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fni8 = gen_xar16_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_xar_i32,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_xar_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_gvec_xar_d,
          .opt_opc = vecop,
          .vece = MO_64 }
    };
    int esize = 8 << vece;

    /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift <= esize);
    shift &= esize - 1;                 /* fold shift == esize to 0 */

    if (shift == 0) {
        /* xar with no rotate devolves to xor. */
        tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
                        shift, &ops[vece]);
    }
}

/* EOR3 on one 64-bit lane: d = n ^ m ^ k. */
static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_xor_i64(d, d, k);
}

/* Vector form of EOR3: per-lane d = n ^ m ^ k. */
static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_xor_vec(vece, d, d, k);
}

/* Expand EOR3 (three-way xor) across a vector register region. */
void gen_gvec_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                   uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor3_i64,
        .fniv = gen_eor3_vec,
        .fno = gen_helper_sve2_eor3,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

/* BCAX on one 64-bit lane: d = n ^ (m & ~k). */
static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(d, m, k);
    tcg_gen_xor_i64(d, d, n);
}

/* Vector form of BCAX: per-lane d = n ^ (m & ~k). */
static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_andc_vec(vece, d, m, k);
    tcg_gen_xor_vec(vece, d, d, n);
}

/* Expand BCAX (bit clear and xor) across a vector register region. */
void gen_gvec_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                   uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bcax_i64,
        .fniv = gen_bcax_vec,
        .fno = gen_helper_sve2_bcax,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

/*
 * Set @res to the correctly saturated result.
 * Set @qc non-zero if saturation occurred.
 */
/*
 * Signed saturating add of unsigned @b into signed @a, for elements of
 * size @esz (byte/half/word), accumulating saturation into @qc.
 */
void gen_suqadd_bhs(TCGv_i64 res, TCGv_i64 qc,
                    TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    /* Signed maximum for the element width: 2**(bits-1) - 1. */
    TCGv_i64 max = tcg_constant_i64((1ull << ((8 << esz) - 1)) - 1);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_add_i64(t, a, b);
    tcg_gen_smin_i64(res, t, max);
    /* QC accumulates any difference between the raw and saturated sums. */
    tcg_gen_xor_i64(t, t, res);
    tcg_gen_or_i64(qc, qc, t);
}

/*
 * 64-bit variant of gen_suqadd_bhs: signed saturating add of unsigned @b
 * into signed @a, accumulating saturation into @qc.
 */
void gen_suqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 max = tcg_constant_i64(INT64_MAX);
    TCGv_i64 t = tcg_temp_new_i64();

    /* Maximum value that can be added to @a without overflow. */
    tcg_gen_sub_i64(t, max, a);

    /* Constrain addend so that the next addition never overflows. */
    tcg_gen_umin_i64(t, t, b);
    tcg_gen_add_i64(res, a, t);

    /* QC is set iff the addend had to be clamped (t != b). */
    tcg_gen_xor_i64(t, t, b);
    tcg_gen_or_i64(qc, qc, t);
}

/* Vector form of gen_suqadd_d, for any element size @vece. */
static void gen_suqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                           TCGv_vec a, TCGv_vec b)
{
    /* Signed maximum for the element width: 2**(bits-1) - 1. */
    TCGv_vec max =
        tcg_constant_vec_matching(t, vece, (1ull << ((8 << vece) - 1)) - 1);
    TCGv_vec u = tcg_temp_new_vec_matching(t);

    /* Maximum value that can be added to @a without overflow. */
    tcg_gen_sub_vec(vece, u, max, a);

    /* Constrain addend so that the next addition never overflows. */
    tcg_gen_umin_vec(vece, u, u, b);
    tcg_gen_add_vec(vece, t, u, a);

    /* Compute QC by comparing the adjusted @b. */
    tcg_gen_xor_vec(vece, u, u, b);
    tcg_gen_or_vec(vece, qc, qc, u);
}

/*
 * Expand SUQADD across a vector register region, accumulating saturation
 * into vfp.qc (passed as the 'a' operand via write_aofs).
 */
void gen_gvec_suqadd_qc(unsigned vece, uint32_t rd_ofs,
                        uint32_t rn_ofs, uint32_t rm_ofs,
                        uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_add_vec, INDEX_op_sub_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_suqadd_vec,
          .fno = gen_helper_gvec_suqadd_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_suqadd_vec,
          .fno = gen_helper_gvec_suqadd_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_suqadd_vec,
          .fno = gen_helper_gvec_suqadd_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_suqadd_vec,
          .fni8 = gen_suqadd_d,
          .fno = gen_helper_gvec_suqadd_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };

    /* The QC accumulator must be wide enough for the operation size. */
    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/*
 * Unsigned saturating add of signed @b into unsigned @a, for elements of
 * size @esz (byte/half/word), accumulating saturation into @qc.
 */
void gen_usqadd_bhs(TCGv_i64 res, TCGv_i64 qc,
                    TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    /* Unsigned maximum for the element width: 2**bits - 1. */
    TCGv_i64 max = tcg_constant_i64(MAKE_64BIT_MASK(0, 8 << esz));
    TCGv_i64 zero = tcg_constant_i64(0);
    TCGv_i64 tmp = tcg_temp_new_i64();

    /* Clamp the signed sum into [0, max]. */
    tcg_gen_add_i64(tmp, a, b);
    tcg_gen_smin_i64(res, tmp, max);
    tcg_gen_smax_i64(res, res, zero);
    /* QC accumulates any difference between the raw and saturated sums. */
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}

/*
 * 64-bit variant of gen_usqadd_bhs: unsigned saturating add of signed @b
 * into unsigned @a, accumulating saturation into @qc.
 */
void gen_usqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 tmp = tcg_temp_new_i64();
    TCGv_i64 tneg = tcg_temp_new_i64();
    TCGv_i64 tpos = tcg_temp_new_i64();
    TCGv_i64 max = tcg_constant_i64(UINT64_MAX);
    TCGv_i64 zero = tcg_constant_i64(0);

    tcg_gen_add_i64(tmp, a, b);

    /* If @b is positive, saturate if (a + b) < a, aka unsigned overflow. */
    tcg_gen_movcond_i64(TCG_COND_LTU, tpos, tmp, a, max, tmp);

    /* If @b is negative, saturate if a < -b, ie subtraction is negative. */
    tcg_gen_neg_i64(tneg, b);
    tcg_gen_movcond_i64(TCG_COND_LTU, tneg, a, tneg, zero, tmp);

    /* Select correct result from sign of @b. */
    tcg_gen_movcond_i64(TCG_COND_LT, res, b, zero, tneg, tpos);
    /* QC accumulates any difference between the raw and saturated sums. */
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}

/* Vector form of gen_usqadd_d, for any element size @vece. */
static void gen_usqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                           TCGv_vec a, TCGv_vec b)
{
    TCGv_vec u = tcg_temp_new_vec_matching(t);
    TCGv_vec z = tcg_constant_vec_matching(t, vece, 0);

    /* Compute unsigned saturation of add for +b and sub for -b. */
    tcg_gen_neg_vec(vece, t, b);
    tcg_gen_usadd_vec(vece, u, a, b);
    tcg_gen_ussub_vec(vece, t, a, t);

    /* Select the correct result depending on the sign of b. */
    tcg_gen_cmpsel_vec(TCG_COND_LT, vece, t, b, z, t, u);

    /* Compute QC by comparing against the non-saturated result. */
    tcg_gen_add_vec(vece, u, a, b);
    tcg_gen_xor_vec(vece, u, u, t);
    tcg_gen_or_vec(vece, qc, qc, u);
}

/*
 * Expand USQADD across a vector register region, accumulating saturation
 * into vfp.qc (passed as the 'a' operand via write_aofs).
 */
void gen_gvec_usqadd_qc(unsigned vece, uint32_t rd_ofs,
                        uint32_t rn_ofs, uint32_t rm_ofs,
                        uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_neg_vec, INDEX_op_add_vec,
        INDEX_op_usadd_vec, INDEX_op_ussub_vec,
        INDEX_op_cmpsel_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_usqadd_vec,
          .fno = gen_helper_gvec_usqadd_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_usqadd_vec,
          .fno = gen_helper_gvec_usqadd_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_usqadd_vec,
          .fno = gen_helper_gvec_usqadd_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_usqadd_vec,
          .fni8 = gen_usqadd_d,
          .fno = gen_helper_gvec_usqadd_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };

    /* The QC accumulator must be wide enough for the operation size. */
    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}
