1a11efe30SRichard Henderson /*
2a11efe30SRichard Henderson * AArch64 generic vector expansion
3a11efe30SRichard Henderson *
4a11efe30SRichard Henderson * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
5a11efe30SRichard Henderson *
6a11efe30SRichard Henderson * This library is free software; you can redistribute it and/or
7a11efe30SRichard Henderson * modify it under the terms of the GNU Lesser General Public
8a11efe30SRichard Henderson * License as published by the Free Software Foundation; either
9a11efe30SRichard Henderson * version 2.1 of the License, or (at your option) any later version.
10a11efe30SRichard Henderson *
11a11efe30SRichard Henderson * This library is distributed in the hope that it will be useful,
12a11efe30SRichard Henderson * but WITHOUT ANY WARRANTY; without even the implied warranty of
13a11efe30SRichard Henderson * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14a11efe30SRichard Henderson * Lesser General Public License for more details.
15a11efe30SRichard Henderson *
16a11efe30SRichard Henderson * You should have received a copy of the GNU Lesser General Public
17a11efe30SRichard Henderson * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18a11efe30SRichard Henderson */
19a11efe30SRichard Henderson
20a11efe30SRichard Henderson #include "qemu/osdep.h"
21a11efe30SRichard Henderson #include "translate.h"
22a11efe30SRichard Henderson #include "translate-a64.h"
23a11efe30SRichard Henderson
24a11efe30SRichard Henderson
/* RAX1, one 64-bit lane: d = n ^ rol64(m, 1). */
static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    tcg_gen_rotli_i64(d, m, 1);
    tcg_gen_xor_i64(d, n, d);
}
30a11efe30SRichard Henderson
/* RAX1, vector form: per-lane d = n ^ rol(m, 1). */
static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m)
{
    tcg_gen_rotli_vec(vece, d, m, 1);
    tcg_gen_xor_vec(vece, d, n, d);
}
36a11efe30SRichard Henderson
/*
 * Expand RAX1 over a vector region, preferring the inline vector or
 * 64-bit scalar expansion and falling back to the out-of-line helper.
 */
void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    /* The vector expansion requires a rotate-left-immediate op. */
    static const TCGOpcode need_ops[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3 gen = {
        .fni8 = gen_rax1_i64,
        .fniv = gen_rax1_vec,
        .fno = gen_helper_crypto_rax1,
        .opt_opc = need_ops,
        .vece = MO_64,
    };

    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &gen);
}
50a11efe30SRichard Henderson
/*
 * XAR on 8-bit lanes packed in an i64: per lane, d = ror8(n ^ m, sh).
 * There is no 8-bit rotate on i64, so build it from a right shift and
 * a left shift, masking each half to its own lane before merging.
 */
static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    uint64_t lo_mask = dup_const(MO_8, 0xff >> sh);
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_xor_i64(tmp, n, m);
    tcg_gen_shri_i64(d, tmp, sh);
    tcg_gen_andi_i64(d, d, lo_mask);
    tcg_gen_shli_i64(tmp, tmp, 8 - sh);
    tcg_gen_andi_i64(tmp, tmp, ~lo_mask);
    tcg_gen_or_i64(d, d, tmp);
}
63a11efe30SRichard Henderson
/*
 * XAR on 16-bit lanes packed in an i64: per lane, d = ror16(n ^ m, sh).
 * Same shift-and-mask construction as the 8-bit variant.
 */
static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    uint64_t lo_mask = dup_const(MO_16, 0xffff >> sh);
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_xor_i64(tmp, n, m);
    tcg_gen_shri_i64(d, tmp, sh);
    tcg_gen_andi_i64(d, d, lo_mask);
    tcg_gen_shli_i64(tmp, tmp, 16 - sh);
    tcg_gen_andi_i64(tmp, tmp, ~lo_mask);
    tcg_gen_or_i64(d, d, tmp);
}
76a11efe30SRichard Henderson
/* XAR, one 32-bit lane: d = ror32(n ^ m, sh). */
static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
{
    tcg_gen_xor_i32(d, m, n);
    tcg_gen_rotri_i32(d, d, sh);
}
82a11efe30SRichard Henderson
/* XAR, one 64-bit lane: d = ror64(n ^ m, sh). */
static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    tcg_gen_xor_i64(d, m, n);
    tcg_gen_rotri_i64(d, d, sh);
}
88a11efe30SRichard Henderson
/* XAR, vector form: per-lane d = ror(n ^ m, sh). */
static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                        TCGv_vec m, int64_t sh)
{
    tcg_gen_xor_vec(vece, d, m, n);
    tcg_gen_rotri_vec(vece, d, d, sh);
}
95a11efe30SRichard Henderson
/*
 * Expand XAR (xor + rotate right by immediate) over a vector region.
 * Per element: rd = ror(rn ^ rm, shift), with the element size chosen
 * by @vece.  Dispatches through a per-element-size expansion table.
 */
void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, int64_t shift,
                  uint32_t opr_sz, uint32_t max_sz)
{
    /*
     * NOTE(review): the fniv expanders use tcg_gen_rotri_vec but this
     * list requires rotli_vec — presumably rotri is expanded in terms
     * of rotli; confirm against tcg-op-vec.
     */
    static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3i ops[4] = {
        /* 8- and 16-bit lanes have no narrow rotate on i64; use the
           shift-and-mask scalar expanders. */
        { .fni8 = gen_xar8_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fni8 = gen_xar16_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_xar_i32,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_xar_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_gvec_xar_d,
          .opt_opc = vecop,
          .vece = MO_64 }
    };
    int esize = 8 << vece;

    /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift <= esize);
    /* Fold a rotate by esize (SVE2's maximum) down to a rotate by 0. */
    shift &= esize - 1;

    if (shift == 0) {
        /* xar with no rotate devolves to xor. */
        tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
                        shift, &ops[vece]);
    }
}
138a11efe30SRichard Henderson
/* EOR3, one 64-bit lane: d = n ^ m ^ k (xor is associative). */
static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_xor_i64(d, m, k);
    tcg_gen_xor_i64(d, n, d);
}
144a11efe30SRichard Henderson
/* EOR3, vector form: per-lane d = n ^ m ^ k. */
static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_xor_vec(vece, d, m, k);
    tcg_gen_xor_vec(vece, d, n, d);
}
151a11efe30SRichard Henderson
/* Expand EOR3 (three-way xor) over a vector region. */
void gen_gvec_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                   uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 gen = {
        .fni8 = gen_eor3_i64,
        .fniv = gen_eor3_vec,
        .fno = gen_helper_sve2_eor3,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
        .vece = MO_64,
    };

    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &gen);
}
164a11efe30SRichard Henderson
/* BCAX, one 64-bit lane: d = n ^ (m & ~k). */
static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(d, m, k);
    tcg_gen_xor_i64(d, n, d);
}
170a11efe30SRichard Henderson
/* BCAX, vector form: per-lane d = n ^ (m & ~k). */
static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_andc_vec(vece, d, m, k);
    tcg_gen_xor_vec(vece, d, n, d);
}
177a11efe30SRichard Henderson
/* Expand BCAX (bit clear and xor) over a vector region. */
void gen_gvec_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                   uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 gen = {
        .fni8 = gen_bcax_i64,
        .fniv = gen_bcax_vec,
        .fno = gen_helper_sve2_bcax,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
        .vece = MO_64,
    };

    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &gen);
}
190a11efe30SRichard Henderson
/*
 * Set @res to the correctly saturated result.
 * Set @qc non-zero if saturation occurred.
 */
void gen_suqadd_bhs(TCGv_i64 res, TCGv_i64 qc,
                    TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    /* Signed maximum for the element size: 2^(bits-1) - 1. */
    TCGv_i64 max = tcg_constant_i64((1ull << ((8 << esz) - 1)) - 1);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_add_i64(t, a, b);
    /* Only an upper clamp: no lower-bound check is performed here.
       Presumably @a/@b are extended into i64 so the sum cannot
       underflow the signed minimum — confirm against callers. */
    tcg_gen_smin_i64(res, t, max);
    /* Non-zero iff the clamp changed the value, i.e. saturation. */
    tcg_gen_xor_i64(t, t, res);
    tcg_gen_or_i64(qc, qc, t);
}
206*1217edacSRichard Henderson
/*
 * SUQADD for a full 64-bit element: signed @a plus unsigned @b,
 * saturating at INT64_MAX.  Accumulates saturation into @qc.
 */
void gen_suqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 smax = tcg_constant_i64(INT64_MAX);
    TCGv_i64 adj = tcg_temp_new_i64();

    /* Maximum value that can be added to @a without overflow. */
    tcg_gen_sub_i64(adj, smax, a);

    /* Constrain addend so that the next addition never overflows. */
    tcg_gen_umin_i64(adj, adj, b);
    tcg_gen_add_i64(res, a, adj);

    /* If the addend was clamped, @adj != @b and saturation happened. */
    tcg_gen_xor_i64(adj, adj, b);
    tcg_gen_or_i64(qc, qc, adj);
}
222*1217edacSRichard Henderson
/*
 * SUQADD vector expansion: per-lane signed @a plus unsigned @b with
 * saturation at the signed maximum; saturation accumulates into @qc.
 */
static void gen_suqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                           TCGv_vec a, TCGv_vec b)
{
    TCGv_vec smax =
        tcg_constant_vec_matching(t, vece, (1ull << ((8 << vece) - 1)) - 1);
    TCGv_vec adj = tcg_temp_new_vec_matching(t);

    /* Maximum value that can be added to @a without overflow. */
    tcg_gen_sub_vec(vece, adj, smax, a);

    /* Constrain addend so that the next addition never overflows. */
    tcg_gen_umin_vec(vece, adj, adj, b);
    tcg_gen_add_vec(vece, t, adj, a);

    /* Compute QC by comparing the adjusted @b. */
    tcg_gen_xor_vec(vece, adj, adj, b);
    tcg_gen_or_vec(vece, qc, qc, adj);
}
2418f6343aeSRichard Henderson
/*
 * Expand SUQADD over a vector region, accumulating saturation state
 * into vfp.qc.  The second gvec operand slot is wired to the QC field
 * (write_aofs = true so it is treated as input/output).
 */
void gen_gvec_suqadd_qc(unsigned vece, uint32_t rd_ofs,
                        uint32_t rn_ofs, uint32_t rm_ofs,
                        uint32_t opr_sz, uint32_t max_sz)
{
    /* Ops required by gen_suqadd_vec. */
    static const TCGOpcode vecop_list[] = {
        INDEX_op_add_vec, INDEX_op_sub_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_suqadd_vec,
          .fno = gen_helper_gvec_suqadd_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_suqadd_vec,
          .fno = gen_helper_gvec_suqadd_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_suqadd_vec,
          .fno = gen_helper_gvec_suqadd_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        /* Only the 64-bit lane has a scalar fallback expander. */
        { .fniv = gen_suqadd_vec,
          .fni8 = gen_suqadd_d,
          .fno = gen_helper_gvec_suqadd_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };

    /* The expansion reads/writes qc as if it were a vector of opr_sz. */
    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}
2778f6343aeSRichard Henderson
/*
 * USQADD for b/h/s elements held in an i64: unsigned @a plus signed @b,
 * clamped to [0, 2^bits - 1].  Saturation accumulates into @qc.
 */
void gen_usqadd_bhs(TCGv_i64 res, TCGv_i64 qc,
                    TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    TCGv_i64 umax = tcg_constant_i64(MAKE_64BIT_MASK(0, 8 << esz));
    TCGv_i64 zero = tcg_constant_i64(0);
    TCGv_i64 sum = tcg_temp_new_i64();

    tcg_gen_add_i64(sum, a, b);
    /* Clamp into [0, umax]; the two bounds commute since umax >= 0. */
    tcg_gen_smax_i64(res, sum, zero);
    tcg_gen_smin_i64(res, res, umax);
    /* Non-zero iff clamping changed the value, i.e. saturation. */
    tcg_gen_xor_i64(sum, sum, res);
    tcg_gen_or_i64(qc, qc, sum);
}
291*1217edacSRichard Henderson
/*
 * USQADD for a full 64-bit element: unsigned @a plus signed @b, with
 * unsigned saturation at both ends.  Both candidate results are
 * computed and the correct one selected by the sign of @b; saturation
 * accumulates into @qc.
 */
void gen_usqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 tmp = tcg_temp_new_i64();
    TCGv_i64 tneg = tcg_temp_new_i64();
    TCGv_i64 tpos = tcg_temp_new_i64();
    TCGv_i64 max = tcg_constant_i64(UINT64_MAX);
    TCGv_i64 zero = tcg_constant_i64(0);

    /* Raw (wrapping) sum, also used below for the QC comparison. */
    tcg_gen_add_i64(tmp, a, b);

    /* If @b is positive, saturate if (a + b) < a, aka unsigned overflow. */
    tcg_gen_movcond_i64(TCG_COND_LTU, tpos, tmp, a, max, tmp);

    /* If @b is negative, saturate if a < -b, ie subtraction is negative. */
    tcg_gen_neg_i64(tneg, b);
    tcg_gen_movcond_i64(TCG_COND_LTU, tneg, a, tneg, zero, tmp);

    /* Select correct result from sign of @b. */
    tcg_gen_movcond_i64(TCG_COND_LT, res, b, zero, tneg, tpos);
    /* QC is non-zero iff the saturated result differs from the raw sum. */
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}
314*1217edacSRichard Henderson
/*
 * USQADD vector expansion: per-lane unsigned @a plus signed @b with
 * unsigned saturation.  Built from usadd (for b >= 0) and ussub (for
 * b < 0), selecting per lane on the sign of @b; saturation accumulates
 * into @qc.
 */
static void gen_usqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                           TCGv_vec a, TCGv_vec b)
{
    TCGv_vec u = tcg_temp_new_vec_matching(t);
    TCGv_vec z = tcg_constant_vec_matching(t, vece, 0);

    /* Compute unsigned saturation of add for +b and sub for -b. */
    tcg_gen_neg_vec(vece, t, b);
    tcg_gen_usadd_vec(vece, u, a, b);
    tcg_gen_ussub_vec(vece, t, a, t);

    /* Select the correct result depending on the sign of b. */
    tcg_gen_cmpsel_vec(TCG_COND_LT, vece, t, b, z, t, u);

    /* Compute QC by comparing against the non-saturated result. */
    tcg_gen_add_vec(vece, u, a, b);
    tcg_gen_xor_vec(vece, u, u, t);
    tcg_gen_or_vec(vece, qc, qc, u);
}
3348f6343aeSRichard Henderson
/*
 * Expand USQADD over a vector region, accumulating saturation state
 * into vfp.qc.  The second gvec operand slot is wired to the QC field
 * (write_aofs = true so it is treated as input/output).
 */
void gen_gvec_usqadd_qc(unsigned vece, uint32_t rd_ofs,
                        uint32_t rn_ofs, uint32_t rm_ofs,
                        uint32_t opr_sz, uint32_t max_sz)
{
    /* Ops required by gen_usqadd_vec. */
    static const TCGOpcode vecop_list[] = {
        INDEX_op_neg_vec, INDEX_op_add_vec,
        INDEX_op_usadd_vec, INDEX_op_ussub_vec,
        INDEX_op_cmpsel_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_usqadd_vec,
          .fno = gen_helper_gvec_usqadd_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_usqadd_vec,
          .fno = gen_helper_gvec_usqadd_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_usqadd_vec,
          .fno = gen_helper_gvec_usqadd_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        /* Only the 64-bit lane has a scalar fallback expander. */
        { .fniv = gen_usqadd_vec,
          .fni8 = gen_usqadd_d,
          .fno = gen_helper_gvec_usqadd_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };

    /* The expansion reads/writes qc as if it were a vector of opr_sz. */
    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}
372