xref: /openbmc/qemu/target/arm/tcg/gengvec.c (revision 8f81dced)
/*
 *  ARM generic vector expansion
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *  Copyright (c) 2005-2007 CodeSourcery
 *  Copyright (c) 2007 OpenedHand, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "translate.h"

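
/*
 * The saturating expanders below hand the helpers a pointer to the
 * sticky QC flag (CPUARMState.vfp.qc) as their extra pointer argument,
 * so that saturation can be recorded and later reported via FPSCR.QC /
 * FPSR.QC.
 */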
static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
                            uint32_t opr_sz, uint32_t max_sz,
                            gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr qc_ptr = tcg_temp_new_ptr();

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc));
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
                       opr_sz, max_sz, 0, fn);
}

void gen_gvec_sqdmulh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                         uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[2] = {
        gen_helper_neon_sqdmulh_h, gen_helper_neon_sqdmulh_s
    };
    tcg_debug_assert(vece >= 1 && vece <= 2);
    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
}

void gen_gvec_sqrdmulh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                         uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[2] = {
        gen_helper_neon_sqrdmulh_h, gen_helper_neon_sqrdmulh_s
    };
    tcg_debug_assert(vece >= 1 && vece <= 2);
    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
}

void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[2] = {
        gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
    };
    tcg_debug_assert(vece >= 1 && vece <= 2);
    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
}

void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[2] = {
        gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
    };
    tcg_debug_assert(vece >= 1 && vece <= 2);
    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
}

#define GEN_CMP0(NAME, COND)                              \
    void NAME(unsigned vece, uint32_t d, uint32_t m,      \
              uint32_t opr_sz, uint32_t max_sz)           \
    { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); }

GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ)
GEN_CMP0(gen_gvec_cle0, TCG_COND_LE)
GEN_CMP0(gen_gvec_cge0, TCG_COND_GE)
GEN_CMP0(gen_gvec_clt0, TCG_COND_LT)
GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT)

#undef GEN_CMP0
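
/*
 * For reference, a hand expansion of the first instance above
 * (illustration only; the real definitions come from GEN_CMP0):
 *
 *     void gen_gvec_ceq0(unsigned vece, uint32_t d, uint32_t m,
 *                        uint32_t opr_sz, uint32_t max_sz)
 *     { tcg_gen_gvec_cmpi(TCG_COND_EQ, vece, d, m, 0, opr_sz, max_sz); }
 *
 * i.e. each expander compares every element of the m operand against an
 * immediate zero with the given TCG condition, producing all-ones or
 * all-zeros per element in d.
 */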

static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}

static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}

static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_sari_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}

static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_sari_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}

static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_sari_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}

void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_ssra8_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_ssra16_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_ssra32_i32,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_ssra64_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Signed results in all sign bits.
     */
    shift = MIN(shift, (8 << vece) - 1);
    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
}
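
/*
 * Minimal scalar sketch of one SSRA lane (illustration only, not a QEMU
 * helper; assumes arithmetic right shift of signed values, as QEMU does):
 * clamping the shift to esize - 1 preserves the architectural behaviour
 * of an over-wide signed shift, which yields all sign bits, e.g.
 * ssra8_ref(0x10, 0x80, 8) == 0x0f because -128 >> 7 == -1.
 */
static inline int8_t ssra8_ref(int8_t d, int8_t a, unsigned shift)
{
    shift = MIN(shift, 7);              /* over-wide shift -> sign bits */

    return d + (a >> shift);
}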

static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}

static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}

static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}

static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}

static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_shri_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}

void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_usra8_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8, },
        { .fni8 = gen_usra16_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16, },
        { .fni4 = gen_usra32_i32,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32, },
        { .fni8 = gen_usra64_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64, },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Unsigned results in all zeros as input to accumulate: nop.
     */
    if (shift < (8 << vece)) {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    } else {
        /* Nop, but we do need to clear the tail. */
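        /*
         * Writing rd back to itself is what clears the tail: every gvec
         * expansion zeroes the bytes between opr_sz and max_sz, which is
         * how the high half of the AArch64 Q register is cleared for
         * 64-bit results.
         */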
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    }
}

/*
 * Shift one less than the requested amount, and the low bit is
 * the rounding bit.  For the 8 and 16-bit operations, because we
 * mask the low bit, we can perform a normal integer shift instead
 * of a vector shift.
 */
static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_sar8i_i64(d, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}
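
/*
 * A scalar sketch of the rounding identity used above (illustration
 * only, not a QEMU helper): adding back the last bit shifted out is
 * equivalent to adding the rounding constant before shifting, i.e.
 *     (x >> sh) + ((x >> (sh - 1)) & 1) == (x + (1 << (sh - 1))) >> sh
 * For example srshr8_ref(7, 2) == 2 and srshr8_ref(-6, 2) == -1.
 */
static inline int8_t srshr8_ref(int8_t x, unsigned sh)
{
    int round = (x >> (sh - 1)) & 1;    /* the rounding bit */

    return (x >> sh) + round;
}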

static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_sar16i_i64(d, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t;

    /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
    if (sh == 32) {
        tcg_gen_movi_i32(d, 0);
        return;
    }
    t = tcg_temp_new_i32();
    tcg_gen_extract_i32(t, a, sh - 1, 1);
    tcg_gen_sari_i32(d, a, sh);
    tcg_gen_add_i32(d, d, t);
}

void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_extract_i64(t, a, sh - 1, 1);
    tcg_gen_sari_i64(d, a, sh);
    tcg_gen_add_i64(d, d, t);
}

static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec ones = tcg_temp_new_vec_matching(d);

    tcg_gen_shri_vec(vece, t, a, sh - 1);
    tcg_gen_dupi_vec(vece, ones, 1);
    tcg_gen_and_vec(vece, t, t, ones);
    tcg_gen_sari_vec(vece, d, a, sh);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_srshr8_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_srshr16_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_srshr32_i32,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_srshr64_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    if (shift == (8 << vece)) {
        /*
         * Shifts larger than the element size are architecturally valid.
         * Signed results in all sign bits.  With rounding, this produces
         *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
         * I.e. always zero.
         */
        tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr8_i64(t, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr16_i64(t, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_srshr32_i32(t, a, sh);
    tcg_gen_add_i32(d, d, t);
}

static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr64_i64(t, a, sh);
    tcg_gen_add_i64(d, d, t);
}

static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    gen_srshr_vec(vece, t, a, sh);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_srsra8_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fni8 = gen_srsra16_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_srsra32_i32,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_srsra64_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Signed results in all sign bits.  With rounding, this produces
     *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
     * I.e. always zero.  With accumulation, this leaves D unchanged.
     */
    if (shift == (8 << vece)) {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_shr8i_i64(d, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_shr16i_i64(d, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t;

    /* Handle shift by the input size for the benefit of trans_URSHR_ri */
    if (sh == 32) {
        tcg_gen_extract_i32(d, a, sh - 1, 1);
        return;
    }
    t = tcg_temp_new_i32();
    tcg_gen_extract_i32(t, a, sh - 1, 1);
    tcg_gen_shri_i32(d, a, sh);
    tcg_gen_add_i32(d, d, t);
}

void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_extract_i64(t, a, sh - 1, 1);
    tcg_gen_shri_i64(d, a, sh);
    tcg_gen_add_i64(d, d, t);
}

static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec ones = tcg_temp_new_vec_matching(d);

    tcg_gen_shri_vec(vece, t, a, shift - 1);
    tcg_gen_dupi_vec(vece, ones, 1);
    tcg_gen_and_vec(vece, t, t, ones);
    tcg_gen_shri_vec(vece, d, a, shift);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_urshr8_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_urshr16_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_urshr32_i32,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_urshr64_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    if (shift == (8 << vece)) {
        /*
         * Shifts larger than the element size are architecturally valid.
         * Unsigned results in zero.  With rounding, this produces a
         * copy of the most significant bit.
         */
        tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 8) {
        tcg_gen_vec_shr8i_i64(t, a, 7);
    } else {
        gen_urshr8_i64(t, a, sh);
    }
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 16) {
        tcg_gen_vec_shr16i_i64(t, a, 15);
    } else {
        gen_urshr16_i64(t, a, sh);
    }
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t = tcg_temp_new_i32();

    if (sh == 32) {
        tcg_gen_shri_i32(t, a, 31);
    } else {
        gen_urshr32_i32(t, a, sh);
    }
    tcg_gen_add_i32(d, d, t);
}

static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 64) {
        tcg_gen_shri_i64(t, a, 63);
    } else {
        gen_urshr64_i64(t, a, sh);
    }
    tcg_gen_add_i64(d, d, t);
}

static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    if (sh == (8 << vece)) {
        tcg_gen_shri_vec(vece, t, a, sh - 1);
    } else {
        gen_urshr_vec(vece, t, a, sh);
    }
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_ursra8_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fni8 = gen_ursra16_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_ursra32_i32,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_ursra64_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
}

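/*
 * SRI (shift right and insert): each source element is shifted right and
 * inserted into the low bits of the destination element, while the top
 * `shift` bits of the existing destination are preserved.  A scalar
 * sketch of the 8-bit lane operation (illustration only, not a QEMU
 * helper): sri8_ref(0xAB, 0xCD, 4) == 0xAC.
 */
static inline uint8_t sri8_ref(uint8_t d, uint8_t a, unsigned sh)
{
    uint8_t keep = 0xff << (8 - sh);    /* top sh bits of d survive */

    return (d & keep) | (a >> sh);      /* sh == 8 leaves d unchanged */
}
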
static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_8, 0xff >> shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_16, 0xffff >> shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
}

static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
}

static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec m = tcg_temp_new_vec_matching(d);

    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
    tcg_gen_shri_vec(vece, t, a, sh);
    tcg_gen_and_vec(vece, d, d, m);
    tcg_gen_or_vec(vece, d, d, t);
}

void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
    const GVecGen2i ops[4] = {
        { .fni8 = gen_shr8_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_shr16_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_shr32_ins_i32,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_shr64_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /* Shift of esize leaves destination unchanged. */
    if (shift < (8 << vece)) {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    } else {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    }
}

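/*
 * SLI (shift left and insert): each source element is shifted left and
 * inserted into the high bits of the destination element, while the low
 * `shift` bits of the existing destination are preserved.  A scalar
 * sketch of the 8-bit lane operation (illustration only, not a QEMU
 * helper): sli8_ref(0xAB, 0xCD, 4) == 0xDB.
 */
static inline uint8_t sli8_ref(uint8_t d, uint8_t a, unsigned sh)
{
    uint8_t keep = (1u << sh) - 1;      /* low sh bits of d survive */

    return (d & keep) | (uint8_t)(a << sh);     /* sh == 0 copies a */
}
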
gen_shl8_ins_i64(TCGv_i64 d,TCGv_i64 a,int64_t shift)74109a52d85SRichard Henderson static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
74209a52d85SRichard Henderson {
74309a52d85SRichard Henderson     uint64_t mask = dup_const(MO_8, 0xff << shift);
74409a52d85SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
74509a52d85SRichard Henderson 
74609a52d85SRichard Henderson     tcg_gen_shli_i64(t, a, shift);
74709a52d85SRichard Henderson     tcg_gen_andi_i64(t, t, mask);
74809a52d85SRichard Henderson     tcg_gen_andi_i64(d, d, ~mask);
74909a52d85SRichard Henderson     tcg_gen_or_i64(d, d, t);
75009a52d85SRichard Henderson }
75109a52d85SRichard Henderson 
gen_shl16_ins_i64(TCGv_i64 d,TCGv_i64 a,int64_t shift)75209a52d85SRichard Henderson static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
75309a52d85SRichard Henderson {
75409a52d85SRichard Henderson     uint64_t mask = dup_const(MO_16, 0xffff << shift);
75509a52d85SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
75609a52d85SRichard Henderson 
75709a52d85SRichard Henderson     tcg_gen_shli_i64(t, a, shift);
75809a52d85SRichard Henderson     tcg_gen_andi_i64(t, t, mask);
75909a52d85SRichard Henderson     tcg_gen_andi_i64(d, d, ~mask);
76009a52d85SRichard Henderson     tcg_gen_or_i64(d, d, t);
76109a52d85SRichard Henderson }
76209a52d85SRichard Henderson 
gen_shl32_ins_i32(TCGv_i32 d,TCGv_i32 a,int32_t shift)76309a52d85SRichard Henderson static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
76409a52d85SRichard Henderson {
76509a52d85SRichard Henderson     tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
76609a52d85SRichard Henderson }
76709a52d85SRichard Henderson 
gen_shl64_ins_i64(TCGv_i64 d,TCGv_i64 a,int64_t shift)76809a52d85SRichard Henderson static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
76909a52d85SRichard Henderson {
77009a52d85SRichard Henderson     tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
77109a52d85SRichard Henderson }
77209a52d85SRichard Henderson 
gen_shl_ins_vec(unsigned vece,TCGv_vec d,TCGv_vec a,int64_t sh)77309a52d85SRichard Henderson static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
77409a52d85SRichard Henderson {
77509a52d85SRichard Henderson     TCGv_vec t = tcg_temp_new_vec_matching(d);
77609a52d85SRichard Henderson     TCGv_vec m = tcg_temp_new_vec_matching(d);
77709a52d85SRichard Henderson 
77809a52d85SRichard Henderson     tcg_gen_shli_vec(vece, t, a, sh);
77909a52d85SRichard Henderson     tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
78009a52d85SRichard Henderson     tcg_gen_and_vec(vece, d, d, m);
78109a52d85SRichard Henderson     tcg_gen_or_vec(vece, d, d, t);
78209a52d85SRichard Henderson }
78309a52d85SRichard Henderson 
gen_gvec_sli(unsigned vece,uint32_t rd_ofs,uint32_t rm_ofs,int64_t shift,uint32_t opr_sz,uint32_t max_sz)78409a52d85SRichard Henderson void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
78509a52d85SRichard Henderson                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
78609a52d85SRichard Henderson {
78709a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
78809a52d85SRichard Henderson     const GVecGen2i ops[4] = {
78909a52d85SRichard Henderson         { .fni8 = gen_shl8_ins_i64,
79009a52d85SRichard Henderson           .fniv = gen_shl_ins_vec,
79109a52d85SRichard Henderson           .fno = gen_helper_gvec_sli_b,
79209a52d85SRichard Henderson           .load_dest = true,
79309a52d85SRichard Henderson           .opt_opc = vecop_list,
79409a52d85SRichard Henderson           .vece = MO_8 },
79509a52d85SRichard Henderson         { .fni8 = gen_shl16_ins_i64,
79609a52d85SRichard Henderson           .fniv = gen_shl_ins_vec,
79709a52d85SRichard Henderson           .fno = gen_helper_gvec_sli_h,
79809a52d85SRichard Henderson           .load_dest = true,
79909a52d85SRichard Henderson           .opt_opc = vecop_list,
80009a52d85SRichard Henderson           .vece = MO_16 },
80109a52d85SRichard Henderson         { .fni4 = gen_shl32_ins_i32,
80209a52d85SRichard Henderson           .fniv = gen_shl_ins_vec,
80309a52d85SRichard Henderson           .fno = gen_helper_gvec_sli_s,
80409a52d85SRichard Henderson           .load_dest = true,
80509a52d85SRichard Henderson           .opt_opc = vecop_list,
80609a52d85SRichard Henderson           .vece = MO_32 },
80709a52d85SRichard Henderson         { .fni8 = gen_shl64_ins_i64,
80809a52d85SRichard Henderson           .fniv = gen_shl_ins_vec,
80909a52d85SRichard Henderson           .fno = gen_helper_gvec_sli_d,
81009a52d85SRichard Henderson           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
81109a52d85SRichard Henderson           .load_dest = true,
81209a52d85SRichard Henderson           .opt_opc = vecop_list,
81309a52d85SRichard Henderson           .vece = MO_64 },
81409a52d85SRichard Henderson     };
81509a52d85SRichard Henderson 
81609a52d85SRichard Henderson     /* tszimm encoding produces immediates in the range [0..esize-1]. */
81709a52d85SRichard Henderson     tcg_debug_assert(shift >= 0);
81809a52d85SRichard Henderson     tcg_debug_assert(shift < (8 << vece));
81909a52d85SRichard Henderson 
82009a52d85SRichard Henderson     if (shift == 0) {
82109a52d85SRichard Henderson         tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
82209a52d85SRichard Henderson     } else {
82309a52d85SRichard Henderson         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
82409a52d85SRichard Henderson     }
82509a52d85SRichard Henderson }
82609a52d85SRichard Henderson 
gen_mla8_i32(TCGv_i32 d,TCGv_i32 a,TCGv_i32 b)82709a52d85SRichard Henderson static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
82809a52d85SRichard Henderson {
82909a52d85SRichard Henderson     gen_helper_neon_mul_u8(a, a, b);
83009a52d85SRichard Henderson     gen_helper_neon_add_u8(d, d, a);
83109a52d85SRichard Henderson }
83209a52d85SRichard Henderson 
gen_mls8_i32(TCGv_i32 d,TCGv_i32 a,TCGv_i32 b)83309a52d85SRichard Henderson static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
83409a52d85SRichard Henderson {
83509a52d85SRichard Henderson     gen_helper_neon_mul_u8(a, a, b);
83609a52d85SRichard Henderson     gen_helper_neon_sub_u8(d, d, a);
83709a52d85SRichard Henderson }
83809a52d85SRichard Henderson 
gen_mla16_i32(TCGv_i32 d,TCGv_i32 a,TCGv_i32 b)83909a52d85SRichard Henderson static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
84009a52d85SRichard Henderson {
84109a52d85SRichard Henderson     gen_helper_neon_mul_u16(a, a, b);
84209a52d85SRichard Henderson     gen_helper_neon_add_u16(d, d, a);
84309a52d85SRichard Henderson }
84409a52d85SRichard Henderson 
gen_mls16_i32(TCGv_i32 d,TCGv_i32 a,TCGv_i32 b)84509a52d85SRichard Henderson static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
84609a52d85SRichard Henderson {
84709a52d85SRichard Henderson     gen_helper_neon_mul_u16(a, a, b);
84809a52d85SRichard Henderson     gen_helper_neon_sub_u16(d, d, a);
84909a52d85SRichard Henderson }
85009a52d85SRichard Henderson 
gen_mla32_i32(TCGv_i32 d,TCGv_i32 a,TCGv_i32 b)85109a52d85SRichard Henderson static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
85209a52d85SRichard Henderson {
85309a52d85SRichard Henderson     tcg_gen_mul_i32(a, a, b);
85409a52d85SRichard Henderson     tcg_gen_add_i32(d, d, a);
85509a52d85SRichard Henderson }
85609a52d85SRichard Henderson 
gen_mls32_i32(TCGv_i32 d,TCGv_i32 a,TCGv_i32 b)85709a52d85SRichard Henderson static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
85809a52d85SRichard Henderson {
85909a52d85SRichard Henderson     tcg_gen_mul_i32(a, a, b);
86009a52d85SRichard Henderson     tcg_gen_sub_i32(d, d, a);
86109a52d85SRichard Henderson }
86209a52d85SRichard Henderson 
gen_mla64_i64(TCGv_i64 d,TCGv_i64 a,TCGv_i64 b)86309a52d85SRichard Henderson static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
86409a52d85SRichard Henderson {
86509a52d85SRichard Henderson     tcg_gen_mul_i64(a, a, b);
86609a52d85SRichard Henderson     tcg_gen_add_i64(d, d, a);
86709a52d85SRichard Henderson }
86809a52d85SRichard Henderson 
gen_mls64_i64(TCGv_i64 d,TCGv_i64 a,TCGv_i64 b)86909a52d85SRichard Henderson static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
87009a52d85SRichard Henderson {
87109a52d85SRichard Henderson     tcg_gen_mul_i64(a, a, b);
87209a52d85SRichard Henderson     tcg_gen_sub_i64(d, d, a);
87309a52d85SRichard Henderson }
87409a52d85SRichard Henderson 
gen_mla_vec(unsigned vece,TCGv_vec d,TCGv_vec a,TCGv_vec b)87509a52d85SRichard Henderson static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
87609a52d85SRichard Henderson {
87709a52d85SRichard Henderson     tcg_gen_mul_vec(vece, a, a, b);
87809a52d85SRichard Henderson     tcg_gen_add_vec(vece, d, d, a);
87909a52d85SRichard Henderson }
88009a52d85SRichard Henderson 
gen_mls_vec(unsigned vece,TCGv_vec d,TCGv_vec a,TCGv_vec b)88109a52d85SRichard Henderson static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
88209a52d85SRichard Henderson {
88309a52d85SRichard Henderson     tcg_gen_mul_vec(vece, a, a, b);
88409a52d85SRichard Henderson     tcg_gen_sub_vec(vece, d, d, a);
88509a52d85SRichard Henderson }
88609a52d85SRichard Henderson 
88709a52d85SRichard Henderson /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
88809a52d85SRichard Henderson  * these tables are shared with AArch64 which does support them.
88909a52d85SRichard Henderson  */
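/*
 * Each expansion below computes d = d + (a * b) for MLA and
 * d = d - (a * b) for MLS, per element; the scalar callbacks above
 * clobber 'a' with the product and then accumulate into the loaded
 * destination.
 */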
89009a52d85SRichard Henderson void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
89109a52d85SRichard Henderson                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
89209a52d85SRichard Henderson {
89309a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
89409a52d85SRichard Henderson         INDEX_op_mul_vec, INDEX_op_add_vec, 0
89509a52d85SRichard Henderson     };
89609a52d85SRichard Henderson     static const GVecGen3 ops[4] = {
89709a52d85SRichard Henderson         { .fni4 = gen_mla8_i32,
89809a52d85SRichard Henderson           .fniv = gen_mla_vec,
89909a52d85SRichard Henderson           .load_dest = true,
90009a52d85SRichard Henderson           .opt_opc = vecop_list,
90109a52d85SRichard Henderson           .vece = MO_8 },
90209a52d85SRichard Henderson         { .fni4 = gen_mla16_i32,
90309a52d85SRichard Henderson           .fniv = gen_mla_vec,
90409a52d85SRichard Henderson           .load_dest = true,
90509a52d85SRichard Henderson           .opt_opc = vecop_list,
90609a52d85SRichard Henderson           .vece = MO_16 },
90709a52d85SRichard Henderson         { .fni4 = gen_mla32_i32,
90809a52d85SRichard Henderson           .fniv = gen_mla_vec,
90909a52d85SRichard Henderson           .load_dest = true,
91009a52d85SRichard Henderson           .opt_opc = vecop_list,
91109a52d85SRichard Henderson           .vece = MO_32 },
91209a52d85SRichard Henderson         { .fni8 = gen_mla64_i64,
91309a52d85SRichard Henderson           .fniv = gen_mla_vec,
91409a52d85SRichard Henderson           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
91509a52d85SRichard Henderson           .load_dest = true,
91609a52d85SRichard Henderson           .opt_opc = vecop_list,
91709a52d85SRichard Henderson           .vece = MO_64 },
91809a52d85SRichard Henderson     };
91909a52d85SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
92009a52d85SRichard Henderson }
92109a52d85SRichard Henderson 
92209a52d85SRichard Henderson void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
92309a52d85SRichard Henderson                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
92409a52d85SRichard Henderson {
92509a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
92609a52d85SRichard Henderson         INDEX_op_mul_vec, INDEX_op_sub_vec, 0
92709a52d85SRichard Henderson     };
92809a52d85SRichard Henderson     static const GVecGen3 ops[4] = {
92909a52d85SRichard Henderson         { .fni4 = gen_mls8_i32,
93009a52d85SRichard Henderson           .fniv = gen_mls_vec,
93109a52d85SRichard Henderson           .load_dest = true,
93209a52d85SRichard Henderson           .opt_opc = vecop_list,
93309a52d85SRichard Henderson           .vece = MO_8 },
93409a52d85SRichard Henderson         { .fni4 = gen_mls16_i32,
93509a52d85SRichard Henderson           .fniv = gen_mls_vec,
93609a52d85SRichard Henderson           .load_dest = true,
93709a52d85SRichard Henderson           .opt_opc = vecop_list,
93809a52d85SRichard Henderson           .vece = MO_16 },
93909a52d85SRichard Henderson         { .fni4 = gen_mls32_i32,
94009a52d85SRichard Henderson           .fniv = gen_mls_vec,
94109a52d85SRichard Henderson           .load_dest = true,
94209a52d85SRichard Henderson           .opt_opc = vecop_list,
94309a52d85SRichard Henderson           .vece = MO_32 },
94409a52d85SRichard Henderson         { .fni8 = gen_mls64_i64,
94509a52d85SRichard Henderson           .fniv = gen_mls_vec,
94609a52d85SRichard Henderson           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
94709a52d85SRichard Henderson           .load_dest = true,
94809a52d85SRichard Henderson           .opt_opc = vecop_list,
94909a52d85SRichard Henderson           .vece = MO_64 },
95009a52d85SRichard Henderson     };
95109a52d85SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
95209a52d85SRichard Henderson }
95309a52d85SRichard Henderson 
95409a52d85SRichard Henderson /* CMTST : test is "if ((X & Y) != 0)". */
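/*
 * Each result element is all ones when (a & b) is nonzero and all zeroes
 * otherwise; negsetcond and cmp_vec with TCG_COND_TSTNE produce exactly
 * that -1/0 widening of the test.
 */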
95509a52d85SRichard Henderson static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
95609a52d85SRichard Henderson {
957013506e0SRichard Henderson     tcg_gen_negsetcond_i32(TCG_COND_TSTNE, d, a, b);
95809a52d85SRichard Henderson }
95909a52d85SRichard Henderson 
96009a52d85SRichard Henderson void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
96109a52d85SRichard Henderson {
962013506e0SRichard Henderson     tcg_gen_negsetcond_i64(TCG_COND_TSTNE, d, a, b);
96309a52d85SRichard Henderson }
96409a52d85SRichard Henderson 
96509a52d85SRichard Henderson static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
96609a52d85SRichard Henderson {
9672310eb0aSRichard Henderson     tcg_gen_cmp_vec(TCG_COND_TSTNE, vece, d, a, b);
96809a52d85SRichard Henderson }
96909a52d85SRichard Henderson 
97009a52d85SRichard Henderson void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
97109a52d85SRichard Henderson                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
97209a52d85SRichard Henderson {
97309a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
97409a52d85SRichard Henderson     static const GVecGen3 ops[4] = {
97509a52d85SRichard Henderson         { .fni4 = gen_helper_neon_tst_u8,
97609a52d85SRichard Henderson           .fniv = gen_cmtst_vec,
97709a52d85SRichard Henderson           .opt_opc = vecop_list,
97809a52d85SRichard Henderson           .vece = MO_8 },
97909a52d85SRichard Henderson         { .fni4 = gen_helper_neon_tst_u16,
98009a52d85SRichard Henderson           .fniv = gen_cmtst_vec,
98109a52d85SRichard Henderson           .opt_opc = vecop_list,
98209a52d85SRichard Henderson           .vece = MO_16 },
98309a52d85SRichard Henderson         { .fni4 = gen_cmtst_i32,
98409a52d85SRichard Henderson           .fniv = gen_cmtst_vec,
98509a52d85SRichard Henderson           .opt_opc = vecop_list,
98609a52d85SRichard Henderson           .vece = MO_32 },
98709a52d85SRichard Henderson         { .fni8 = gen_cmtst_i64,
98809a52d85SRichard Henderson           .fniv = gen_cmtst_vec,
98909a52d85SRichard Henderson           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
99009a52d85SRichard Henderson           .opt_opc = vecop_list,
99109a52d85SRichard Henderson           .vece = MO_64 },
99209a52d85SRichard Henderson     };
99309a52d85SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
99409a52d85SRichard Henderson }
99509a52d85SRichard Henderson 
99609a52d85SRichard Henderson void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
99709a52d85SRichard Henderson {
99809a52d85SRichard Henderson     TCGv_i32 lval = tcg_temp_new_i32();
99909a52d85SRichard Henderson     TCGv_i32 rval = tcg_temp_new_i32();
100009a52d85SRichard Henderson     TCGv_i32 lsh = tcg_temp_new_i32();
100109a52d85SRichard Henderson     TCGv_i32 rsh = tcg_temp_new_i32();
100209a52d85SRichard Henderson     TCGv_i32 zero = tcg_constant_i32(0);
100309a52d85SRichard Henderson     TCGv_i32 max = tcg_constant_i32(32);
100409a52d85SRichard Henderson 
100509a52d85SRichard Henderson     /*
100609a52d85SRichard Henderson      * Rely on the TCG guarantee that out of range shifts produce
100709a52d85SRichard Henderson      * unspecified results, not undefined behaviour (i.e. no trap).
100809a52d85SRichard Henderson      * Discard out-of-range results after the fact.
100909a52d85SRichard Henderson      */
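    /*
     * A negative byte in 'shift' encodes a right shift: e.g. shift = 0xfd
     * (-3) gives lsh = -3 and rsh = 3, so the first movcond discards the
     * out-of-range left shift and the second selects src >> 3.
     */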
101009a52d85SRichard Henderson     tcg_gen_ext8s_i32(lsh, shift);
101109a52d85SRichard Henderson     tcg_gen_neg_i32(rsh, lsh);
101209a52d85SRichard Henderson     tcg_gen_shl_i32(lval, src, lsh);
101309a52d85SRichard Henderson     tcg_gen_shr_i32(rval, src, rsh);
101409a52d85SRichard Henderson     tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
101509a52d85SRichard Henderson     tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
101609a52d85SRichard Henderson }
101709a52d85SRichard Henderson 
101809a52d85SRichard Henderson void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
101909a52d85SRichard Henderson {
102009a52d85SRichard Henderson     TCGv_i64 lval = tcg_temp_new_i64();
102109a52d85SRichard Henderson     TCGv_i64 rval = tcg_temp_new_i64();
102209a52d85SRichard Henderson     TCGv_i64 lsh = tcg_temp_new_i64();
102309a52d85SRichard Henderson     TCGv_i64 rsh = tcg_temp_new_i64();
102409a52d85SRichard Henderson     TCGv_i64 zero = tcg_constant_i64(0);
102509a52d85SRichard Henderson     TCGv_i64 max = tcg_constant_i64(64);
102609a52d85SRichard Henderson 
102709a52d85SRichard Henderson     /*
102809a52d85SRichard Henderson      * Rely on the TCG guarantee that out of range shifts produce
102909a52d85SRichard Henderson      * unspecified results, not undefined behaviour (i.e. no trap).
103009a52d85SRichard Henderson      * Discard out-of-range results after the fact.
103109a52d85SRichard Henderson      */
103209a52d85SRichard Henderson     tcg_gen_ext8s_i64(lsh, shift);
103309a52d85SRichard Henderson     tcg_gen_neg_i64(rsh, lsh);
103409a52d85SRichard Henderson     tcg_gen_shl_i64(lval, src, lsh);
103509a52d85SRichard Henderson     tcg_gen_shr_i64(rval, src, rsh);
103609a52d85SRichard Henderson     tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
103709a52d85SRichard Henderson     tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
103809a52d85SRichard Henderson }
103909a52d85SRichard Henderson 
104009a52d85SRichard Henderson static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
104109a52d85SRichard Henderson                          TCGv_vec src, TCGv_vec shift)
104209a52d85SRichard Henderson {
104309a52d85SRichard Henderson     TCGv_vec lval = tcg_temp_new_vec_matching(dst);
104409a52d85SRichard Henderson     TCGv_vec rval = tcg_temp_new_vec_matching(dst);
104509a52d85SRichard Henderson     TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
104609a52d85SRichard Henderson     TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
104709a52d85SRichard Henderson     TCGv_vec msk, max;
104809a52d85SRichard Henderson 
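    /*
     * The shift count is the signed integer in the low byte of each
     * element; for MO_8 that is the whole element, while wider elements
     * are masked down to that byte below.
     */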
104909a52d85SRichard Henderson     tcg_gen_neg_vec(vece, rsh, shift);
105009a52d85SRichard Henderson     if (vece == MO_8) {
105109a52d85SRichard Henderson         tcg_gen_mov_vec(lsh, shift);
105209a52d85SRichard Henderson     } else {
105309a52d85SRichard Henderson         msk = tcg_temp_new_vec_matching(dst);
105409a52d85SRichard Henderson         tcg_gen_dupi_vec(vece, msk, 0xff);
105509a52d85SRichard Henderson         tcg_gen_and_vec(vece, lsh, shift, msk);
105609a52d85SRichard Henderson         tcg_gen_and_vec(vece, rsh, rsh, msk);
105709a52d85SRichard Henderson     }
105809a52d85SRichard Henderson 
105909a52d85SRichard Henderson     /*
106009a52d85SRichard Henderson      * Rely on the TCG guarantee that out of range shifts produce
106109a52d85SRichard Henderson      * unspecified results, not undefined behaviour (i.e. no trap).
106209a52d85SRichard Henderson      * Discard out-of-range results after the fact.
106309a52d85SRichard Henderson      */
106409a52d85SRichard Henderson     tcg_gen_shlv_vec(vece, lval, src, lsh);
106509a52d85SRichard Henderson     tcg_gen_shrv_vec(vece, rval, src, rsh);
106609a52d85SRichard Henderson 
106709a52d85SRichard Henderson     max = tcg_temp_new_vec_matching(dst);
106809a52d85SRichard Henderson     tcg_gen_dupi_vec(vece, max, 8 << vece);
106909a52d85SRichard Henderson 
107009a52d85SRichard Henderson     /*
107109a52d85SRichard Henderson      * The choice of LT (signed) and GEU (unsigned) are biased toward
107209a52d85SRichard Henderson      * The choice of LT (signed) and GEU (unsigned) is biased toward
107309a52d85SRichard Henderson      * is significant so we must use an unsigned compare; otherwise we
107409a52d85SRichard Henderson      * have already masked to a byte and so a signed compare works.
107509a52d85SRichard Henderson      * Other tcg hosts have a full set of comparisons and do not care.
107609a52d85SRichard Henderson      */
107709a52d85SRichard Henderson     if (vece == MO_8) {
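    /*
     * Either way the comparison yields an all-ones lane mask: GEU marks
     * out-of-range counts, which andc clears from the result, while LT
     * marks in-range counts, which and keeps.
     */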
107809a52d85SRichard Henderson         tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
107909a52d85SRichard Henderson         tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
108009a52d85SRichard Henderson         tcg_gen_andc_vec(vece, lval, lval, lsh);
108109a52d85SRichard Henderson         tcg_gen_andc_vec(vece, rval, rval, rsh);
108209a52d85SRichard Henderson     } else {
108309a52d85SRichard Henderson         tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
108409a52d85SRichard Henderson         tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
108509a52d85SRichard Henderson         tcg_gen_and_vec(vece, lval, lval, lsh);
108609a52d85SRichard Henderson         tcg_gen_and_vec(vece, rval, rval, rsh);
108709a52d85SRichard Henderson     }
108809a52d85SRichard Henderson     tcg_gen_or_vec(vece, dst, lval, rval);
108909a52d85SRichard Henderson }
109009a52d85SRichard Henderson 
109109a52d85SRichard Henderson void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
109209a52d85SRichard Henderson                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
109309a52d85SRichard Henderson {
109409a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
109509a52d85SRichard Henderson         INDEX_op_neg_vec, INDEX_op_shlv_vec,
109609a52d85SRichard Henderson         INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
109709a52d85SRichard Henderson     };
109809a52d85SRichard Henderson     static const GVecGen3 ops[4] = {
109909a52d85SRichard Henderson         { .fniv = gen_ushl_vec,
110009a52d85SRichard Henderson           .fno = gen_helper_gvec_ushl_b,
110109a52d85SRichard Henderson           .opt_opc = vecop_list,
110209a52d85SRichard Henderson           .vece = MO_8 },
110309a52d85SRichard Henderson         { .fniv = gen_ushl_vec,
110409a52d85SRichard Henderson           .fno = gen_helper_gvec_ushl_h,
110509a52d85SRichard Henderson           .opt_opc = vecop_list,
110609a52d85SRichard Henderson           .vece = MO_16 },
110709a52d85SRichard Henderson         { .fni4 = gen_ushl_i32,
110809a52d85SRichard Henderson           .fniv = gen_ushl_vec,
110909a52d85SRichard Henderson           .opt_opc = vecop_list,
111009a52d85SRichard Henderson           .vece = MO_32 },
111109a52d85SRichard Henderson         { .fni8 = gen_ushl_i64,
111209a52d85SRichard Henderson           .fniv = gen_ushl_vec,
111309a52d85SRichard Henderson           .opt_opc = vecop_list,
111409a52d85SRichard Henderson           .vece = MO_64 },
111509a52d85SRichard Henderson     };
111609a52d85SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
111709a52d85SRichard Henderson }
111809a52d85SRichard Henderson 
111909a52d85SRichard Henderson void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
112009a52d85SRichard Henderson {
112109a52d85SRichard Henderson     TCGv_i32 lval = tcg_temp_new_i32();
112209a52d85SRichard Henderson     TCGv_i32 rval = tcg_temp_new_i32();
112309a52d85SRichard Henderson     TCGv_i32 lsh = tcg_temp_new_i32();
112409a52d85SRichard Henderson     TCGv_i32 rsh = tcg_temp_new_i32();
112509a52d85SRichard Henderson     TCGv_i32 zero = tcg_constant_i32(0);
112609a52d85SRichard Henderson     TCGv_i32 max = tcg_constant_i32(31);
112709a52d85SRichard Henderson 
112809a52d85SRichard Henderson     /*
112909a52d85SRichard Henderson      * Rely on the TCG guarantee that out of range shifts produce
113009a52d85SRichard Henderson      * unspecified results, not undefined behaviour (i.e. no trap).
113109a52d85SRichard Henderson      * Discard out-of-range results after the fact.
113209a52d85SRichard Henderson      */
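    /*
     * Negative counts are arithmetic right shifts.  Clamping rsh to 31
     * makes any larger right shift still produce the sign bits (0 or -1),
     * while left shifts of 32 or more are zeroed by the LEU movcond.
     */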
113309a52d85SRichard Henderson     tcg_gen_ext8s_i32(lsh, shift);
113409a52d85SRichard Henderson     tcg_gen_neg_i32(rsh, lsh);
113509a52d85SRichard Henderson     tcg_gen_shl_i32(lval, src, lsh);
113609a52d85SRichard Henderson     tcg_gen_umin_i32(rsh, rsh, max);
113709a52d85SRichard Henderson     tcg_gen_sar_i32(rval, src, rsh);
113809a52d85SRichard Henderson     tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
113909a52d85SRichard Henderson     tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
114009a52d85SRichard Henderson }
114109a52d85SRichard Henderson 
114209a52d85SRichard Henderson void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
114309a52d85SRichard Henderson {
114409a52d85SRichard Henderson     TCGv_i64 lval = tcg_temp_new_i64();
114509a52d85SRichard Henderson     TCGv_i64 rval = tcg_temp_new_i64();
114609a52d85SRichard Henderson     TCGv_i64 lsh = tcg_temp_new_i64();
114709a52d85SRichard Henderson     TCGv_i64 rsh = tcg_temp_new_i64();
114809a52d85SRichard Henderson     TCGv_i64 zero = tcg_constant_i64(0);
114909a52d85SRichard Henderson     TCGv_i64 max = tcg_constant_i64(63);
115009a52d85SRichard Henderson 
115109a52d85SRichard Henderson     /*
115209a52d85SRichard Henderson      * Rely on the TCG guarantee that out of range shifts produce
115309a52d85SRichard Henderson      * unspecified results, not undefined behaviour (i.e. no trap).
115409a52d85SRichard Henderson      * Discard out-of-range results after the fact.
115509a52d85SRichard Henderson      */
115609a52d85SRichard Henderson     tcg_gen_ext8s_i64(lsh, shift);
115709a52d85SRichard Henderson     tcg_gen_neg_i64(rsh, lsh);
115809a52d85SRichard Henderson     tcg_gen_shl_i64(lval, src, lsh);
115909a52d85SRichard Henderson     tcg_gen_umin_i64(rsh, rsh, max);
116009a52d85SRichard Henderson     tcg_gen_sar_i64(rval, src, rsh);
116109a52d85SRichard Henderson     tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
116209a52d85SRichard Henderson     tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
116309a52d85SRichard Henderson }
116409a52d85SRichard Henderson 
116509a52d85SRichard Henderson static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
116609a52d85SRichard Henderson                          TCGv_vec src, TCGv_vec shift)
116709a52d85SRichard Henderson {
116809a52d85SRichard Henderson     TCGv_vec lval = tcg_temp_new_vec_matching(dst);
116909a52d85SRichard Henderson     TCGv_vec rval = tcg_temp_new_vec_matching(dst);
117009a52d85SRichard Henderson     TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
117109a52d85SRichard Henderson     TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
117209a52d85SRichard Henderson     TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
117309a52d85SRichard Henderson 
117409a52d85SRichard Henderson     /*
117509a52d85SRichard Henderson      * Rely on the TCG guarantee that out of range shifts produce
117609a52d85SRichard Henderson      * unspecified results, not undefined behaviour (i.e. no trap).
117709a52d85SRichard Henderson      * Discard out-of-range results after the fact.
117809a52d85SRichard Henderson      */
117909a52d85SRichard Henderson     tcg_gen_neg_vec(vece, rsh, shift);
118009a52d85SRichard Henderson     if (vece == MO_8) {
118109a52d85SRichard Henderson         tcg_gen_mov_vec(lsh, shift);
118209a52d85SRichard Henderson     } else {
118309a52d85SRichard Henderson         tcg_gen_dupi_vec(vece, tmp, 0xff);
118409a52d85SRichard Henderson         tcg_gen_and_vec(vece, lsh, shift, tmp);
118509a52d85SRichard Henderson         tcg_gen_and_vec(vece, rsh, rsh, tmp);
118609a52d85SRichard Henderson     }
118709a52d85SRichard Henderson 
118809a52d85SRichard Henderson     /* Bound rsh so an out-of-bounds right shift gets -1.  */
118909a52d85SRichard Henderson     tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
119009a52d85SRichard Henderson     tcg_gen_umin_vec(vece, rsh, rsh, tmp);
119109a52d85SRichard Henderson     tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
119209a52d85SRichard Henderson 
119309a52d85SRichard Henderson     tcg_gen_shlv_vec(vece, lval, src, lsh);
119409a52d85SRichard Henderson     tcg_gen_sarv_vec(vece, rval, src, rsh);
119509a52d85SRichard Henderson 
119609a52d85SRichard Henderson     /* Select in-bound left shift.  */
119709a52d85SRichard Henderson     tcg_gen_andc_vec(vece, lval, lval, tmp);
119809a52d85SRichard Henderson 
119909a52d85SRichard Henderson     /* Select between left and right shift.  */
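    /*
     * For MO_8 the count still has its sign bit in place, so compare
     * against zero; wider elements were masked to 8 bits, so "negative"
     * counts are 0x80..0xff and the comparison is against 0x80 with the
     * lval/rval operands swapped.
     */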
120009a52d85SRichard Henderson     if (vece == MO_8) {
120109a52d85SRichard Henderson         tcg_gen_dupi_vec(vece, tmp, 0);
120209a52d85SRichard Henderson         tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
120309a52d85SRichard Henderson     } else {
120409a52d85SRichard Henderson         tcg_gen_dupi_vec(vece, tmp, 0x80);
120509a52d85SRichard Henderson         tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
120609a52d85SRichard Henderson     }
120709a52d85SRichard Henderson }
120809a52d85SRichard Henderson 
120909a52d85SRichard Henderson void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
121009a52d85SRichard Henderson                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
121109a52d85SRichard Henderson {
121209a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
121309a52d85SRichard Henderson         INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
121409a52d85SRichard Henderson         INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
121509a52d85SRichard Henderson     };
121609a52d85SRichard Henderson     static const GVecGen3 ops[4] = {
121709a52d85SRichard Henderson         { .fniv = gen_sshl_vec,
121809a52d85SRichard Henderson           .fno = gen_helper_gvec_sshl_b,
121909a52d85SRichard Henderson           .opt_opc = vecop_list,
122009a52d85SRichard Henderson           .vece = MO_8 },
122109a52d85SRichard Henderson         { .fniv = gen_sshl_vec,
122209a52d85SRichard Henderson           .fno = gen_helper_gvec_sshl_h,
122309a52d85SRichard Henderson           .opt_opc = vecop_list,
122409a52d85SRichard Henderson           .vece = MO_16 },
122509a52d85SRichard Henderson         { .fni4 = gen_sshl_i32,
122609a52d85SRichard Henderson           .fniv = gen_sshl_vec,
122709a52d85SRichard Henderson           .opt_opc = vecop_list,
122809a52d85SRichard Henderson           .vece = MO_32 },
122909a52d85SRichard Henderson         { .fni8 = gen_sshl_i64,
123009a52d85SRichard Henderson           .fniv = gen_sshl_vec,
123109a52d85SRichard Henderson           .opt_opc = vecop_list,
123209a52d85SRichard Henderson           .vece = MO_64 },
123309a52d85SRichard Henderson     };
123409a52d85SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
123509a52d85SRichard Henderson }
123609a52d85SRichard Henderson 
1237940392c8SRichard Henderson void gen_gvec_srshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1238940392c8SRichard Henderson                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1239940392c8SRichard Henderson {
1240940392c8SRichard Henderson     static gen_helper_gvec_3 * const fns[] = {
1241940392c8SRichard Henderson         gen_helper_gvec_srshl_b, gen_helper_gvec_srshl_h,
1242940392c8SRichard Henderson         gen_helper_gvec_srshl_s, gen_helper_gvec_srshl_d,
1243940392c8SRichard Henderson     };
1244940392c8SRichard Henderson     tcg_debug_assert(vece <= MO_64);
1245940392c8SRichard Henderson     tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
1246940392c8SRichard Henderson }
1247940392c8SRichard Henderson 
1248940392c8SRichard Henderson void gen_gvec_urshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1249940392c8SRichard Henderson                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1250940392c8SRichard Henderson {
1251940392c8SRichard Henderson     static gen_helper_gvec_3 * const fns[] = {
1252940392c8SRichard Henderson         gen_helper_gvec_urshl_b, gen_helper_gvec_urshl_h,
1253940392c8SRichard Henderson         gen_helper_gvec_urshl_s, gen_helper_gvec_urshl_d,
1254940392c8SRichard Henderson     };
1255940392c8SRichard Henderson     tcg_debug_assert(vece <= MO_64);
1256940392c8SRichard Henderson     tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
1257940392c8SRichard Henderson }
1258940392c8SRichard Henderson 
1259e72a6878SRichard Henderson void gen_neon_sqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1260e72a6878SRichard Henderson                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1261e72a6878SRichard Henderson {
1262e72a6878SRichard Henderson     static gen_helper_gvec_3_ptr * const fns[] = {
1263e72a6878SRichard Henderson         gen_helper_neon_sqshl_b, gen_helper_neon_sqshl_h,
1264e72a6878SRichard Henderson         gen_helper_neon_sqshl_s, gen_helper_neon_sqshl_d,
1265e72a6878SRichard Henderson     };
1266e72a6878SRichard Henderson     tcg_debug_assert(vece <= MO_64);
1267e72a6878SRichard Henderson     tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
1268e72a6878SRichard Henderson                        opr_sz, max_sz, 0, fns[vece]);
1269e72a6878SRichard Henderson }
1270e72a6878SRichard Henderson 
1271e72a6878SRichard Henderson void gen_neon_uqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1272e72a6878SRichard Henderson                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1273e72a6878SRichard Henderson {
1274e72a6878SRichard Henderson     static gen_helper_gvec_3_ptr * const fns[] = {
1275e72a6878SRichard Henderson         gen_helper_neon_uqshl_b, gen_helper_neon_uqshl_h,
1276e72a6878SRichard Henderson         gen_helper_neon_uqshl_s, gen_helper_neon_uqshl_d,
1277e72a6878SRichard Henderson     };
1278e72a6878SRichard Henderson     tcg_debug_assert(vece <= MO_64);
1279e72a6878SRichard Henderson     tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
1280e72a6878SRichard Henderson                        opr_sz, max_sz, 0, fns[vece]);
1281e72a6878SRichard Henderson }
1282e72a6878SRichard Henderson 
1283cef9d54fSRichard Henderson void gen_neon_sqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1284cef9d54fSRichard Henderson                      uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1285cef9d54fSRichard Henderson {
1286cef9d54fSRichard Henderson     static gen_helper_gvec_3_ptr * const fns[] = {
1287cef9d54fSRichard Henderson         gen_helper_neon_sqrshl_b, gen_helper_neon_sqrshl_h,
1288cef9d54fSRichard Henderson         gen_helper_neon_sqrshl_s, gen_helper_neon_sqrshl_d,
1289cef9d54fSRichard Henderson     };
1290cef9d54fSRichard Henderson     tcg_debug_assert(vece <= MO_64);
1291cef9d54fSRichard Henderson     tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
1292cef9d54fSRichard Henderson                        opr_sz, max_sz, 0, fns[vece]);
1293cef9d54fSRichard Henderson }
1294cef9d54fSRichard Henderson 
1295cef9d54fSRichard Henderson void gen_neon_uqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1296cef9d54fSRichard Henderson                      uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1297cef9d54fSRichard Henderson {
1298cef9d54fSRichard Henderson     static gen_helper_gvec_3_ptr * const fns[] = {
1299cef9d54fSRichard Henderson         gen_helper_neon_uqrshl_b, gen_helper_neon_uqrshl_h,
1300cef9d54fSRichard Henderson         gen_helper_neon_uqrshl_s, gen_helper_neon_uqrshl_d,
1301cef9d54fSRichard Henderson     };
1302cef9d54fSRichard Henderson     tcg_debug_assert(vece <= MO_64);
1303cef9d54fSRichard Henderson     tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
1304cef9d54fSRichard Henderson                        opr_sz, max_sz, 0, fns[vece]);
1305cef9d54fSRichard Henderson }
1306cef9d54fSRichard Henderson 
1307f4fa83d6SRichard Henderson void gen_uqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
1308f4fa83d6SRichard Henderson {
1309f4fa83d6SRichard Henderson     uint64_t max = MAKE_64BIT_MASK(0, 8 << esz);
1310f4fa83d6SRichard Henderson     TCGv_i64 tmp = tcg_temp_new_i64();
1311f4fa83d6SRichard Henderson 
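    /*
     * a and b are expected to hold zero-extended elements, so the 64-bit
     * sum cannot wrap; umin saturates it to the element maximum and any
     * difference between the raw and saturated sums is folded into qc.
     */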
1312f4fa83d6SRichard Henderson     tcg_gen_add_i64(tmp, a, b);
1313f4fa83d6SRichard Henderson     tcg_gen_umin_i64(res, tmp, tcg_constant_i64(max));
1314f4fa83d6SRichard Henderson     tcg_gen_xor_i64(tmp, tmp, res);
1315f4fa83d6SRichard Henderson     tcg_gen_or_i64(qc, qc, tmp);
1316f4fa83d6SRichard Henderson }
1317f4fa83d6SRichard Henderson 
1318f4fa83d6SRichard Henderson void gen_uqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
1319f4fa83d6SRichard Henderson {
1320f4fa83d6SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
1321f4fa83d6SRichard Henderson 
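    /* 64-bit unsigned overflow is detected by (a + b) < a. */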
1322f4fa83d6SRichard Henderson     tcg_gen_add_i64(t, a, b);
1323f4fa83d6SRichard Henderson     tcg_gen_movcond_i64(TCG_COND_LTU, res, t, a,
1324f4fa83d6SRichard Henderson                         tcg_constant_i64(UINT64_MAX), t);
1325f4fa83d6SRichard Henderson     tcg_gen_xor_i64(t, t, res);
1326f4fa83d6SRichard Henderson     tcg_gen_or_i64(qc, qc, t);
1327f4fa83d6SRichard Henderson }
1328f4fa83d6SRichard Henderson 
132976f4a8aeSRichard Henderson static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
133009a52d85SRichard Henderson                           TCGv_vec a, TCGv_vec b)
133109a52d85SRichard Henderson {
133209a52d85SRichard Henderson     TCGv_vec x = tcg_temp_new_vec_matching(t);
133309a52d85SRichard Henderson     tcg_gen_add_vec(vece, x, a, b);
133409a52d85SRichard Henderson     tcg_gen_usadd_vec(vece, t, a, b);
133576f4a8aeSRichard Henderson     tcg_gen_xor_vec(vece, x, x, t);
133676f4a8aeSRichard Henderson     tcg_gen_or_vec(vece, qc, qc, x);
133709a52d85SRichard Henderson }
133809a52d85SRichard Henderson 
133909a52d85SRichard Henderson void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
134009a52d85SRichard Henderson                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
134109a52d85SRichard Henderson {
134209a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
134376f4a8aeSRichard Henderson         INDEX_op_usadd_vec, INDEX_op_add_vec, 0
134409a52d85SRichard Henderson     };
134509a52d85SRichard Henderson     static const GVecGen4 ops[4] = {
134609a52d85SRichard Henderson         { .fniv = gen_uqadd_vec,
134709a52d85SRichard Henderson           .fno = gen_helper_gvec_uqadd_b,
134809a52d85SRichard Henderson           .write_aofs = true,
134909a52d85SRichard Henderson           .opt_opc = vecop_list,
135009a52d85SRichard Henderson           .vece = MO_8 },
135109a52d85SRichard Henderson         { .fniv = gen_uqadd_vec,
135209a52d85SRichard Henderson           .fno = gen_helper_gvec_uqadd_h,
135309a52d85SRichard Henderson           .write_aofs = true,
135409a52d85SRichard Henderson           .opt_opc = vecop_list,
135509a52d85SRichard Henderson           .vece = MO_16 },
135609a52d85SRichard Henderson         { .fniv = gen_uqadd_vec,
135709a52d85SRichard Henderson           .fno = gen_helper_gvec_uqadd_s,
135809a52d85SRichard Henderson           .write_aofs = true,
135909a52d85SRichard Henderson           .opt_opc = vecop_list,
136009a52d85SRichard Henderson           .vece = MO_32 },
136109a52d85SRichard Henderson         { .fniv = gen_uqadd_vec,
1362f4fa83d6SRichard Henderson           .fni8 = gen_uqadd_d,
136309a52d85SRichard Henderson           .fno = gen_helper_gvec_uqadd_d,
136409a52d85SRichard Henderson           .write_aofs = true,
136509a52d85SRichard Henderson           .opt_opc = vecop_list,
136609a52d85SRichard Henderson           .vece = MO_64 },
136709a52d85SRichard Henderson     };
136801d5665bSRichard Henderson 
136901d5665bSRichard Henderson     tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
137009a52d85SRichard Henderson     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
137109a52d85SRichard Henderson                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
137209a52d85SRichard Henderson }
137309a52d85SRichard Henderson 
1374f4fa83d6SRichard Henderson void gen_sqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
1375f4fa83d6SRichard Henderson {
1376f4fa83d6SRichard Henderson     int64_t max = MAKE_64BIT_MASK(0, (8 << esz) - 1);
1377f4fa83d6SRichard Henderson     int64_t min = -1ll - max;
1378f4fa83d6SRichard Henderson     TCGv_i64 tmp = tcg_temp_new_i64();
1379f4fa83d6SRichard Henderson 
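    /* Clamp the sum to [min, max]; any clamping is recorded into qc. */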
1380f4fa83d6SRichard Henderson     tcg_gen_add_i64(tmp, a, b);
1381f4fa83d6SRichard Henderson     tcg_gen_smin_i64(res, tmp, tcg_constant_i64(max));
1382f4fa83d6SRichard Henderson     tcg_gen_smax_i64(res, res, tcg_constant_i64(min));
1383f4fa83d6SRichard Henderson     tcg_gen_xor_i64(tmp, tmp, res);
1384f4fa83d6SRichard Henderson     tcg_gen_or_i64(qc, qc, tmp);
1385f4fa83d6SRichard Henderson }
1386f4fa83d6SRichard Henderson 
1387f4fa83d6SRichard Henderson void gen_sqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
1388f4fa83d6SRichard Henderson {
1389f4fa83d6SRichard Henderson     TCGv_i64 t0 = tcg_temp_new_i64();
1390f4fa83d6SRichard Henderson     TCGv_i64 t1 = tcg_temp_new_i64();
1391f4fa83d6SRichard Henderson     TCGv_i64 t2 = tcg_temp_new_i64();
1392f4fa83d6SRichard Henderson 
1393f4fa83d6SRichard Henderson     tcg_gen_add_i64(t0, a, b);
1394f4fa83d6SRichard Henderson 
1395f4fa83d6SRichard Henderson     /* Compute signed overflow indication into T1 */
1396f4fa83d6SRichard Henderson     tcg_gen_xor_i64(t1, a, b);
1397f4fa83d6SRichard Henderson     tcg_gen_xor_i64(t2, t0, a);
1398f4fa83d6SRichard Henderson     tcg_gen_andc_i64(t1, t2, t1);
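    /* T1 < 0 iff a and b have the same sign but the sum's sign differs. */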
1399f4fa83d6SRichard Henderson 
1400f4fa83d6SRichard Henderson     /* Compute saturated value into T2 */
1401f4fa83d6SRichard Henderson     tcg_gen_sari_i64(t2, a, 63);
1402f4fa83d6SRichard Henderson     tcg_gen_xori_i64(t2, t2, INT64_MAX);
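    /* T2 = INT64_MAX when a >= 0, INT64_MIN when a < 0. */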
1403f4fa83d6SRichard Henderson 
1404f4fa83d6SRichard Henderson     tcg_gen_movcond_i64(TCG_COND_LT, res, t1, tcg_constant_i64(0), t2, t0);
1405f4fa83d6SRichard Henderson     tcg_gen_xor_i64(t0, t0, res);
1406f4fa83d6SRichard Henderson     tcg_gen_or_i64(qc, qc, t0);
1407f4fa83d6SRichard Henderson }
1408f4fa83d6SRichard Henderson 
140976f4a8aeSRichard Henderson static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
141009a52d85SRichard Henderson                           TCGv_vec a, TCGv_vec b)
141109a52d85SRichard Henderson {
141209a52d85SRichard Henderson     TCGv_vec x = tcg_temp_new_vec_matching(t);
141309a52d85SRichard Henderson     tcg_gen_add_vec(vece, x, a, b);
141409a52d85SRichard Henderson     tcg_gen_ssadd_vec(vece, t, a, b);
141576f4a8aeSRichard Henderson     tcg_gen_xor_vec(vece, x, x, t);
141676f4a8aeSRichard Henderson     tcg_gen_or_vec(vece, qc, qc, x);
141709a52d85SRichard Henderson }
141809a52d85SRichard Henderson 
141909a52d85SRichard Henderson void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
142009a52d85SRichard Henderson                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
142109a52d85SRichard Henderson {
142209a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
142376f4a8aeSRichard Henderson         INDEX_op_ssadd_vec, INDEX_op_add_vec, 0
142409a52d85SRichard Henderson     };
142509a52d85SRichard Henderson     static const GVecGen4 ops[4] = {
142609a52d85SRichard Henderson         { .fniv = gen_sqadd_vec,
142709a52d85SRichard Henderson           .fno = gen_helper_gvec_sqadd_b,
142809a52d85SRichard Henderson           .opt_opc = vecop_list,
142909a52d85SRichard Henderson           .write_aofs = true,
143009a52d85SRichard Henderson           .vece = MO_8 },
143109a52d85SRichard Henderson         { .fniv = gen_sqadd_vec,
143209a52d85SRichard Henderson           .fno = gen_helper_gvec_sqadd_h,
143309a52d85SRichard Henderson           .opt_opc = vecop_list,
143409a52d85SRichard Henderson           .write_aofs = true,
143509a52d85SRichard Henderson           .vece = MO_16 },
143609a52d85SRichard Henderson         { .fniv = gen_sqadd_vec,
143709a52d85SRichard Henderson           .fno = gen_helper_gvec_sqadd_s,
143809a52d85SRichard Henderson           .opt_opc = vecop_list,
143909a52d85SRichard Henderson           .write_aofs = true,
144009a52d85SRichard Henderson           .vece = MO_32 },
144109a52d85SRichard Henderson         { .fniv = gen_sqadd_vec,
1442f4fa83d6SRichard Henderson           .fni8 = gen_sqadd_d,
144309a52d85SRichard Henderson           .fno = gen_helper_gvec_sqadd_d,
144409a52d85SRichard Henderson           .opt_opc = vecop_list,
144509a52d85SRichard Henderson           .write_aofs = true,
144609a52d85SRichard Henderson           .vece = MO_64 },
144709a52d85SRichard Henderson     };
144801d5665bSRichard Henderson 
144901d5665bSRichard Henderson     tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
145009a52d85SRichard Henderson     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
145109a52d85SRichard Henderson                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
145209a52d85SRichard Henderson }
145309a52d85SRichard Henderson 
1454f4fa83d6SRichard Henderson void gen_uqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
1455f4fa83d6SRichard Henderson {
1456f4fa83d6SRichard Henderson     TCGv_i64 tmp = tcg_temp_new_i64();
1457f4fa83d6SRichard Henderson 
1458f4fa83d6SRichard Henderson     tcg_gen_sub_i64(tmp, a, b);
1459f4fa83d6SRichard Henderson     tcg_gen_smax_i64(res, tmp, tcg_constant_i64(0));
1460f4fa83d6SRichard Henderson     tcg_gen_xor_i64(tmp, tmp, res);
1461f4fa83d6SRichard Henderson     tcg_gen_or_i64(qc, qc, tmp);
1462f4fa83d6SRichard Henderson }
1463f4fa83d6SRichard Henderson 
1464f4fa83d6SRichard Henderson void gen_uqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
1465f4fa83d6SRichard Henderson {
1466f4fa83d6SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
1467f4fa83d6SRichard Henderson 
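    /* Unsigned subtraction saturates to zero whenever b > a. */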
1468f4fa83d6SRichard Henderson     tcg_gen_sub_i64(t, a, b);
1469f4fa83d6SRichard Henderson     tcg_gen_movcond_i64(TCG_COND_LTU, res, a, b, tcg_constant_i64(0), t);
1470f4fa83d6SRichard Henderson     tcg_gen_xor_i64(t, t, res);
1471f4fa83d6SRichard Henderson     tcg_gen_or_i64(qc, qc, t);
1472f4fa83d6SRichard Henderson }
1473f4fa83d6SRichard Henderson 
147476f4a8aeSRichard Henderson static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
147509a52d85SRichard Henderson                           TCGv_vec a, TCGv_vec b)
147609a52d85SRichard Henderson {
147709a52d85SRichard Henderson     TCGv_vec x = tcg_temp_new_vec_matching(t);
147809a52d85SRichard Henderson     tcg_gen_sub_vec(vece, x, a, b);
147909a52d85SRichard Henderson     tcg_gen_ussub_vec(vece, t, a, b);
148076f4a8aeSRichard Henderson     tcg_gen_xor_vec(vece, x, x, t);
148176f4a8aeSRichard Henderson     tcg_gen_or_vec(vece, qc, qc, x);
148209a52d85SRichard Henderson }
148309a52d85SRichard Henderson 
148409a52d85SRichard Henderson void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
148509a52d85SRichard Henderson                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
148609a52d85SRichard Henderson {
148709a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
148876f4a8aeSRichard Henderson         INDEX_op_ussub_vec, INDEX_op_sub_vec, 0
148909a52d85SRichard Henderson     };
149009a52d85SRichard Henderson     static const GVecGen4 ops[4] = {
149109a52d85SRichard Henderson         { .fniv = gen_uqsub_vec,
149209a52d85SRichard Henderson           .fno = gen_helper_gvec_uqsub_b,
149309a52d85SRichard Henderson           .opt_opc = vecop_list,
149409a52d85SRichard Henderson           .write_aofs = true,
149509a52d85SRichard Henderson           .vece = MO_8 },
149609a52d85SRichard Henderson         { .fniv = gen_uqsub_vec,
149709a52d85SRichard Henderson           .fno = gen_helper_gvec_uqsub_h,
149809a52d85SRichard Henderson           .opt_opc = vecop_list,
149909a52d85SRichard Henderson           .write_aofs = true,
150009a52d85SRichard Henderson           .vece = MO_16 },
150109a52d85SRichard Henderson         { .fniv = gen_uqsub_vec,
150209a52d85SRichard Henderson           .fno = gen_helper_gvec_uqsub_s,
150309a52d85SRichard Henderson           .opt_opc = vecop_list,
150409a52d85SRichard Henderson           .write_aofs = true,
150509a52d85SRichard Henderson           .vece = MO_32 },
150609a52d85SRichard Henderson         { .fniv = gen_uqsub_vec,
1507f4fa83d6SRichard Henderson           .fni8 = gen_uqsub_d,
150809a52d85SRichard Henderson           .fno = gen_helper_gvec_uqsub_d,
150909a52d85SRichard Henderson           .opt_opc = vecop_list,
151009a52d85SRichard Henderson           .write_aofs = true,
151109a52d85SRichard Henderson           .vece = MO_64 },
151209a52d85SRichard Henderson     };
151301d5665bSRichard Henderson 
151401d5665bSRichard Henderson     tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
151509a52d85SRichard Henderson     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
151609a52d85SRichard Henderson                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
151709a52d85SRichard Henderson }
151809a52d85SRichard Henderson 
1519f4fa83d6SRichard Henderson void gen_sqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
1520f4fa83d6SRichard Henderson {
1521f4fa83d6SRichard Henderson     int64_t max = MAKE_64BIT_MASK(0, (8 << esz) - 1);
1522f4fa83d6SRichard Henderson     int64_t min = -1ll - max;
1523f4fa83d6SRichard Henderson     TCGv_i64 tmp = tcg_temp_new_i64();
1524f4fa83d6SRichard Henderson 
1525f4fa83d6SRichard Henderson     tcg_gen_sub_i64(tmp, a, b);
1526f4fa83d6SRichard Henderson     tcg_gen_smin_i64(res, tmp, tcg_constant_i64(max));
1527f4fa83d6SRichard Henderson     tcg_gen_smax_i64(res, res, tcg_constant_i64(min));
1528f4fa83d6SRichard Henderson     tcg_gen_xor_i64(tmp, tmp, res);
1529f4fa83d6SRichard Henderson     tcg_gen_or_i64(qc, qc, tmp);
1530f4fa83d6SRichard Henderson }
1531f4fa83d6SRichard Henderson 
1532f4fa83d6SRichard Henderson void gen_sqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
1533f4fa83d6SRichard Henderson {
1534f4fa83d6SRichard Henderson     TCGv_i64 t0 = tcg_temp_new_i64();
1535f4fa83d6SRichard Henderson     TCGv_i64 t1 = tcg_temp_new_i64();
1536f4fa83d6SRichard Henderson     TCGv_i64 t2 = tcg_temp_new_i64();
1537f4fa83d6SRichard Henderson 
1538f4fa83d6SRichard Henderson     tcg_gen_sub_i64(t0, a, b);
1539f4fa83d6SRichard Henderson 
1540f4fa83d6SRichard Henderson     /* Compute signed overflow indication into T1 */
1541f4fa83d6SRichard Henderson     tcg_gen_xor_i64(t1, a, b);
1542f4fa83d6SRichard Henderson     tcg_gen_xor_i64(t2, t0, a);
1543f4fa83d6SRichard Henderson     tcg_gen_and_i64(t1, t1, t2);
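    /* T1 < 0 iff a and b differ in sign and the difference's sign differs from a. */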
1544f4fa83d6SRichard Henderson 
1545f4fa83d6SRichard Henderson     /* Compute saturated value into T2 */
1546f4fa83d6SRichard Henderson     tcg_gen_sari_i64(t2, a, 63);
1547f4fa83d6SRichard Henderson     tcg_gen_xori_i64(t2, t2, INT64_MAX);
1548f4fa83d6SRichard Henderson 
1549f4fa83d6SRichard Henderson     tcg_gen_movcond_i64(TCG_COND_LT, res, t1, tcg_constant_i64(0), t2, t0);
1550f4fa83d6SRichard Henderson     tcg_gen_xor_i64(t0, t0, res);
1551f4fa83d6SRichard Henderson     tcg_gen_or_i64(qc, qc, t0);
1552f4fa83d6SRichard Henderson }
1553f4fa83d6SRichard Henderson 
155476f4a8aeSRichard Henderson static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
155509a52d85SRichard Henderson                           TCGv_vec a, TCGv_vec b)
155609a52d85SRichard Henderson {
155709a52d85SRichard Henderson     TCGv_vec x = tcg_temp_new_vec_matching(t);
155809a52d85SRichard Henderson     tcg_gen_sub_vec(vece, x, a, b);
155909a52d85SRichard Henderson     tcg_gen_sssub_vec(vece, t, a, b);
156076f4a8aeSRichard Henderson     tcg_gen_xor_vec(vece, x, x, t);
156176f4a8aeSRichard Henderson     tcg_gen_or_vec(vece, qc, qc, x);
156209a52d85SRichard Henderson }
156309a52d85SRichard Henderson 
156409a52d85SRichard Henderson void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
156509a52d85SRichard Henderson                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
156609a52d85SRichard Henderson {
156709a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
156876f4a8aeSRichard Henderson         INDEX_op_sssub_vec, INDEX_op_sub_vec, 0
156909a52d85SRichard Henderson     };
157009a52d85SRichard Henderson     static const GVecGen4 ops[4] = {
157109a52d85SRichard Henderson         { .fniv = gen_sqsub_vec,
157209a52d85SRichard Henderson           .fno = gen_helper_gvec_sqsub_b,
157309a52d85SRichard Henderson           .opt_opc = vecop_list,
157409a52d85SRichard Henderson           .write_aofs = true,
157509a52d85SRichard Henderson           .vece = MO_8 },
157609a52d85SRichard Henderson         { .fniv = gen_sqsub_vec,
157709a52d85SRichard Henderson           .fno = gen_helper_gvec_sqsub_h,
157809a52d85SRichard Henderson           .opt_opc = vecop_list,
157909a52d85SRichard Henderson           .write_aofs = true,
158009a52d85SRichard Henderson           .vece = MO_16 },
158109a52d85SRichard Henderson         { .fniv = gen_sqsub_vec,
158209a52d85SRichard Henderson           .fno = gen_helper_gvec_sqsub_s,
158309a52d85SRichard Henderson           .opt_opc = vecop_list,
158409a52d85SRichard Henderson           .write_aofs = true,
158509a52d85SRichard Henderson           .vece = MO_32 },
158609a52d85SRichard Henderson         { .fniv = gen_sqsub_vec,
1587f4fa83d6SRichard Henderson           .fni8 = gen_sqsub_d,
158809a52d85SRichard Henderson           .fno = gen_helper_gvec_sqsub_d,
158909a52d85SRichard Henderson           .opt_opc = vecop_list,
159009a52d85SRichard Henderson           .write_aofs = true,
159109a52d85SRichard Henderson           .vece = MO_64 },
159209a52d85SRichard Henderson     };
159301d5665bSRichard Henderson 
159401d5665bSRichard Henderson     tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
159509a52d85SRichard Henderson     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
159609a52d85SRichard Henderson                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
159709a52d85SRichard Henderson }
159809a52d85SRichard Henderson 
159909a52d85SRichard Henderson static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
160009a52d85SRichard Henderson {
160109a52d85SRichard Henderson     TCGv_i32 t = tcg_temp_new_i32();
160209a52d85SRichard Henderson 
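    /* Signed absolute difference: pick b - a when a < b, else a - b. */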
160309a52d85SRichard Henderson     tcg_gen_sub_i32(t, a, b);
160409a52d85SRichard Henderson     tcg_gen_sub_i32(d, b, a);
160509a52d85SRichard Henderson     tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
160609a52d85SRichard Henderson }
160709a52d85SRichard Henderson 
160809a52d85SRichard Henderson static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
160909a52d85SRichard Henderson {
161009a52d85SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
161109a52d85SRichard Henderson 
161209a52d85SRichard Henderson     tcg_gen_sub_i64(t, a, b);
161309a52d85SRichard Henderson     tcg_gen_sub_i64(d, b, a);
161409a52d85SRichard Henderson     tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
161509a52d85SRichard Henderson }
161609a52d85SRichard Henderson 
161709a52d85SRichard Henderson static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
161809a52d85SRichard Henderson {
161909a52d85SRichard Henderson     TCGv_vec t = tcg_temp_new_vec_matching(d);
162009a52d85SRichard Henderson 
162109a52d85SRichard Henderson     tcg_gen_smin_vec(vece, t, a, b);
162209a52d85SRichard Henderson     tcg_gen_smax_vec(vece, d, a, b);
162309a52d85SRichard Henderson     tcg_gen_sub_vec(vece, d, d, t);
162409a52d85SRichard Henderson }
162509a52d85SRichard Henderson 
162609a52d85SRichard Henderson void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
162709a52d85SRichard Henderson                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
162809a52d85SRichard Henderson {
162909a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
163009a52d85SRichard Henderson         INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
163109a52d85SRichard Henderson     };
163209a52d85SRichard Henderson     static const GVecGen3 ops[4] = {
163309a52d85SRichard Henderson         { .fniv = gen_sabd_vec,
163409a52d85SRichard Henderson           .fno = gen_helper_gvec_sabd_b,
163509a52d85SRichard Henderson           .opt_opc = vecop_list,
163609a52d85SRichard Henderson           .vece = MO_8 },
163709a52d85SRichard Henderson         { .fniv = gen_sabd_vec,
163809a52d85SRichard Henderson           .fno = gen_helper_gvec_sabd_h,
163909a52d85SRichard Henderson           .opt_opc = vecop_list,
164009a52d85SRichard Henderson           .vece = MO_16 },
164109a52d85SRichard Henderson         { .fni4 = gen_sabd_i32,
164209a52d85SRichard Henderson           .fniv = gen_sabd_vec,
164309a52d85SRichard Henderson           .fno = gen_helper_gvec_sabd_s,
164409a52d85SRichard Henderson           .opt_opc = vecop_list,
164509a52d85SRichard Henderson           .vece = MO_32 },
164609a52d85SRichard Henderson         { .fni8 = gen_sabd_i64,
164709a52d85SRichard Henderson           .fniv = gen_sabd_vec,
164809a52d85SRichard Henderson           .fno = gen_helper_gvec_sabd_d,
164909a52d85SRichard Henderson           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
165009a52d85SRichard Henderson           .opt_opc = vecop_list,
165109a52d85SRichard Henderson           .vece = MO_64 },
165209a52d85SRichard Henderson     };
165309a52d85SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
165409a52d85SRichard Henderson }
165509a52d85SRichard Henderson 
165609a52d85SRichard Henderson static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
165709a52d85SRichard Henderson {
165809a52d85SRichard Henderson     TCGv_i32 t = tcg_temp_new_i32();
165909a52d85SRichard Henderson 
166009a52d85SRichard Henderson     tcg_gen_sub_i32(t, a, b);
166109a52d85SRichard Henderson     tcg_gen_sub_i32(d, b, a);
166209a52d85SRichard Henderson     tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
166309a52d85SRichard Henderson }
166409a52d85SRichard Henderson 
166509a52d85SRichard Henderson static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
166609a52d85SRichard Henderson {
166709a52d85SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
166809a52d85SRichard Henderson 
166909a52d85SRichard Henderson     tcg_gen_sub_i64(t, a, b);
167009a52d85SRichard Henderson     tcg_gen_sub_i64(d, b, a);
167109a52d85SRichard Henderson     tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
167209a52d85SRichard Henderson }
167309a52d85SRichard Henderson 
167409a52d85SRichard Henderson static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
167509a52d85SRichard Henderson {
167609a52d85SRichard Henderson     TCGv_vec t = tcg_temp_new_vec_matching(d);
167709a52d85SRichard Henderson 
167809a52d85SRichard Henderson     tcg_gen_umin_vec(vece, t, a, b);
167909a52d85SRichard Henderson     tcg_gen_umax_vec(vece, d, a, b);
168009a52d85SRichard Henderson     tcg_gen_sub_vec(vece, d, d, t);
168109a52d85SRichard Henderson }
168209a52d85SRichard Henderson 
168309a52d85SRichard Henderson void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
168409a52d85SRichard Henderson                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
168509a52d85SRichard Henderson {
168609a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
168709a52d85SRichard Henderson         INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
168809a52d85SRichard Henderson     };
168909a52d85SRichard Henderson     static const GVecGen3 ops[4] = {
169009a52d85SRichard Henderson         { .fniv = gen_uabd_vec,
169109a52d85SRichard Henderson           .fno = gen_helper_gvec_uabd_b,
169209a52d85SRichard Henderson           .opt_opc = vecop_list,
169309a52d85SRichard Henderson           .vece = MO_8 },
169409a52d85SRichard Henderson         { .fniv = gen_uabd_vec,
169509a52d85SRichard Henderson           .fno = gen_helper_gvec_uabd_h,
169609a52d85SRichard Henderson           .opt_opc = vecop_list,
169709a52d85SRichard Henderson           .vece = MO_16 },
169809a52d85SRichard Henderson         { .fni4 = gen_uabd_i32,
169909a52d85SRichard Henderson           .fniv = gen_uabd_vec,
170009a52d85SRichard Henderson           .fno = gen_helper_gvec_uabd_s,
170109a52d85SRichard Henderson           .opt_opc = vecop_list,
170209a52d85SRichard Henderson           .vece = MO_32 },
170309a52d85SRichard Henderson         { .fni8 = gen_uabd_i64,
170409a52d85SRichard Henderson           .fniv = gen_uabd_vec,
170509a52d85SRichard Henderson           .fno = gen_helper_gvec_uabd_d,
170609a52d85SRichard Henderson           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
170709a52d85SRichard Henderson           .opt_opc = vecop_list,
170809a52d85SRichard Henderson           .vece = MO_64 },
170909a52d85SRichard Henderson     };
171009a52d85SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
171109a52d85SRichard Henderson }
171209a52d85SRichard Henderson 
171309a52d85SRichard Henderson static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
171409a52d85SRichard Henderson {
171509a52d85SRichard Henderson     TCGv_i32 t = tcg_temp_new_i32();
171609a52d85SRichard Henderson     gen_sabd_i32(t, a, b);
171709a52d85SRichard Henderson     tcg_gen_add_i32(d, d, t);
171809a52d85SRichard Henderson }
171909a52d85SRichard Henderson 
172009a52d85SRichard Henderson static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
172109a52d85SRichard Henderson {
172209a52d85SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
172309a52d85SRichard Henderson     gen_sabd_i64(t, a, b);
172409a52d85SRichard Henderson     tcg_gen_add_i64(d, d, t);
172509a52d85SRichard Henderson }
172609a52d85SRichard Henderson 
172709a52d85SRichard Henderson static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
172809a52d85SRichard Henderson {
172909a52d85SRichard Henderson     TCGv_vec t = tcg_temp_new_vec_matching(d);
173009a52d85SRichard Henderson     gen_sabd_vec(vece, t, a, b);
173109a52d85SRichard Henderson     tcg_gen_add_vec(vece, d, d, t);
173209a52d85SRichard Henderson }
173309a52d85SRichard Henderson 
173409a52d85SRichard Henderson void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
173509a52d85SRichard Henderson                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
173609a52d85SRichard Henderson {
173709a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
173809a52d85SRichard Henderson         INDEX_op_sub_vec, INDEX_op_add_vec,
173909a52d85SRichard Henderson         INDEX_op_smin_vec, INDEX_op_smax_vec, 0
174009a52d85SRichard Henderson     };
174109a52d85SRichard Henderson     static const GVecGen3 ops[4] = {
174209a52d85SRichard Henderson         { .fniv = gen_saba_vec,
174309a52d85SRichard Henderson           .fno = gen_helper_gvec_saba_b,
174409a52d85SRichard Henderson           .opt_opc = vecop_list,
174509a52d85SRichard Henderson           .load_dest = true,
174609a52d85SRichard Henderson           .vece = MO_8 },
174709a52d85SRichard Henderson         { .fniv = gen_saba_vec,
174809a52d85SRichard Henderson           .fno = gen_helper_gvec_saba_h,
174909a52d85SRichard Henderson           .opt_opc = vecop_list,
175009a52d85SRichard Henderson           .load_dest = true,
175109a52d85SRichard Henderson           .vece = MO_16 },
175209a52d85SRichard Henderson         { .fni4 = gen_saba_i32,
175309a52d85SRichard Henderson           .fniv = gen_saba_vec,
175409a52d85SRichard Henderson           .fno = gen_helper_gvec_saba_s,
175509a52d85SRichard Henderson           .opt_opc = vecop_list,
175609a52d85SRichard Henderson           .load_dest = true,
175709a52d85SRichard Henderson           .vece = MO_32 },
175809a52d85SRichard Henderson         { .fni8 = gen_saba_i64,
175909a52d85SRichard Henderson           .fniv = gen_saba_vec,
176009a52d85SRichard Henderson           .fno = gen_helper_gvec_saba_d,
176109a52d85SRichard Henderson           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
176209a52d85SRichard Henderson           .opt_opc = vecop_list,
176309a52d85SRichard Henderson           .load_dest = true,
176409a52d85SRichard Henderson           .vece = MO_64 },
176509a52d85SRichard Henderson     };
176609a52d85SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
176709a52d85SRichard Henderson }
176809a52d85SRichard Henderson 
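/*
 * Unsigned absolute difference and accumulate: d += |a - b|,
 * reusing the UABD expanders above for the difference.
 */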
176909a52d85SRichard Henderson static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
177009a52d85SRichard Henderson {
177109a52d85SRichard Henderson     TCGv_i32 t = tcg_temp_new_i32();
177209a52d85SRichard Henderson     gen_uabd_i32(t, a, b);
177309a52d85SRichard Henderson     tcg_gen_add_i32(d, d, t);
177409a52d85SRichard Henderson }
177509a52d85SRichard Henderson 
177609a52d85SRichard Henderson static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
177709a52d85SRichard Henderson {
177809a52d85SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
177909a52d85SRichard Henderson     gen_uabd_i64(t, a, b);
178009a52d85SRichard Henderson     tcg_gen_add_i64(d, d, t);
178109a52d85SRichard Henderson }
178209a52d85SRichard Henderson 
178309a52d85SRichard Henderson static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
178409a52d85SRichard Henderson {
178509a52d85SRichard Henderson     TCGv_vec t = tcg_temp_new_vec_matching(d);
178609a52d85SRichard Henderson     gen_uabd_vec(vece, t, a, b);
178709a52d85SRichard Henderson     tcg_gen_add_vec(vece, d, d, t);
178809a52d85SRichard Henderson }
178909a52d85SRichard Henderson 
179009a52d85SRichard Henderson void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
179109a52d85SRichard Henderson                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
179209a52d85SRichard Henderson {
179309a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
179409a52d85SRichard Henderson         INDEX_op_sub_vec, INDEX_op_add_vec,
179509a52d85SRichard Henderson         INDEX_op_umin_vec, INDEX_op_umax_vec, 0
179609a52d85SRichard Henderson     };
179709a52d85SRichard Henderson     static const GVecGen3 ops[4] = {
179809a52d85SRichard Henderson         { .fniv = gen_uaba_vec,
179909a52d85SRichard Henderson           .fno = gen_helper_gvec_uaba_b,
180009a52d85SRichard Henderson           .opt_opc = vecop_list,
180109a52d85SRichard Henderson           .load_dest = true,
180209a52d85SRichard Henderson           .vece = MO_8 },
180309a52d85SRichard Henderson         { .fniv = gen_uaba_vec,
180409a52d85SRichard Henderson           .fno = gen_helper_gvec_uaba_h,
180509a52d85SRichard Henderson           .opt_opc = vecop_list,
180609a52d85SRichard Henderson           .load_dest = true,
180709a52d85SRichard Henderson           .vece = MO_16 },
180809a52d85SRichard Henderson         { .fni4 = gen_uaba_i32,
180909a52d85SRichard Henderson           .fniv = gen_uaba_vec,
181009a52d85SRichard Henderson           .fno = gen_helper_gvec_uaba_s,
181109a52d85SRichard Henderson           .opt_opc = vecop_list,
181209a52d85SRichard Henderson           .load_dest = true,
181309a52d85SRichard Henderson           .vece = MO_32 },
181409a52d85SRichard Henderson         { .fni8 = gen_uaba_i64,
181509a52d85SRichard Henderson           .fniv = gen_uaba_vec,
181609a52d85SRichard Henderson           .fno = gen_helper_gvec_uaba_d,
181709a52d85SRichard Henderson           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
181809a52d85SRichard Henderson           .opt_opc = vecop_list,
181909a52d85SRichard Henderson           .load_dest = true,
182009a52d85SRichard Henderson           .vece = MO_64 },
182109a52d85SRichard Henderson     };
182209a52d85SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
182309a52d85SRichard Henderson }
1824a7e4eec6SRichard Henderson 
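/*
 * Pairwise operations combine adjacent element pairs drawn from both
 * inputs rather than corresponding elements, so they are expanded with
 * out-of-line helpers only.  ADDP provides all four element sizes; the
 * pairwise min/max helpers exist only for byte, halfword and word
 * elements, hence the vece <= MO_32 asserts below.
 */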
1825a7e4eec6SRichard Henderson void gen_gvec_addp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1826a7e4eec6SRichard Henderson                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1827a7e4eec6SRichard Henderson {
1828a7e4eec6SRichard Henderson     static gen_helper_gvec_3 * const fns[4] = {
1829a7e4eec6SRichard Henderson         gen_helper_gvec_addp_b,
1830a7e4eec6SRichard Henderson         gen_helper_gvec_addp_h,
1831a7e4eec6SRichard Henderson         gen_helper_gvec_addp_s,
1832a7e4eec6SRichard Henderson         gen_helper_gvec_addp_d,
1833a7e4eec6SRichard Henderson     };
1834a7e4eec6SRichard Henderson     tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
1835a7e4eec6SRichard Henderson }
183628b5451bSRichard Henderson 
183728b5451bSRichard Henderson void gen_gvec_smaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
183828b5451bSRichard Henderson                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
183928b5451bSRichard Henderson {
184028b5451bSRichard Henderson     static gen_helper_gvec_3 * const fns[4] = {
184128b5451bSRichard Henderson         gen_helper_gvec_smaxp_b,
184228b5451bSRichard Henderson         gen_helper_gvec_smaxp_h,
184328b5451bSRichard Henderson         gen_helper_gvec_smaxp_s,
184428b5451bSRichard Henderson     };
184528b5451bSRichard Henderson     tcg_debug_assert(vece <= MO_32);
184628b5451bSRichard Henderson     tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
184728b5451bSRichard Henderson }
184828b5451bSRichard Henderson 
184928b5451bSRichard Henderson void gen_gvec_sminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
185028b5451bSRichard Henderson                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
185128b5451bSRichard Henderson {
185228b5451bSRichard Henderson     static gen_helper_gvec_3 * const fns[4] = {
185328b5451bSRichard Henderson         gen_helper_gvec_sminp_b,
185428b5451bSRichard Henderson         gen_helper_gvec_sminp_h,
185528b5451bSRichard Henderson         gen_helper_gvec_sminp_s,
185628b5451bSRichard Henderson     };
185728b5451bSRichard Henderson     tcg_debug_assert(vece <= MO_32);
185828b5451bSRichard Henderson     tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
185928b5451bSRichard Henderson }
186028b5451bSRichard Henderson 
186128b5451bSRichard Henderson void gen_gvec_umaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
186228b5451bSRichard Henderson                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
186328b5451bSRichard Henderson {
186428b5451bSRichard Henderson     static gen_helper_gvec_3 * const fns[4] = {
186528b5451bSRichard Henderson         gen_helper_gvec_umaxp_b,
186628b5451bSRichard Henderson         gen_helper_gvec_umaxp_h,
186728b5451bSRichard Henderson         gen_helper_gvec_umaxp_s,
186828b5451bSRichard Henderson     };
186928b5451bSRichard Henderson     tcg_debug_assert(vece <= MO_32);
187028b5451bSRichard Henderson     tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
187128b5451bSRichard Henderson }
187228b5451bSRichard Henderson 
187328b5451bSRichard Henderson void gen_gvec_uminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
187428b5451bSRichard Henderson                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
187528b5451bSRichard Henderson {
187628b5451bSRichard Henderson     static gen_helper_gvec_3 * const fns[4] = {
187728b5451bSRichard Henderson         gen_helper_gvec_uminp_b,
187828b5451bSRichard Henderson         gen_helper_gvec_uminp_h,
187928b5451bSRichard Henderson         gen_helper_gvec_uminp_s,
188028b5451bSRichard Henderson     };
188128b5451bSRichard Henderson     tcg_debug_assert(vece <= MO_32);
188228b5451bSRichard Henderson     tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
188328b5451bSRichard Henderson }
1884203aca91SRichard Henderson 
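/*
 * Signed halving add: d = (a + b) >> 1, computed without widening as
 * (a >> 1) + (b >> 1) + (a & b & 1), where the final term restores the
 * carry out of the two discarded low bits.  The 8- and 16-bit forms use
 * the tcg_gen_vec_*_i64 helpers so that shifts and adds do not cross
 * the packed lane boundaries.
 */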
1885203aca91SRichard Henderson static void gen_shadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
1886203aca91SRichard Henderson {
1887203aca91SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
1888203aca91SRichard Henderson 
1889203aca91SRichard Henderson     tcg_gen_and_i64(t, a, b);
1890203aca91SRichard Henderson     tcg_gen_vec_sar8i_i64(a, a, 1);
1891203aca91SRichard Henderson     tcg_gen_vec_sar8i_i64(b, b, 1);
1892203aca91SRichard Henderson     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
1893203aca91SRichard Henderson     tcg_gen_vec_add8_i64(d, a, b);
1894203aca91SRichard Henderson     tcg_gen_vec_add8_i64(d, d, t);
1895203aca91SRichard Henderson }
1896203aca91SRichard Henderson 
1897203aca91SRichard Henderson static void gen_shadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
1898203aca91SRichard Henderson {
1899203aca91SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
1900203aca91SRichard Henderson 
1901203aca91SRichard Henderson     tcg_gen_and_i64(t, a, b);
1902203aca91SRichard Henderson     tcg_gen_vec_sar16i_i64(a, a, 1);
1903203aca91SRichard Henderson     tcg_gen_vec_sar16i_i64(b, b, 1);
1904203aca91SRichard Henderson     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
1905203aca91SRichard Henderson     tcg_gen_vec_add16_i64(d, a, b);
1906203aca91SRichard Henderson     tcg_gen_vec_add16_i64(d, d, t);
1907203aca91SRichard Henderson }
1908203aca91SRichard Henderson 
1909203aca91SRichard Henderson static void gen_shadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
1910203aca91SRichard Henderson {
1911203aca91SRichard Henderson     TCGv_i32 t = tcg_temp_new_i32();
1912203aca91SRichard Henderson 
1913203aca91SRichard Henderson     tcg_gen_and_i32(t, a, b);
1914203aca91SRichard Henderson     tcg_gen_sari_i32(a, a, 1);
1915203aca91SRichard Henderson     tcg_gen_sari_i32(b, b, 1);
1916203aca91SRichard Henderson     tcg_gen_andi_i32(t, t, 1);
1917203aca91SRichard Henderson     tcg_gen_add_i32(d, a, b);
1918203aca91SRichard Henderson     tcg_gen_add_i32(d, d, t);
1919203aca91SRichard Henderson }
1920203aca91SRichard Henderson 
1921203aca91SRichard Henderson static void gen_shadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
1922203aca91SRichard Henderson {
1923203aca91SRichard Henderson     TCGv_vec t = tcg_temp_new_vec_matching(d);
1924203aca91SRichard Henderson 
1925203aca91SRichard Henderson     tcg_gen_and_vec(vece, t, a, b);
1926203aca91SRichard Henderson     tcg_gen_sari_vec(vece, a, a, 1);
1927203aca91SRichard Henderson     tcg_gen_sari_vec(vece, b, b, 1);
1928203aca91SRichard Henderson     tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1));
1929203aca91SRichard Henderson     tcg_gen_add_vec(vece, d, a, b);
1930203aca91SRichard Henderson     tcg_gen_add_vec(vece, d, d, t);
1931203aca91SRichard Henderson }
1932203aca91SRichard Henderson 
1933203aca91SRichard Henderson void gen_gvec_shadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1934203aca91SRichard Henderson                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1935203aca91SRichard Henderson {
1936203aca91SRichard Henderson     static const TCGOpcode vecop_list[] = {
1937203aca91SRichard Henderson         INDEX_op_sari_vec, INDEX_op_add_vec, 0
1938203aca91SRichard Henderson     };
1939203aca91SRichard Henderson     static const GVecGen3 g[] = {
1940203aca91SRichard Henderson         { .fni8 = gen_shadd8_i64,
1941203aca91SRichard Henderson           .fniv = gen_shadd_vec,
1942203aca91SRichard Henderson           .opt_opc = vecop_list,
1943203aca91SRichard Henderson           .vece = MO_8 },
1944203aca91SRichard Henderson         { .fni8 = gen_shadd16_i64,
1945203aca91SRichard Henderson           .fniv = gen_shadd_vec,
1946203aca91SRichard Henderson           .opt_opc = vecop_list,
1947203aca91SRichard Henderson           .vece = MO_16 },
1948203aca91SRichard Henderson         { .fni4 = gen_shadd_i32,
1949203aca91SRichard Henderson           .fniv = gen_shadd_vec,
1950203aca91SRichard Henderson           .opt_opc = vecop_list,
1951203aca91SRichard Henderson           .vece = MO_32 },
1952203aca91SRichard Henderson     };
1953203aca91SRichard Henderson     tcg_debug_assert(vece <= MO_32);
1954203aca91SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]);
1955203aca91SRichard Henderson }
1956203aca91SRichard Henderson 
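/*
 * Unsigned halving add: same identity as SHADD,
 * (a >> 1) + (b >> 1) + (a & b & 1), but with logical shifts.
 */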
1957203aca91SRichard Henderson static void gen_uhadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
1958203aca91SRichard Henderson {
1959203aca91SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
1960203aca91SRichard Henderson 
1961203aca91SRichard Henderson     tcg_gen_and_i64(t, a, b);
1962203aca91SRichard Henderson     tcg_gen_vec_shr8i_i64(a, a, 1);
1963203aca91SRichard Henderson     tcg_gen_vec_shr8i_i64(b, b, 1);
1964203aca91SRichard Henderson     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
1965203aca91SRichard Henderson     tcg_gen_vec_add8_i64(d, a, b);
1966203aca91SRichard Henderson     tcg_gen_vec_add8_i64(d, d, t);
1967203aca91SRichard Henderson }
1968203aca91SRichard Henderson 
1969203aca91SRichard Henderson static void gen_uhadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
1970203aca91SRichard Henderson {
1971203aca91SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
1972203aca91SRichard Henderson 
1973203aca91SRichard Henderson     tcg_gen_and_i64(t, a, b);
1974203aca91SRichard Henderson     tcg_gen_vec_shr16i_i64(a, a, 1);
1975203aca91SRichard Henderson     tcg_gen_vec_shr16i_i64(b, b, 1);
1976203aca91SRichard Henderson     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
1977203aca91SRichard Henderson     tcg_gen_vec_add16_i64(d, a, b);
1978203aca91SRichard Henderson     tcg_gen_vec_add16_i64(d, d, t);
1979203aca91SRichard Henderson }
1980203aca91SRichard Henderson 
1981203aca91SRichard Henderson static void gen_uhadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
1982203aca91SRichard Henderson {
1983203aca91SRichard Henderson     TCGv_i32 t = tcg_temp_new_i32();
1984203aca91SRichard Henderson 
1985203aca91SRichard Henderson     tcg_gen_and_i32(t, a, b);
1986203aca91SRichard Henderson     tcg_gen_shri_i32(a, a, 1);
1987203aca91SRichard Henderson     tcg_gen_shri_i32(b, b, 1);
1988203aca91SRichard Henderson     tcg_gen_andi_i32(t, t, 1);
1989203aca91SRichard Henderson     tcg_gen_add_i32(d, a, b);
1990203aca91SRichard Henderson     tcg_gen_add_i32(d, d, t);
1991203aca91SRichard Henderson }
1992203aca91SRichard Henderson 
1993203aca91SRichard Henderson static void gen_uhadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
1994203aca91SRichard Henderson {
1995203aca91SRichard Henderson     TCGv_vec t = tcg_temp_new_vec_matching(d);
1996203aca91SRichard Henderson 
1997203aca91SRichard Henderson     tcg_gen_and_vec(vece, t, a, b);
1998203aca91SRichard Henderson     tcg_gen_shri_vec(vece, a, a, 1);
1999203aca91SRichard Henderson     tcg_gen_shri_vec(vece, b, b, 1);
2000203aca91SRichard Henderson     tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1));
2001203aca91SRichard Henderson     tcg_gen_add_vec(vece, d, a, b);
2002203aca91SRichard Henderson     tcg_gen_add_vec(vece, d, d, t);
2003203aca91SRichard Henderson }
2004203aca91SRichard Henderson 
2005203aca91SRichard Henderson void gen_gvec_uhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2006203aca91SRichard Henderson                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2007203aca91SRichard Henderson {
2008203aca91SRichard Henderson     static const TCGOpcode vecop_list[] = {
2009203aca91SRichard Henderson         INDEX_op_shri_vec, INDEX_op_add_vec, 0
2010203aca91SRichard Henderson     };
2011203aca91SRichard Henderson     static const GVecGen3 g[] = {
2012203aca91SRichard Henderson         { .fni8 = gen_uhadd8_i64,
2013203aca91SRichard Henderson           .fniv = gen_uhadd_vec,
2014203aca91SRichard Henderson           .opt_opc = vecop_list,
2015203aca91SRichard Henderson           .vece = MO_8 },
2016203aca91SRichard Henderson         { .fni8 = gen_uhadd16_i64,
2017203aca91SRichard Henderson           .fniv = gen_uhadd_vec,
2018203aca91SRichard Henderson           .opt_opc = vecop_list,
2019203aca91SRichard Henderson           .vece = MO_16 },
2020203aca91SRichard Henderson         { .fni4 = gen_uhadd_i32,
2021203aca91SRichard Henderson           .fniv = gen_uhadd_vec,
2022203aca91SRichard Henderson           .opt_opc = vecop_list,
2023203aca91SRichard Henderson           .vece = MO_32 },
2024203aca91SRichard Henderson     };
2025203aca91SRichard Henderson     tcg_debug_assert(vece <= MO_32);
2026203aca91SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]);
2027203aca91SRichard Henderson }
202834c0d865SRichard Henderson 
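/*
 * Signed halving subtract: d = (a - b) >> 1, computed as
 * (a >> 1) - (b >> 1) - (~a & b & 1); the final term supplies the
 * borrow generated when only b has its low bit set.
 */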
202934c0d865SRichard Henderson static void gen_shsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
203034c0d865SRichard Henderson {
203134c0d865SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
203234c0d865SRichard Henderson 
203334c0d865SRichard Henderson     tcg_gen_andc_i64(t, b, a);
203434c0d865SRichard Henderson     tcg_gen_vec_sar8i_i64(a, a, 1);
203534c0d865SRichard Henderson     tcg_gen_vec_sar8i_i64(b, b, 1);
203634c0d865SRichard Henderson     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
203734c0d865SRichard Henderson     tcg_gen_vec_sub8_i64(d, a, b);
203834c0d865SRichard Henderson     tcg_gen_vec_sub8_i64(d, d, t);
203934c0d865SRichard Henderson }
204034c0d865SRichard Henderson 
204134c0d865SRichard Henderson static void gen_shsub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
204234c0d865SRichard Henderson {
204334c0d865SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
204434c0d865SRichard Henderson 
204534c0d865SRichard Henderson     tcg_gen_andc_i64(t, b, a);
204634c0d865SRichard Henderson     tcg_gen_vec_sar16i_i64(a, a, 1);
204734c0d865SRichard Henderson     tcg_gen_vec_sar16i_i64(b, b, 1);
204834c0d865SRichard Henderson     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
204934c0d865SRichard Henderson     tcg_gen_vec_sub16_i64(d, a, b);
205034c0d865SRichard Henderson     tcg_gen_vec_sub16_i64(d, d, t);
205134c0d865SRichard Henderson }
205234c0d865SRichard Henderson 
205334c0d865SRichard Henderson static void gen_shsub_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
205434c0d865SRichard Henderson {
205534c0d865SRichard Henderson     TCGv_i32 t = tcg_temp_new_i32();
205634c0d865SRichard Henderson 
205734c0d865SRichard Henderson     tcg_gen_andc_i32(t, b, a);
205834c0d865SRichard Henderson     tcg_gen_sari_i32(a, a, 1);
205934c0d865SRichard Henderson     tcg_gen_sari_i32(b, b, 1);
206034c0d865SRichard Henderson     tcg_gen_andi_i32(t, t, 1);
206134c0d865SRichard Henderson     tcg_gen_sub_i32(d, a, b);
206234c0d865SRichard Henderson     tcg_gen_sub_i32(d, d, t);
206334c0d865SRichard Henderson }
206434c0d865SRichard Henderson 
206534c0d865SRichard Henderson static void gen_shsub_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
206634c0d865SRichard Henderson {
206734c0d865SRichard Henderson     TCGv_vec t = tcg_temp_new_vec_matching(d);
206834c0d865SRichard Henderson 
206934c0d865SRichard Henderson     tcg_gen_andc_vec(vece, t, b, a);
207034c0d865SRichard Henderson     tcg_gen_sari_vec(vece, a, a, 1);
207134c0d865SRichard Henderson     tcg_gen_sari_vec(vece, b, b, 1);
207234c0d865SRichard Henderson     tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1));
207334c0d865SRichard Henderson     tcg_gen_sub_vec(vece, d, a, b);
207434c0d865SRichard Henderson     tcg_gen_sub_vec(vece, d, d, t);
207534c0d865SRichard Henderson }
207634c0d865SRichard Henderson 
207734c0d865SRichard Henderson void gen_gvec_shsub(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
207834c0d865SRichard Henderson                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
207934c0d865SRichard Henderson {
208034c0d865SRichard Henderson     static const TCGOpcode vecop_list[] = {
208134c0d865SRichard Henderson         INDEX_op_sari_vec, INDEX_op_sub_vec, 0
208234c0d865SRichard Henderson     };
208334c0d865SRichard Henderson     static const GVecGen3 g[4] = {
208434c0d865SRichard Henderson         { .fni8 = gen_shsub8_i64,
208534c0d865SRichard Henderson           .fniv = gen_shsub_vec,
208634c0d865SRichard Henderson           .opt_opc = vecop_list,
208734c0d865SRichard Henderson           .vece = MO_8 },
208834c0d865SRichard Henderson         { .fni8 = gen_shsub16_i64,
208934c0d865SRichard Henderson           .fniv = gen_shsub_vec,
209034c0d865SRichard Henderson           .opt_opc = vecop_list,
209134c0d865SRichard Henderson           .vece = MO_16 },
209234c0d865SRichard Henderson         { .fni4 = gen_shsub_i32,
209334c0d865SRichard Henderson           .fniv = gen_shsub_vec,
209434c0d865SRichard Henderson           .opt_opc = vecop_list,
209534c0d865SRichard Henderson           .vece = MO_32 },
209634c0d865SRichard Henderson     };
209734c0d865SRichard Henderson     assert(vece <= MO_32);
209834c0d865SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]);
209934c0d865SRichard Henderson }
210034c0d865SRichard Henderson 
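/*
 * Unsigned halving subtract: same borrow correction as SHSUB,
 * but with logical shifts.
 */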
210134c0d865SRichard Henderson static void gen_uhsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
210234c0d865SRichard Henderson {
210334c0d865SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
210434c0d865SRichard Henderson 
210534c0d865SRichard Henderson     tcg_gen_andc_i64(t, b, a);
210634c0d865SRichard Henderson     tcg_gen_vec_shr8i_i64(a, a, 1);
210734c0d865SRichard Henderson     tcg_gen_vec_shr8i_i64(b, b, 1);
210834c0d865SRichard Henderson     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
210934c0d865SRichard Henderson     tcg_gen_vec_sub8_i64(d, a, b);
211034c0d865SRichard Henderson     tcg_gen_vec_sub8_i64(d, d, t);
211134c0d865SRichard Henderson }
211234c0d865SRichard Henderson 
211334c0d865SRichard Henderson static void gen_uhsub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
211434c0d865SRichard Henderson {
211534c0d865SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
211634c0d865SRichard Henderson 
211734c0d865SRichard Henderson     tcg_gen_andc_i64(t, b, a);
211834c0d865SRichard Henderson     tcg_gen_vec_shr16i_i64(a, a, 1);
211934c0d865SRichard Henderson     tcg_gen_vec_shr16i_i64(b, b, 1);
212034c0d865SRichard Henderson     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
212134c0d865SRichard Henderson     tcg_gen_vec_sub16_i64(d, a, b);
212234c0d865SRichard Henderson     tcg_gen_vec_sub16_i64(d, d, t);
212334c0d865SRichard Henderson }
212434c0d865SRichard Henderson 
212534c0d865SRichard Henderson static void gen_uhsub_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
212634c0d865SRichard Henderson {
212734c0d865SRichard Henderson     TCGv_i32 t = tcg_temp_new_i32();
212834c0d865SRichard Henderson 
212934c0d865SRichard Henderson     tcg_gen_andc_i32(t, b, a);
213034c0d865SRichard Henderson     tcg_gen_shri_i32(a, a, 1);
213134c0d865SRichard Henderson     tcg_gen_shri_i32(b, b, 1);
213234c0d865SRichard Henderson     tcg_gen_andi_i32(t, t, 1);
213334c0d865SRichard Henderson     tcg_gen_sub_i32(d, a, b);
213434c0d865SRichard Henderson     tcg_gen_sub_i32(d, d, t);
213534c0d865SRichard Henderson }
213634c0d865SRichard Henderson 
213734c0d865SRichard Henderson static void gen_uhsub_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
213834c0d865SRichard Henderson {
213934c0d865SRichard Henderson     TCGv_vec t = tcg_temp_new_vec_matching(d);
214034c0d865SRichard Henderson 
214134c0d865SRichard Henderson     tcg_gen_andc_vec(vece, t, b, a);
214234c0d865SRichard Henderson     tcg_gen_shri_vec(vece, a, a, 1);
214334c0d865SRichard Henderson     tcg_gen_shri_vec(vece, b, b, 1);
214434c0d865SRichard Henderson     tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1));
214534c0d865SRichard Henderson     tcg_gen_sub_vec(vece, d, a, b);
214634c0d865SRichard Henderson     tcg_gen_sub_vec(vece, d, d, t);
214734c0d865SRichard Henderson }
214834c0d865SRichard Henderson 
214934c0d865SRichard Henderson void gen_gvec_uhsub(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
215034c0d865SRichard Henderson                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
215134c0d865SRichard Henderson {
215234c0d865SRichard Henderson     static const TCGOpcode vecop_list[] = {
215334c0d865SRichard Henderson         INDEX_op_shri_vec, INDEX_op_sub_vec, 0
215434c0d865SRichard Henderson     };
215534c0d865SRichard Henderson     static const GVecGen3 g[4] = {
215634c0d865SRichard Henderson         { .fni8 = gen_uhsub8_i64,
215734c0d865SRichard Henderson           .fniv = gen_uhsub_vec,
215834c0d865SRichard Henderson           .opt_opc = vecop_list,
215934c0d865SRichard Henderson           .vece = MO_8 },
216034c0d865SRichard Henderson         { .fni8 = gen_uhsub16_i64,
216134c0d865SRichard Henderson           .fniv = gen_uhsub_vec,
216234c0d865SRichard Henderson           .opt_opc = vecop_list,
216334c0d865SRichard Henderson           .vece = MO_16 },
216434c0d865SRichard Henderson         { .fni4 = gen_uhsub_i32,
216534c0d865SRichard Henderson           .fniv = gen_uhsub_vec,
216634c0d865SRichard Henderson           .opt_opc = vecop_list,
216734c0d865SRichard Henderson           .vece = MO_32 },
216834c0d865SRichard Henderson     };
216934c0d865SRichard Henderson     assert(vece <= MO_32);
217034c0d865SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]);
217134c0d865SRichard Henderson }
21728989b95eSRichard Henderson 
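/*
 * Signed rounding halving add: d = (a + b + 1) >> 1, computed as
 * (a >> 1) + (b >> 1) + ((a | b) & 1); the OR of the low bits covers
 * both the carry and the rounding increment.
 */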
21738989b95eSRichard Henderson static void gen_srhadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
21748989b95eSRichard Henderson {
21758989b95eSRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
21768989b95eSRichard Henderson 
21778989b95eSRichard Henderson     tcg_gen_or_i64(t, a, b);
21788989b95eSRichard Henderson     tcg_gen_vec_sar8i_i64(a, a, 1);
21798989b95eSRichard Henderson     tcg_gen_vec_sar8i_i64(b, b, 1);
21808989b95eSRichard Henderson     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
21818989b95eSRichard Henderson     tcg_gen_vec_add8_i64(d, a, b);
21828989b95eSRichard Henderson     tcg_gen_vec_add8_i64(d, d, t);
21838989b95eSRichard Henderson }
21848989b95eSRichard Henderson 
21858989b95eSRichard Henderson static void gen_srhadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
21868989b95eSRichard Henderson {
21878989b95eSRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
21888989b95eSRichard Henderson 
21898989b95eSRichard Henderson     tcg_gen_or_i64(t, a, b);
21908989b95eSRichard Henderson     tcg_gen_vec_sar16i_i64(a, a, 1);
21918989b95eSRichard Henderson     tcg_gen_vec_sar16i_i64(b, b, 1);
21928989b95eSRichard Henderson     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
21938989b95eSRichard Henderson     tcg_gen_vec_add16_i64(d, a, b);
21948989b95eSRichard Henderson     tcg_gen_vec_add16_i64(d, d, t);
21958989b95eSRichard Henderson }
21968989b95eSRichard Henderson 
21978989b95eSRichard Henderson static void gen_srhadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
21988989b95eSRichard Henderson {
21998989b95eSRichard Henderson     TCGv_i32 t = tcg_temp_new_i32();
22008989b95eSRichard Henderson 
22018989b95eSRichard Henderson     tcg_gen_or_i32(t, a, b);
22028989b95eSRichard Henderson     tcg_gen_sari_i32(a, a, 1);
22038989b95eSRichard Henderson     tcg_gen_sari_i32(b, b, 1);
22048989b95eSRichard Henderson     tcg_gen_andi_i32(t, t, 1);
22058989b95eSRichard Henderson     tcg_gen_add_i32(d, a, b);
22068989b95eSRichard Henderson     tcg_gen_add_i32(d, d, t);
22078989b95eSRichard Henderson }
22088989b95eSRichard Henderson 
22098989b95eSRichard Henderson static void gen_srhadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
22108989b95eSRichard Henderson {
22118989b95eSRichard Henderson     TCGv_vec t = tcg_temp_new_vec_matching(d);
22128989b95eSRichard Henderson 
22138989b95eSRichard Henderson     tcg_gen_or_vec(vece, t, a, b);
22148989b95eSRichard Henderson     tcg_gen_sari_vec(vece, a, a, 1);
22158989b95eSRichard Henderson     tcg_gen_sari_vec(vece, b, b, 1);
22168989b95eSRichard Henderson     tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1));
22178989b95eSRichard Henderson     tcg_gen_add_vec(vece, d, a, b);
22188989b95eSRichard Henderson     tcg_gen_add_vec(vece, d, d, t);
22198989b95eSRichard Henderson }
22208989b95eSRichard Henderson 
22218989b95eSRichard Henderson void gen_gvec_srhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
22228989b95eSRichard Henderson                      uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
22238989b95eSRichard Henderson {
22248989b95eSRichard Henderson     static const TCGOpcode vecop_list[] = {
22258989b95eSRichard Henderson         INDEX_op_sari_vec, INDEX_op_add_vec, 0
22268989b95eSRichard Henderson     };
22278989b95eSRichard Henderson     static const GVecGen3 g[] = {
22288989b95eSRichard Henderson         { .fni8 = gen_srhadd8_i64,
22298989b95eSRichard Henderson           .fniv = gen_srhadd_vec,
22308989b95eSRichard Henderson           .opt_opc = vecop_list,
22318989b95eSRichard Henderson           .vece = MO_8 },
22328989b95eSRichard Henderson         { .fni8 = gen_srhadd16_i64,
22338989b95eSRichard Henderson           .fniv = gen_srhadd_vec,
22348989b95eSRichard Henderson           .opt_opc = vecop_list,
22358989b95eSRichard Henderson           .vece = MO_16 },
22368989b95eSRichard Henderson         { .fni4 = gen_srhadd_i32,
22378989b95eSRichard Henderson           .fniv = gen_srhadd_vec,
22388989b95eSRichard Henderson           .opt_opc = vecop_list,
22398989b95eSRichard Henderson           .vece = MO_32 },
22408989b95eSRichard Henderson     };
22418989b95eSRichard Henderson     assert(vece <= MO_32);
22428989b95eSRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]);
22438989b95eSRichard Henderson }
22448989b95eSRichard Henderson 
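/*
 * Unsigned rounding halving add: same identity as SRHADD,
 * but with logical shifts.
 */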
22458989b95eSRichard Henderson static void gen_urhadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
22468989b95eSRichard Henderson {
22478989b95eSRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
22488989b95eSRichard Henderson 
22498989b95eSRichard Henderson     tcg_gen_or_i64(t, a, b);
22508989b95eSRichard Henderson     tcg_gen_vec_shr8i_i64(a, a, 1);
22518989b95eSRichard Henderson     tcg_gen_vec_shr8i_i64(b, b, 1);
22528989b95eSRichard Henderson     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
22538989b95eSRichard Henderson     tcg_gen_vec_add8_i64(d, a, b);
22548989b95eSRichard Henderson     tcg_gen_vec_add8_i64(d, d, t);
22558989b95eSRichard Henderson }
22568989b95eSRichard Henderson 
22578989b95eSRichard Henderson static void gen_urhadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
22588989b95eSRichard Henderson {
22598989b95eSRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
22608989b95eSRichard Henderson 
22618989b95eSRichard Henderson     tcg_gen_or_i64(t, a, b);
22628989b95eSRichard Henderson     tcg_gen_vec_shr16i_i64(a, a, 1);
22638989b95eSRichard Henderson     tcg_gen_vec_shr16i_i64(b, b, 1);
22648989b95eSRichard Henderson     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
22658989b95eSRichard Henderson     tcg_gen_vec_add16_i64(d, a, b);
22668989b95eSRichard Henderson     tcg_gen_vec_add16_i64(d, d, t);
22678989b95eSRichard Henderson }
22688989b95eSRichard Henderson 
22698989b95eSRichard Henderson static void gen_urhadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
22708989b95eSRichard Henderson {
22718989b95eSRichard Henderson     TCGv_i32 t = tcg_temp_new_i32();
22728989b95eSRichard Henderson 
22738989b95eSRichard Henderson     tcg_gen_or_i32(t, a, b);
22748989b95eSRichard Henderson     tcg_gen_shri_i32(a, a, 1);
22758989b95eSRichard Henderson     tcg_gen_shri_i32(b, b, 1);
22768989b95eSRichard Henderson     tcg_gen_andi_i32(t, t, 1);
22778989b95eSRichard Henderson     tcg_gen_add_i32(d, a, b);
22788989b95eSRichard Henderson     tcg_gen_add_i32(d, d, t);
22798989b95eSRichard Henderson }
22808989b95eSRichard Henderson 
22818989b95eSRichard Henderson static void gen_urhadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
22828989b95eSRichard Henderson {
22838989b95eSRichard Henderson     TCGv_vec t = tcg_temp_new_vec_matching(d);
22848989b95eSRichard Henderson 
22858989b95eSRichard Henderson     tcg_gen_or_vec(vece, t, a, b);
22868989b95eSRichard Henderson     tcg_gen_shri_vec(vece, a, a, 1);
22878989b95eSRichard Henderson     tcg_gen_shri_vec(vece, b, b, 1);
22888989b95eSRichard Henderson     tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1));
22898989b95eSRichard Henderson     tcg_gen_add_vec(vece, d, a, b);
22908989b95eSRichard Henderson     tcg_gen_add_vec(vece, d, d, t);
22918989b95eSRichard Henderson }
22928989b95eSRichard Henderson 
22938989b95eSRichard Henderson void gen_gvec_urhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
22948989b95eSRichard Henderson                      uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
22958989b95eSRichard Henderson {
22968989b95eSRichard Henderson     static const TCGOpcode vecop_list[] = {
22978989b95eSRichard Henderson         INDEX_op_shri_vec, INDEX_op_add_vec, 0
22988989b95eSRichard Henderson     };
22998989b95eSRichard Henderson     static const GVecGen3 g[] = {
23008989b95eSRichard Henderson         { .fni8 = gen_urhadd8_i64,
23018989b95eSRichard Henderson           .fniv = gen_urhadd_vec,
23028989b95eSRichard Henderson           .opt_opc = vecop_list,
23038989b95eSRichard Henderson           .vece = MO_8 },
23048989b95eSRichard Henderson         { .fni8 = gen_urhadd16_i64,
23058989b95eSRichard Henderson           .fniv = gen_urhadd_vec,
23068989b95eSRichard Henderson           .opt_opc = vecop_list,
23078989b95eSRichard Henderson           .vece = MO_16 },
23088989b95eSRichard Henderson         { .fni4 = gen_urhadd_i32,
23098989b95eSRichard Henderson           .fniv = gen_urhadd_vec,
23108989b95eSRichard Henderson           .opt_opc = vecop_list,
23118989b95eSRichard Henderson           .vece = MO_32 },
23128989b95eSRichard Henderson     };
23138989b95eSRichard Henderson     assert(vece <= MO_32);
23148989b95eSRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]);
23158989b95eSRichard Henderson }
2316