xref: /openbmc/qemu/target/arm/tcg/gengvec.c (revision 01dc65a3bc262ab1bec8fe89775e9bbfa627becb)
/*
 *  ARM generic vector expansion
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *  Copyright (c) 2005-2007 CodeSourcery
 *  Copyright (c) 2007 OpenedHand, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
2109a52d85SRichard Henderson 
2209a52d85SRichard Henderson #include "qemu/osdep.h"
2309a52d85SRichard Henderson #include "translate.h"
2409a52d85SRichard Henderson 
2509a52d85SRichard Henderson 
/*
 * Expand a three-operand out-of-line gvec helper that additionally
 * receives a pointer to the vfp.qc field of CPUARMState.
 *
 * rd_ofs, rn_ofs, rm_ofs, opr_sz and max_sz are forwarded, in that
 * order, to tcg_gen_gvec_3_ptr() together with a data argument of 0
 * and the helper @fn.
 */
static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
                            uint32_t opr_sz, uint32_t max_sz,
                            gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr qc_ptr = tcg_temp_new_ptr();

    /* The operation size must not exceed the size of the qc field. */
    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    /* qc_ptr = tcg_env + offsetof(CPUARMState, vfp.qc) */
    tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc));
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
                       opr_sz, max_sz, 0, fn);
}
3709a52d85SRichard Henderson 
gen_gvec_sqdmulh_qc(unsigned vece,uint32_t rd_ofs,uint32_t rn_ofs,uint32_t rm_ofs,uint32_t opr_sz,uint32_t max_sz)388f81dcedSRichard Henderson void gen_gvec_sqdmulh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
398f81dcedSRichard Henderson                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
408f81dcedSRichard Henderson {
418f81dcedSRichard Henderson     static gen_helper_gvec_3_ptr * const fns[2] = {
428f81dcedSRichard Henderson         gen_helper_neon_sqdmulh_h, gen_helper_neon_sqdmulh_s
438f81dcedSRichard Henderson     };
448f81dcedSRichard Henderson     tcg_debug_assert(vece >= 1 && vece <= 2);
458f81dcedSRichard Henderson     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
468f81dcedSRichard Henderson }
478f81dcedSRichard Henderson 
gen_gvec_sqrdmulh_qc(unsigned vece,uint32_t rd_ofs,uint32_t rn_ofs,uint32_t rm_ofs,uint32_t opr_sz,uint32_t max_sz)488f81dcedSRichard Henderson void gen_gvec_sqrdmulh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
498f81dcedSRichard Henderson                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
508f81dcedSRichard Henderson {
518f81dcedSRichard Henderson     static gen_helper_gvec_3_ptr * const fns[2] = {
528f81dcedSRichard Henderson         gen_helper_neon_sqrdmulh_h, gen_helper_neon_sqrdmulh_s
538f81dcedSRichard Henderson     };
548f81dcedSRichard Henderson     tcg_debug_assert(vece >= 1 && vece <= 2);
558f81dcedSRichard Henderson     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
568f81dcedSRichard Henderson }
578f81dcedSRichard Henderson 
gen_gvec_sqrdmlah_qc(unsigned vece,uint32_t rd_ofs,uint32_t rn_ofs,uint32_t rm_ofs,uint32_t opr_sz,uint32_t max_sz)5809a52d85SRichard Henderson void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5909a52d85SRichard Henderson                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
6009a52d85SRichard Henderson {
6109a52d85SRichard Henderson     static gen_helper_gvec_3_ptr * const fns[2] = {
6209a52d85SRichard Henderson         gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
6309a52d85SRichard Henderson     };
6409a52d85SRichard Henderson     tcg_debug_assert(vece >= 1 && vece <= 2);
6509a52d85SRichard Henderson     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
6609a52d85SRichard Henderson }
6709a52d85SRichard Henderson 
gen_gvec_sqrdmlsh_qc(unsigned vece,uint32_t rd_ofs,uint32_t rn_ofs,uint32_t rm_ofs,uint32_t opr_sz,uint32_t max_sz)6809a52d85SRichard Henderson void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
6909a52d85SRichard Henderson                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
7009a52d85SRichard Henderson {
7109a52d85SRichard Henderson     static gen_helper_gvec_3_ptr * const fns[2] = {
7209a52d85SRichard Henderson         gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
7309a52d85SRichard Henderson     };
7409a52d85SRichard Henderson     tcg_debug_assert(vece >= 1 && vece <= 2);
7509a52d85SRichard Henderson     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
7609a52d85SRichard Henderson }
7709a52d85SRichard Henderson 
7809a52d85SRichard Henderson #define GEN_CMP0(NAME, COND)                              \
7909a52d85SRichard Henderson     void NAME(unsigned vece, uint32_t d, uint32_t m,      \
8009a52d85SRichard Henderson               uint32_t opr_sz, uint32_t max_sz)           \
8109a52d85SRichard Henderson     { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); }
8209a52d85SRichard Henderson 
GEN_CMP0(gen_gvec_ceq0,TCG_COND_EQ)8309a52d85SRichard Henderson GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ)
8409a52d85SRichard Henderson GEN_CMP0(gen_gvec_cle0, TCG_COND_LE)
8509a52d85SRichard Henderson GEN_CMP0(gen_gvec_cge0, TCG_COND_GE)
8609a52d85SRichard Henderson GEN_CMP0(gen_gvec_clt0, TCG_COND_LT)
8709a52d85SRichard Henderson GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT)
8809a52d85SRichard Henderson 
8909a52d85SRichard Henderson #undef GEN_CMP0
9009a52d85SRichard Henderson 
/*
 * Expand a signed (arithmetic) shift right by immediate.
 * The shift count is clamped to esize - 1, where esize = 8 << vece,
 * before being handed to tcg_gen_gvec_sari().
 */
void gen_gvec_sshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    /* Signed shift out of range results in all-sign-bits */
    shift = MIN(shift, (8 << vece) - 1);
    tcg_gen_gvec_sari(vece, rd_ofs, rm_ofs, shift, opr_sz, max_sz);
}
9800bcab5bSRichard Henderson 
/*
 * Expand an unsigned (logical) shift right by immediate.
 * A shift count of esize (8 << vece) or more stores zero to the
 * destination instead of emitting a shift.
 */
void gen_gvec_ushr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    /* Unsigned shift out of range results in all-zero-bits */
    if (shift >= (8 << vece)) {
        tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
    } else {
        tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift, opr_sz, max_sz);
    }
}
10900bcab5bSRichard Henderson 
/*
 * d += a >> shift (arithmetic), using the packed 8-bit i64 ops.
 * Note that @a is overwritten with the shifted value.
 */
static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}
11509a52d85SRichard Henderson 
/*
 * d += a >> shift (arithmetic), using the packed 16-bit i64 ops.
 * Note that @a is overwritten with the shifted value.
 */
static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}
12109a52d85SRichard Henderson 
/*
 * d += a >> shift (arithmetic) on a 32-bit value.
 * Note that @a is overwritten with the shifted value.
 */
static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_sari_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}
12709a52d85SRichard Henderson 
/*
 * d += a >> shift (arithmetic) on a 64-bit value.
 * Note that @a is overwritten with the shifted value.
 */
static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_sari_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}
13309a52d85SRichard Henderson 
/*
 * Host-vector form: d += a >> sh (arithmetic), element size @vece.
 * Note that @a is overwritten with the shifted value.
 */
static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_sari_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}
13909a52d85SRichard Henderson 
gen_gvec_ssra(unsigned vece,uint32_t rd_ofs,uint32_t rm_ofs,int64_t shift,uint32_t opr_sz,uint32_t max_sz)14009a52d85SRichard Henderson void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
14109a52d85SRichard Henderson                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
14209a52d85SRichard Henderson {
14309a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
14409a52d85SRichard Henderson         INDEX_op_sari_vec, INDEX_op_add_vec, 0
14509a52d85SRichard Henderson     };
14609a52d85SRichard Henderson     static const GVecGen2i ops[4] = {
14709a52d85SRichard Henderson         { .fni8 = gen_ssra8_i64,
14809a52d85SRichard Henderson           .fniv = gen_ssra_vec,
14909a52d85SRichard Henderson           .fno = gen_helper_gvec_ssra_b,
15009a52d85SRichard Henderson           .load_dest = true,
15109a52d85SRichard Henderson           .opt_opc = vecop_list,
15209a52d85SRichard Henderson           .vece = MO_8 },
15309a52d85SRichard Henderson         { .fni8 = gen_ssra16_i64,
15409a52d85SRichard Henderson           .fniv = gen_ssra_vec,
15509a52d85SRichard Henderson           .fno = gen_helper_gvec_ssra_h,
15609a52d85SRichard Henderson           .load_dest = true,
15709a52d85SRichard Henderson           .opt_opc = vecop_list,
15809a52d85SRichard Henderson           .vece = MO_16 },
15909a52d85SRichard Henderson         { .fni4 = gen_ssra32_i32,
16009a52d85SRichard Henderson           .fniv = gen_ssra_vec,
16109a52d85SRichard Henderson           .fno = gen_helper_gvec_ssra_s,
16209a52d85SRichard Henderson           .load_dest = true,
16309a52d85SRichard Henderson           .opt_opc = vecop_list,
16409a52d85SRichard Henderson           .vece = MO_32 },
16509a52d85SRichard Henderson         { .fni8 = gen_ssra64_i64,
16609a52d85SRichard Henderson           .fniv = gen_ssra_vec,
16709a52d85SRichard Henderson           .fno = gen_helper_gvec_ssra_d,
16809a52d85SRichard Henderson           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
16909a52d85SRichard Henderson           .opt_opc = vecop_list,
17009a52d85SRichard Henderson           .load_dest = true,
17109a52d85SRichard Henderson           .vece = MO_64 },
17209a52d85SRichard Henderson     };
17309a52d85SRichard Henderson 
17409a52d85SRichard Henderson     /* tszimm encoding produces immediates in the range [1..esize]. */
17509a52d85SRichard Henderson     tcg_debug_assert(shift > 0);
17609a52d85SRichard Henderson     tcg_debug_assert(shift <= (8 << vece));
17709a52d85SRichard Henderson 
17809a52d85SRichard Henderson     /*
17909a52d85SRichard Henderson      * Shifts larger than the element size are architecturally valid.
18009a52d85SRichard Henderson      * Signed results in all sign bits.
18109a52d85SRichard Henderson      */
18209a52d85SRichard Henderson     shift = MIN(shift, (8 << vece) - 1);
18309a52d85SRichard Henderson     tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
18409a52d85SRichard Henderson }
18509a52d85SRichard Henderson 
/*
 * d += a >> shift (logical), using the packed 8-bit i64 ops.
 * Note that @a is overwritten with the shifted value.
 */
static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}
19109a52d85SRichard Henderson 
/*
 * d += a >> shift (logical), using the packed 16-bit i64 ops.
 * Note that @a is overwritten with the shifted value.
 */
static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}
19709a52d85SRichard Henderson 
/*
 * d += a >> shift (logical) on a 32-bit value.
 * Note that @a is overwritten with the shifted value.
 */
static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}
20309a52d85SRichard Henderson 
/*
 * d += a >> shift (logical) on a 64-bit value.
 * Note that @a is overwritten with the shifted value.
 */
static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}
20909a52d85SRichard Henderson 
/*
 * Host-vector form: d += a >> sh (logical), element size @vece.
 * Note that @a is overwritten with the shifted value.
 */
static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_shri_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}
21509a52d85SRichard Henderson 
gen_gvec_usra(unsigned vece,uint32_t rd_ofs,uint32_t rm_ofs,int64_t shift,uint32_t opr_sz,uint32_t max_sz)21609a52d85SRichard Henderson void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
21709a52d85SRichard Henderson                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
21809a52d85SRichard Henderson {
21909a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
22009a52d85SRichard Henderson         INDEX_op_shri_vec, INDEX_op_add_vec, 0
22109a52d85SRichard Henderson     };
22209a52d85SRichard Henderson     static const GVecGen2i ops[4] = {
22309a52d85SRichard Henderson         { .fni8 = gen_usra8_i64,
22409a52d85SRichard Henderson           .fniv = gen_usra_vec,
22509a52d85SRichard Henderson           .fno = gen_helper_gvec_usra_b,
22609a52d85SRichard Henderson           .load_dest = true,
22709a52d85SRichard Henderson           .opt_opc = vecop_list,
22809a52d85SRichard Henderson           .vece = MO_8, },
22909a52d85SRichard Henderson         { .fni8 = gen_usra16_i64,
23009a52d85SRichard Henderson           .fniv = gen_usra_vec,
23109a52d85SRichard Henderson           .fno = gen_helper_gvec_usra_h,
23209a52d85SRichard Henderson           .load_dest = true,
23309a52d85SRichard Henderson           .opt_opc = vecop_list,
23409a52d85SRichard Henderson           .vece = MO_16, },
23509a52d85SRichard Henderson         { .fni4 = gen_usra32_i32,
23609a52d85SRichard Henderson           .fniv = gen_usra_vec,
23709a52d85SRichard Henderson           .fno = gen_helper_gvec_usra_s,
23809a52d85SRichard Henderson           .load_dest = true,
23909a52d85SRichard Henderson           .opt_opc = vecop_list,
24009a52d85SRichard Henderson           .vece = MO_32, },
24109a52d85SRichard Henderson         { .fni8 = gen_usra64_i64,
24209a52d85SRichard Henderson           .fniv = gen_usra_vec,
24309a52d85SRichard Henderson           .fno = gen_helper_gvec_usra_d,
24409a52d85SRichard Henderson           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
24509a52d85SRichard Henderson           .load_dest = true,
24609a52d85SRichard Henderson           .opt_opc = vecop_list,
24709a52d85SRichard Henderson           .vece = MO_64, },
24809a52d85SRichard Henderson     };
24909a52d85SRichard Henderson 
25009a52d85SRichard Henderson     /* tszimm encoding produces immediates in the range [1..esize]. */
25109a52d85SRichard Henderson     tcg_debug_assert(shift > 0);
25209a52d85SRichard Henderson     tcg_debug_assert(shift <= (8 << vece));
25309a52d85SRichard Henderson 
25409a52d85SRichard Henderson     /*
25509a52d85SRichard Henderson      * Shifts larger than the element size are architecturally valid.
25609a52d85SRichard Henderson      * Unsigned results in all zeros as input to accumulate: nop.
25709a52d85SRichard Henderson      */
25809a52d85SRichard Henderson     if (shift < (8 << vece)) {
25909a52d85SRichard Henderson         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
26009a52d85SRichard Henderson     } else {
26109a52d85SRichard Henderson         /* Nop, but we do need to clear the tail. */
26209a52d85SRichard Henderson         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
26309a52d85SRichard Henderson     }
26409a52d85SRichard Henderson }
26509a52d85SRichard Henderson 
26609a52d85SRichard Henderson /*
26709a52d85SRichard Henderson  * Shift one less than the requested amount, and the low bit is
26809a52d85SRichard Henderson  * the rounding bit.  For the 8 and 16-bit operations, because we
26909a52d85SRichard Henderson  * mask the low bit, we can perform a normal integer shift instead
27009a52d85SRichard Henderson  * of a vector shift.
27109a52d85SRichard Henderson  */
gen_srshr8_i64(TCGv_i64 d,TCGv_i64 a,int64_t sh)27209a52d85SRichard Henderson static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
27309a52d85SRichard Henderson {
27409a52d85SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
27509a52d85SRichard Henderson 
27609a52d85SRichard Henderson     tcg_gen_shri_i64(t, a, sh - 1);
27709a52d85SRichard Henderson     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
27809a52d85SRichard Henderson     tcg_gen_vec_sar8i_i64(d, a, sh);
27909a52d85SRichard Henderson     tcg_gen_vec_add8_i64(d, d, t);
28009a52d85SRichard Henderson }
28109a52d85SRichard Henderson 
/*
 * Signed rounding shift right by @sh, using the packed 16-bit i64 ops:
 * d = (a >> sh) + ((a >> (sh - 1)) & 1) in each lane.
 */
static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    /* t = rounding bit of every lane, isolated in each lane's bit 0. */
    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_sar16i_i64(d, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}
29109a52d85SRichard Henderson 
/*
 * Signed rounding shift right of a 32-bit value by @sh:
 * d = (a >> sh) + bit (sh - 1) of a.
 * A shift of exactly 32 is accepted and yields zero.
 */
void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t;

    /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
    if (sh == 32) {
        tcg_gen_movi_i32(d, 0);
        return;
    }
    t = tcg_temp_new_i32();
    /* t = rounding bit, i.e. the last bit shifted out. */
    tcg_gen_extract_i32(t, a, sh - 1, 1);
    tcg_gen_sari_i32(d, a, sh);
    tcg_gen_add_i32(d, d, t);
}
30609a52d85SRichard Henderson 
/*
 * Signed rounding shift right of a 64-bit value by @sh:
 * d = (a >> sh) + bit (sh - 1) of a.
 * NOTE(review): there is no special case for sh == 64 here; callers
 * presumably guarantee 1 <= sh <= 63 -- confirm against callers.
 */
void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    /* t = rounding bit, i.e. the last bit shifted out. */
    tcg_gen_extract_i64(t, a, sh - 1, 1);
    tcg_gen_sari_i64(d, a, sh);
    tcg_gen_add_i64(d, d, t);
}
31509a52d85SRichard Henderson 
/*
 * Host-vector form of the signed rounding shift right:
 * d = (a >> sh) + ((a >> (sh - 1)) & 1), element size @vece.
 */
static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec ones = tcg_constant_vec_matching(d, vece, 1);

    /* t = rounding bit of each element. */
    tcg_gen_shri_vec(vece, t, a, sh - 1);
    tcg_gen_and_vec(vece, t, t, ones);
    tcg_gen_sari_vec(vece, d, a, sh);
    tcg_gen_add_vec(vece, d, d, t);
}
32609a52d85SRichard Henderson 
gen_gvec_srshr(unsigned vece,uint32_t rd_ofs,uint32_t rm_ofs,int64_t shift,uint32_t opr_sz,uint32_t max_sz)32709a52d85SRichard Henderson void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
32809a52d85SRichard Henderson                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
32909a52d85SRichard Henderson {
33009a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
33109a52d85SRichard Henderson         INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
33209a52d85SRichard Henderson     };
33309a52d85SRichard Henderson     static const GVecGen2i ops[4] = {
33409a52d85SRichard Henderson         { .fni8 = gen_srshr8_i64,
33509a52d85SRichard Henderson           .fniv = gen_srshr_vec,
33609a52d85SRichard Henderson           .fno = gen_helper_gvec_srshr_b,
33709a52d85SRichard Henderson           .opt_opc = vecop_list,
33809a52d85SRichard Henderson           .vece = MO_8 },
33909a52d85SRichard Henderson         { .fni8 = gen_srshr16_i64,
34009a52d85SRichard Henderson           .fniv = gen_srshr_vec,
34109a52d85SRichard Henderson           .fno = gen_helper_gvec_srshr_h,
34209a52d85SRichard Henderson           .opt_opc = vecop_list,
34309a52d85SRichard Henderson           .vece = MO_16 },
34409a52d85SRichard Henderson         { .fni4 = gen_srshr32_i32,
34509a52d85SRichard Henderson           .fniv = gen_srshr_vec,
34609a52d85SRichard Henderson           .fno = gen_helper_gvec_srshr_s,
34709a52d85SRichard Henderson           .opt_opc = vecop_list,
34809a52d85SRichard Henderson           .vece = MO_32 },
34909a52d85SRichard Henderson         { .fni8 = gen_srshr64_i64,
35009a52d85SRichard Henderson           .fniv = gen_srshr_vec,
35109a52d85SRichard Henderson           .fno = gen_helper_gvec_srshr_d,
35209a52d85SRichard Henderson           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
35309a52d85SRichard Henderson           .opt_opc = vecop_list,
35409a52d85SRichard Henderson           .vece = MO_64 },
35509a52d85SRichard Henderson     };
35609a52d85SRichard Henderson 
35709a52d85SRichard Henderson     /* tszimm encoding produces immediates in the range [1..esize] */
35809a52d85SRichard Henderson     tcg_debug_assert(shift > 0);
35909a52d85SRichard Henderson     tcg_debug_assert(shift <= (8 << vece));
36009a52d85SRichard Henderson 
36109a52d85SRichard Henderson     if (shift == (8 << vece)) {
36209a52d85SRichard Henderson         /*
36309a52d85SRichard Henderson          * Shifts larger than the element size are architecturally valid.
36409a52d85SRichard Henderson          * Signed results in all sign bits.  With rounding, this produces
36509a52d85SRichard Henderson          *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
36609a52d85SRichard Henderson          * I.e. always zero.
36709a52d85SRichard Henderson          */
36809a52d85SRichard Henderson         tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
36909a52d85SRichard Henderson     } else {
37009a52d85SRichard Henderson         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
37109a52d85SRichard Henderson     }
37209a52d85SRichard Henderson }
37309a52d85SRichard Henderson 
/*
 * d += signed rounding shift right of a by @sh, packed 8-bit lanes.
 * The shifted value is built in a temporary by gen_srshr8_i64().
 */
static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr8_i64(t, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}
38109a52d85SRichard Henderson 
/*
 * d += signed rounding shift right of a by @sh, packed 16-bit lanes.
 * The shifted value is built in a temporary by gen_srshr16_i64().
 */
static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr16_i64(t, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}
38909a52d85SRichard Henderson 
/*
 * d += signed rounding shift right of the 32-bit value a by @sh.
 * The shifted value is built in a temporary by gen_srshr32_i32().
 */
static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_srshr32_i32(t, a, sh);
    tcg_gen_add_i32(d, d, t);
}
39709a52d85SRichard Henderson 
/*
 * d += signed rounding shift right of the 64-bit value a by @sh.
 * The shifted value is built in a temporary by gen_srshr64_i64().
 */
static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr64_i64(t, a, sh);
    tcg_gen_add_i64(d, d, t);
}
40509a52d85SRichard Henderson 
/*
 * Host-vector form: d += signed rounding shift right of a by @sh.
 * The shifted value is built in a temporary by gen_srshr_vec().
 */
static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    gen_srshr_vec(vece, t, a, sh);
    tcg_gen_add_vec(vece, d, d, t);
}
41309a52d85SRichard Henderson 
gen_gvec_srsra(unsigned vece,uint32_t rd_ofs,uint32_t rm_ofs,int64_t shift,uint32_t opr_sz,uint32_t max_sz)41409a52d85SRichard Henderson void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
41509a52d85SRichard Henderson                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
41609a52d85SRichard Henderson {
41709a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
41809a52d85SRichard Henderson         INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
41909a52d85SRichard Henderson     };
42009a52d85SRichard Henderson     static const GVecGen2i ops[4] = {
42109a52d85SRichard Henderson         { .fni8 = gen_srsra8_i64,
42209a52d85SRichard Henderson           .fniv = gen_srsra_vec,
42309a52d85SRichard Henderson           .fno = gen_helper_gvec_srsra_b,
42409a52d85SRichard Henderson           .opt_opc = vecop_list,
42509a52d85SRichard Henderson           .load_dest = true,
42609a52d85SRichard Henderson           .vece = MO_8 },
42709a52d85SRichard Henderson         { .fni8 = gen_srsra16_i64,
42809a52d85SRichard Henderson           .fniv = gen_srsra_vec,
42909a52d85SRichard Henderson           .fno = gen_helper_gvec_srsra_h,
43009a52d85SRichard Henderson           .opt_opc = vecop_list,
43109a52d85SRichard Henderson           .load_dest = true,
43209a52d85SRichard Henderson           .vece = MO_16 },
43309a52d85SRichard Henderson         { .fni4 = gen_srsra32_i32,
43409a52d85SRichard Henderson           .fniv = gen_srsra_vec,
43509a52d85SRichard Henderson           .fno = gen_helper_gvec_srsra_s,
43609a52d85SRichard Henderson           .opt_opc = vecop_list,
43709a52d85SRichard Henderson           .load_dest = true,
43809a52d85SRichard Henderson           .vece = MO_32 },
43909a52d85SRichard Henderson         { .fni8 = gen_srsra64_i64,
44009a52d85SRichard Henderson           .fniv = gen_srsra_vec,
44109a52d85SRichard Henderson           .fno = gen_helper_gvec_srsra_d,
44209a52d85SRichard Henderson           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
44309a52d85SRichard Henderson           .opt_opc = vecop_list,
44409a52d85SRichard Henderson           .load_dest = true,
44509a52d85SRichard Henderson           .vece = MO_64 },
44609a52d85SRichard Henderson     };
44709a52d85SRichard Henderson 
44809a52d85SRichard Henderson     /* tszimm encoding produces immediates in the range [1..esize] */
44909a52d85SRichard Henderson     tcg_debug_assert(shift > 0);
45009a52d85SRichard Henderson     tcg_debug_assert(shift <= (8 << vece));
45109a52d85SRichard Henderson 
45209a52d85SRichard Henderson     /*
45309a52d85SRichard Henderson      * Shifts larger than the element size are architecturally valid.
45409a52d85SRichard Henderson      * Signed results in all sign bits.  With rounding, this produces
45509a52d85SRichard Henderson      *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
45609a52d85SRichard Henderson      * I.e. always zero.  With accumulation, this leaves D unchanged.
45709a52d85SRichard Henderson      */
45809a52d85SRichard Henderson     if (shift == (8 << vece)) {
45909a52d85SRichard Henderson         /* Nop, but we do need to clear the tail. */
46009a52d85SRichard Henderson         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
46109a52d85SRichard Henderson     } else {
46209a52d85SRichard Henderson         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
46309a52d85SRichard Henderson     }
46409a52d85SRichard Henderson }
46509a52d85SRichard Henderson 
/*
 * Unsigned rounding shift right by @sh, using the packed 8-bit i64 ops:
 * d = (a >> sh) + ((a >> (sh - 1)) & 1) in each lane.
 */
static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    /* t = rounding bit of every lane, isolated in each lane's bit 0. */
    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_shr8i_i64(d, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}
47509a52d85SRichard Henderson 
/*
 * Unsigned rounding shift right by @sh, using the packed 16-bit i64 ops:
 * d = (a >> sh) + ((a >> (sh - 1)) & 1) in each lane.
 */
static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    /* t = rounding bit of every lane, isolated in each lane's bit 0. */
    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_shr16i_i64(d, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}
48509a52d85SRichard Henderson 
/*
 * Unsigned rounding shift right of a 32-bit value:
 *   d = (a >> sh) + bit (sh - 1) of a.
 *
 * A shift by the full input size (sh == 32) is accepted for the benefit
 * of trans_URSHR_ri: the truncated shift contributes nothing, so the
 * result is just the rounding bit, i.e. bit 31 of the input.
 */
void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    if (sh == 32) {
        tcg_gen_extract_i32(d, a, 31, 1);
    } else {
        TCGv_i32 round = tcg_temp_new_i32();

        /* Capture the rounding bit before d (which may be a) is written. */
        tcg_gen_extract_i32(round, a, sh - 1, 1);
        tcg_gen_shri_i32(d, a, sh);
        tcg_gen_add_i32(d, d, round);
    }
}
50009a52d85SRichard Henderson 
/*
 * Unsigned rounding shift right of a 64-bit value:
 *   d = (a >> sh) + bit (sh - 1) of a.
 *
 * Unlike gen_urshr32_i32 there is no special case for a shift by the
 * full width; gen_ursra64_i64 in this file handles sh == 64 itself
 * before calling in here.
 */
void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 round = tcg_temp_new_i64();
    const int64_t round_pos = sh - 1;

    /* Capture the rounding bit before d (which may be a) is written. */
    tcg_gen_extract_i64(round, a, round_pos, 1);
    tcg_gen_shri_i64(d, a, sh);
    tcg_gen_add_i64(d, d, round);
}
50909a52d85SRichard Henderson 
/*
 * Host-vector form of the unsigned rounding shift right:
 *   d = (a >> shift) + ((a >> (shift - 1)) & 1)    per element of size vece.
 *
 * The callers in this file do not pass a shift equal to the element size.
 */
static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
{
    TCGv_vec round = tcg_temp_new_vec_matching(d);
    TCGv_vec lsb = tcg_constant_vec_matching(d, vece, 1);

    /* Isolate the last bit shifted out of every element. */
    tcg_gen_shri_vec(vece, round, a, shift - 1);
    tcg_gen_and_vec(vece, round, round, lsb);

    /* Truncating shift plus the rounding bit. */
    tcg_gen_shri_vec(vece, d, a, shift);
    tcg_gen_add_vec(vece, d, d, round);
}
52009a52d85SRichard Henderson 
gen_gvec_urshr(unsigned vece,uint32_t rd_ofs,uint32_t rm_ofs,int64_t shift,uint32_t opr_sz,uint32_t max_sz)52109a52d85SRichard Henderson void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
52209a52d85SRichard Henderson                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
52309a52d85SRichard Henderson {
52409a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
52509a52d85SRichard Henderson         INDEX_op_shri_vec, INDEX_op_add_vec, 0
52609a52d85SRichard Henderson     };
52709a52d85SRichard Henderson     static const GVecGen2i ops[4] = {
52809a52d85SRichard Henderson         { .fni8 = gen_urshr8_i64,
52909a52d85SRichard Henderson           .fniv = gen_urshr_vec,
53009a52d85SRichard Henderson           .fno = gen_helper_gvec_urshr_b,
53109a52d85SRichard Henderson           .opt_opc = vecop_list,
53209a52d85SRichard Henderson           .vece = MO_8 },
53309a52d85SRichard Henderson         { .fni8 = gen_urshr16_i64,
53409a52d85SRichard Henderson           .fniv = gen_urshr_vec,
53509a52d85SRichard Henderson           .fno = gen_helper_gvec_urshr_h,
53609a52d85SRichard Henderson           .opt_opc = vecop_list,
53709a52d85SRichard Henderson           .vece = MO_16 },
53809a52d85SRichard Henderson         { .fni4 = gen_urshr32_i32,
53909a52d85SRichard Henderson           .fniv = gen_urshr_vec,
54009a52d85SRichard Henderson           .fno = gen_helper_gvec_urshr_s,
54109a52d85SRichard Henderson           .opt_opc = vecop_list,
54209a52d85SRichard Henderson           .vece = MO_32 },
54309a52d85SRichard Henderson         { .fni8 = gen_urshr64_i64,
54409a52d85SRichard Henderson           .fniv = gen_urshr_vec,
54509a52d85SRichard Henderson           .fno = gen_helper_gvec_urshr_d,
54609a52d85SRichard Henderson           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
54709a52d85SRichard Henderson           .opt_opc = vecop_list,
54809a52d85SRichard Henderson           .vece = MO_64 },
54909a52d85SRichard Henderson     };
55009a52d85SRichard Henderson 
55109a52d85SRichard Henderson     /* tszimm encoding produces immediates in the range [1..esize] */
55209a52d85SRichard Henderson     tcg_debug_assert(shift > 0);
55309a52d85SRichard Henderson     tcg_debug_assert(shift <= (8 << vece));
55409a52d85SRichard Henderson 
55509a52d85SRichard Henderson     if (shift == (8 << vece)) {
55609a52d85SRichard Henderson         /*
55709a52d85SRichard Henderson          * Shifts larger than the element size are architecturally valid.
55809a52d85SRichard Henderson          * Unsigned results in zero.  With rounding, this produces a
55909a52d85SRichard Henderson          * copy of the most significant bit.
56009a52d85SRichard Henderson          */
56109a52d85SRichard Henderson         tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
56209a52d85SRichard Henderson     } else {
56309a52d85SRichard Henderson         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
56409a52d85SRichard Henderson     }
56509a52d85SRichard Henderson }
56609a52d85SRichard Henderson 
/*
 * Unsigned rounding shift right and accumulate on the 8-bit lanes of a
 * 64-bit value: d += urshr(a, sh), lane-wise.
 *
 * For sh == 8 the rounded shift degenerates to the top bit of each
 * lane, which is computed with a plain lane-wise shift by 7.
 */
static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 addend = tcg_temp_new_i64();

    if (sh != 8) {
        gen_urshr8_i64(addend, a, sh);
    } else {
        tcg_gen_vec_shr8i_i64(addend, a, 7);
    }
    tcg_gen_vec_add8_i64(d, d, addend);
}
57809a52d85SRichard Henderson 
/*
 * Unsigned rounding shift right and accumulate on the 16-bit lanes of a
 * 64-bit value: d += urshr(a, sh), lane-wise.
 *
 * For sh == 16 the rounded shift degenerates to the top bit of each
 * lane, which is computed with a plain lane-wise shift by 15.
 */
static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 addend = tcg_temp_new_i64();

    if (sh != 16) {
        gen_urshr16_i64(addend, a, sh);
    } else {
        tcg_gen_vec_shr16i_i64(addend, a, 15);
    }
    tcg_gen_vec_add16_i64(d, d, addend);
}
59009a52d85SRichard Henderson 
/*
 * Unsigned rounding shift right and accumulate, 32-bit element:
 *   d += urshr(a, sh).
 *
 * For sh == 32 the rounded shift is just bit 31 of the input, obtained
 * here with a plain shift by 31.
 */
static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 addend = tcg_temp_new_i32();

    if (sh != 32) {
        gen_urshr32_i32(addend, a, sh);
    } else {
        tcg_gen_shri_i32(addend, a, 31);
    }
    tcg_gen_add_i32(d, d, addend);
}
60209a52d85SRichard Henderson 
/*
 * Unsigned rounding shift right and accumulate, 64-bit element:
 *   d += urshr(a, sh).
 *
 * For sh == 64 the rounded shift is just bit 63 of the input, obtained
 * here with a plain shift by 63; gen_urshr64_i64 is only used for
 * smaller shifts.
 */
static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 addend = tcg_temp_new_i64();

    if (sh != 64) {
        gen_urshr64_i64(addend, a, sh);
    } else {
        tcg_gen_shri_i64(addend, a, 63);
    }
    tcg_gen_add_i64(d, d, addend);
}
61409a52d85SRichard Henderson 
/*
 * Host-vector form of unsigned rounding shift right and accumulate:
 *   d += urshr(a, sh)    per element of size vece.
 *
 * A shift by the full element size reduces to the top bit of each
 * element, i.e. a plain shift by esize - 1.
 */
static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec addend = tcg_temp_new_vec_matching(d);
    const int64_t esize = 8 << vece;

    if (sh != esize) {
        gen_urshr_vec(vece, addend, a, sh);
    } else {
        tcg_gen_shri_vec(vece, addend, a, sh - 1);
    }
    tcg_gen_add_vec(vece, d, d, addend);
}
62609a52d85SRichard Henderson 
gen_gvec_ursra(unsigned vece,uint32_t rd_ofs,uint32_t rm_ofs,int64_t shift,uint32_t opr_sz,uint32_t max_sz)62709a52d85SRichard Henderson void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
62809a52d85SRichard Henderson                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
62909a52d85SRichard Henderson {
63009a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
63109a52d85SRichard Henderson         INDEX_op_shri_vec, INDEX_op_add_vec, 0
63209a52d85SRichard Henderson     };
63309a52d85SRichard Henderson     static const GVecGen2i ops[4] = {
63409a52d85SRichard Henderson         { .fni8 = gen_ursra8_i64,
63509a52d85SRichard Henderson           .fniv = gen_ursra_vec,
63609a52d85SRichard Henderson           .fno = gen_helper_gvec_ursra_b,
63709a52d85SRichard Henderson           .opt_opc = vecop_list,
63809a52d85SRichard Henderson           .load_dest = true,
63909a52d85SRichard Henderson           .vece = MO_8 },
64009a52d85SRichard Henderson         { .fni8 = gen_ursra16_i64,
64109a52d85SRichard Henderson           .fniv = gen_ursra_vec,
64209a52d85SRichard Henderson           .fno = gen_helper_gvec_ursra_h,
64309a52d85SRichard Henderson           .opt_opc = vecop_list,
64409a52d85SRichard Henderson           .load_dest = true,
64509a52d85SRichard Henderson           .vece = MO_16 },
64609a52d85SRichard Henderson         { .fni4 = gen_ursra32_i32,
64709a52d85SRichard Henderson           .fniv = gen_ursra_vec,
64809a52d85SRichard Henderson           .fno = gen_helper_gvec_ursra_s,
64909a52d85SRichard Henderson           .opt_opc = vecop_list,
65009a52d85SRichard Henderson           .load_dest = true,
65109a52d85SRichard Henderson           .vece = MO_32 },
65209a52d85SRichard Henderson         { .fni8 = gen_ursra64_i64,
65309a52d85SRichard Henderson           .fniv = gen_ursra_vec,
65409a52d85SRichard Henderson           .fno = gen_helper_gvec_ursra_d,
65509a52d85SRichard Henderson           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
65609a52d85SRichard Henderson           .opt_opc = vecop_list,
65709a52d85SRichard Henderson           .load_dest = true,
65809a52d85SRichard Henderson           .vece = MO_64 },
65909a52d85SRichard Henderson     };
66009a52d85SRichard Henderson 
66109a52d85SRichard Henderson     /* tszimm encoding produces immediates in the range [1..esize] */
66209a52d85SRichard Henderson     tcg_debug_assert(shift > 0);
66309a52d85SRichard Henderson     tcg_debug_assert(shift <= (8 << vece));
66409a52d85SRichard Henderson 
66509a52d85SRichard Henderson     tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
66609a52d85SRichard Henderson }
66709a52d85SRichard Henderson 
/*
 * Shift right and insert on the 8-bit lanes of a 64-bit value: the low
 * (8 - shift) bits of each lane of d are replaced by the matching lane
 * of a shifted right by shift; the top shift bits of d are preserved.
 */
static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    /* Bits of every lane that receive shifted-in source data. */
    const uint64_t from_src = dup_const(MO_8, 0xff >> shift);
    TCGv_i64 ins = tcg_temp_new_i64();

    /* 64-bit shift; the mask drops bits that crossed a lane boundary. */
    tcg_gen_shri_i64(ins, a, shift);
    tcg_gen_andi_i64(ins, ins, from_src);

    /* Clear the insertion field in d and merge. */
    tcg_gen_andi_i64(d, d, ~from_src);
    tcg_gen_or_i64(d, d, ins);
}
67809a52d85SRichard Henderson 
/*
 * Shift right and insert on the 16-bit lanes of a 64-bit value: the low
 * (16 - shift) bits of each lane of d are replaced by the matching lane
 * of a shifted right by shift; the top shift bits of d are preserved.
 */
static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    /* Bits of every lane that receive shifted-in source data. */
    const uint64_t from_src = dup_const(MO_16, 0xffff >> shift);
    TCGv_i64 ins = tcg_temp_new_i64();

    /* 64-bit shift; the mask drops bits that crossed a lane boundary. */
    tcg_gen_shri_i64(ins, a, shift);
    tcg_gen_andi_i64(ins, ins, from_src);

    /* Clear the insertion field in d and merge. */
    tcg_gen_andi_i64(d, d, ~from_src);
    tcg_gen_or_i64(d, d, ins);
}
68909a52d85SRichard Henderson 
/*
 * Shift right and insert, 32-bit element: the low (32 - shift) bits of
 * d are replaced by a >> shift; the top shift bits of d are preserved.
 *
 * Note that the input a is overwritten with the shifted value.
 */
static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    const int32_t field_len = 32 - shift;

    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_deposit_i32(d, d, a, 0, field_len);
}
69509a52d85SRichard Henderson 
/*
 * Shift right and insert, 64-bit element: the low (64 - shift) bits of
 * d are replaced by a >> shift; the top shift bits of d are preserved.
 *
 * Note that the input a is overwritten with the shifted value.
 */
static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    const int64_t field_len = 64 - shift;

    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_deposit_i64(d, d, a, 0, field_len);
}
70109a52d85SRichard Henderson 
/*
 * Host-vector form of shift right and insert: per element of size vece,
 * keep the top sh bits of d and fill the rest with a >> sh.
 */
static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec ins = tcg_temp_new_vec_matching(d);
    const int esize = 8 << vece;
    /* The sh most significant bits of an element: what survives in d. */
    const int64_t keep_bits = MAKE_64BIT_MASK(esize - sh, sh);
    TCGv_vec keep = tcg_constant_vec_matching(d, vece, keep_bits);

    /* The logical shift already leaves zeros in the kept positions. */
    tcg_gen_shri_vec(vece, ins, a, sh);
    tcg_gen_and_vec(vece, d, d, keep);
    tcg_gen_or_vec(vece, d, d, ins);
}
71209a52d85SRichard Henderson 
gen_gvec_sri(unsigned vece,uint32_t rd_ofs,uint32_t rm_ofs,int64_t shift,uint32_t opr_sz,uint32_t max_sz)71309a52d85SRichard Henderson void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
71409a52d85SRichard Henderson                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
71509a52d85SRichard Henderson {
71609a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
71709a52d85SRichard Henderson     const GVecGen2i ops[4] = {
71809a52d85SRichard Henderson         { .fni8 = gen_shr8_ins_i64,
71909a52d85SRichard Henderson           .fniv = gen_shr_ins_vec,
72009a52d85SRichard Henderson           .fno = gen_helper_gvec_sri_b,
72109a52d85SRichard Henderson           .load_dest = true,
72209a52d85SRichard Henderson           .opt_opc = vecop_list,
72309a52d85SRichard Henderson           .vece = MO_8 },
72409a52d85SRichard Henderson         { .fni8 = gen_shr16_ins_i64,
72509a52d85SRichard Henderson           .fniv = gen_shr_ins_vec,
72609a52d85SRichard Henderson           .fno = gen_helper_gvec_sri_h,
72709a52d85SRichard Henderson           .load_dest = true,
72809a52d85SRichard Henderson           .opt_opc = vecop_list,
72909a52d85SRichard Henderson           .vece = MO_16 },
73009a52d85SRichard Henderson         { .fni4 = gen_shr32_ins_i32,
73109a52d85SRichard Henderson           .fniv = gen_shr_ins_vec,
73209a52d85SRichard Henderson           .fno = gen_helper_gvec_sri_s,
73309a52d85SRichard Henderson           .load_dest = true,
73409a52d85SRichard Henderson           .opt_opc = vecop_list,
73509a52d85SRichard Henderson           .vece = MO_32 },
73609a52d85SRichard Henderson         { .fni8 = gen_shr64_ins_i64,
73709a52d85SRichard Henderson           .fniv = gen_shr_ins_vec,
73809a52d85SRichard Henderson           .fno = gen_helper_gvec_sri_d,
73909a52d85SRichard Henderson           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
74009a52d85SRichard Henderson           .load_dest = true,
74109a52d85SRichard Henderson           .opt_opc = vecop_list,
74209a52d85SRichard Henderson           .vece = MO_64 },
74309a52d85SRichard Henderson     };
74409a52d85SRichard Henderson 
74509a52d85SRichard Henderson     /* tszimm encoding produces immediates in the range [1..esize]. */
74609a52d85SRichard Henderson     tcg_debug_assert(shift > 0);
74709a52d85SRichard Henderson     tcg_debug_assert(shift <= (8 << vece));
74809a52d85SRichard Henderson 
74909a52d85SRichard Henderson     /* Shift of esize leaves destination unchanged. */
75009a52d85SRichard Henderson     if (shift < (8 << vece)) {
75109a52d85SRichard Henderson         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
75209a52d85SRichard Henderson     } else {
75309a52d85SRichard Henderson         /* Nop, but we do need to clear the tail. */
75409a52d85SRichard Henderson         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
75509a52d85SRichard Henderson     }
75609a52d85SRichard Henderson }
75709a52d85SRichard Henderson 
/*
 * Shift left and insert on the 8-bit lanes of a 64-bit value: the top
 * (8 - shift) bits of each lane of d are replaced by the matching lane
 * of a shifted left by shift; the low shift bits of d are preserved.
 */
static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    /* Bits of every lane that receive shifted-in source data. */
    const uint64_t from_src = dup_const(MO_8, 0xff << shift);
    TCGv_i64 ins = tcg_temp_new_i64();

    /* 64-bit shift; the mask drops bits that crossed a lane boundary. */
    tcg_gen_shli_i64(ins, a, shift);
    tcg_gen_andi_i64(ins, ins, from_src);

    /* Clear the insertion field in d and merge. */
    tcg_gen_andi_i64(d, d, ~from_src);
    tcg_gen_or_i64(d, d, ins);
}
76809a52d85SRichard Henderson 
/*
 * Shift left and insert on the 16-bit lanes of a 64-bit value: the top
 * (16 - shift) bits of each lane of d are replaced by the matching lane
 * of a shifted left by shift; the low shift bits of d are preserved.
 */
static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    /* Bits of every lane that receive shifted-in source data. */
    const uint64_t from_src = dup_const(MO_16, 0xffff << shift);
    TCGv_i64 ins = tcg_temp_new_i64();

    /* 64-bit shift; the mask drops bits that crossed a lane boundary. */
    tcg_gen_shli_i64(ins, a, shift);
    tcg_gen_andi_i64(ins, ins, from_src);

    /* Clear the insertion field in d and merge. */
    tcg_gen_andi_i64(d, d, ~from_src);
    tcg_gen_or_i64(d, d, ins);
}
77909a52d85SRichard Henderson 
/*
 * Shift left and insert, 32-bit element: bits [shift, 32) of d are
 * replaced by the low (32 - shift) bits of a; the low shift bits of d
 * are preserved.  A single deposit does both the shift and the merge.
 */
static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    const int32_t field_len = 32 - shift;

    tcg_gen_deposit_i32(d, d, a, shift, field_len);
}
78409a52d85SRichard Henderson 
/*
 * Shift left and insert, 64-bit element: bits [shift, 64) of d are
 * replaced by the low (64 - shift) bits of a; the low shift bits of d
 * are preserved.  A single deposit does both the shift and the merge.
 */
static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    const int64_t field_len = 64 - shift;

    tcg_gen_deposit_i64(d, d, a, shift, field_len);
}
78909a52d85SRichard Henderson 
/*
 * Host-vector form of shift left and insert: per element of size vece,
 * keep the low sh bits of d and fill the rest with a << sh.
 *
 * The only caller in this file never passes sh == 0 (it expands that
 * case as a plain move), so the mask below always has a non-zero length.
 */
static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec ins = tcg_temp_new_vec_matching(d);
    /* The sh least significant bits of an element: what survives in d. */
    TCGv_vec keep = tcg_constant_vec_matching(d, vece, MAKE_64BIT_MASK(0, sh));

    /* The shift already leaves zeros in the kept positions. */
    tcg_gen_shli_vec(vece, ins, a, sh);
    tcg_gen_and_vec(vece, d, d, keep);
    tcg_gen_or_vec(vece, d, d, ins);
}
79909a52d85SRichard Henderson 
gen_gvec_sli(unsigned vece,uint32_t rd_ofs,uint32_t rm_ofs,int64_t shift,uint32_t opr_sz,uint32_t max_sz)80009a52d85SRichard Henderson void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
80109a52d85SRichard Henderson                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
80209a52d85SRichard Henderson {
80309a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
80409a52d85SRichard Henderson     const GVecGen2i ops[4] = {
80509a52d85SRichard Henderson         { .fni8 = gen_shl8_ins_i64,
80609a52d85SRichard Henderson           .fniv = gen_shl_ins_vec,
80709a52d85SRichard Henderson           .fno = gen_helper_gvec_sli_b,
80809a52d85SRichard Henderson           .load_dest = true,
80909a52d85SRichard Henderson           .opt_opc = vecop_list,
81009a52d85SRichard Henderson           .vece = MO_8 },
81109a52d85SRichard Henderson         { .fni8 = gen_shl16_ins_i64,
81209a52d85SRichard Henderson           .fniv = gen_shl_ins_vec,
81309a52d85SRichard Henderson           .fno = gen_helper_gvec_sli_h,
81409a52d85SRichard Henderson           .load_dest = true,
81509a52d85SRichard Henderson           .opt_opc = vecop_list,
81609a52d85SRichard Henderson           .vece = MO_16 },
81709a52d85SRichard Henderson         { .fni4 = gen_shl32_ins_i32,
81809a52d85SRichard Henderson           .fniv = gen_shl_ins_vec,
81909a52d85SRichard Henderson           .fno = gen_helper_gvec_sli_s,
82009a52d85SRichard Henderson           .load_dest = true,
82109a52d85SRichard Henderson           .opt_opc = vecop_list,
82209a52d85SRichard Henderson           .vece = MO_32 },
82309a52d85SRichard Henderson         { .fni8 = gen_shl64_ins_i64,
82409a52d85SRichard Henderson           .fniv = gen_shl_ins_vec,
82509a52d85SRichard Henderson           .fno = gen_helper_gvec_sli_d,
82609a52d85SRichard Henderson           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
82709a52d85SRichard Henderson           .load_dest = true,
82809a52d85SRichard Henderson           .opt_opc = vecop_list,
82909a52d85SRichard Henderson           .vece = MO_64 },
83009a52d85SRichard Henderson     };
83109a52d85SRichard Henderson 
83209a52d85SRichard Henderson     /* tszimm encoding produces immediates in the range [0..esize-1]. */
83309a52d85SRichard Henderson     tcg_debug_assert(shift >= 0);
83409a52d85SRichard Henderson     tcg_debug_assert(shift < (8 << vece));
83509a52d85SRichard Henderson 
83609a52d85SRichard Henderson     if (shift == 0) {
83709a52d85SRichard Henderson         tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
83809a52d85SRichard Henderson     } else {
83909a52d85SRichard Henderson         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
84009a52d85SRichard Henderson     }
84109a52d85SRichard Henderson }
84209a52d85SRichard Henderson 
/*
 * Multiply-accumulate through the neon_mul_u8 / neon_add_u8 helpers:
 * d = d + a * b.  Used as the MO_8 expander of gen_gvec_mla.
 * The product is written back into a, so the input a is clobbered.
 */
static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 prod = a;      /* same TCG value as a, reused as scratch */

    gen_helper_neon_mul_u8(prod, a, b);
    gen_helper_neon_add_u8(d, d, prod);
}
84809a52d85SRichard Henderson 
/*
 * Multiply-subtract through the neon_mul_u8 / neon_sub_u8 helpers:
 * d = d - a * b.  Used as the MO_8 expander of gen_gvec_mls.
 * The product is written back into a, so the input a is clobbered.
 */
static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 prod = a;      /* same TCG value as a, reused as scratch */

    gen_helper_neon_mul_u8(prod, a, b);
    gen_helper_neon_sub_u8(d, d, prod);
}
85409a52d85SRichard Henderson 
/*
 * Multiply-accumulate through the neon_mul_u16 / neon_add_u16 helpers:
 * d = d + a * b.  Used as the MO_16 expander of gen_gvec_mla.
 * The product is written back into a, so the input a is clobbered.
 */
static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 prod = a;      /* same TCG value as a, reused as scratch */

    gen_helper_neon_mul_u16(prod, a, b);
    gen_helper_neon_add_u16(d, d, prod);
}
86009a52d85SRichard Henderson 
/*
 * Multiply-subtract through the neon_mul_u16 / neon_sub_u16 helpers:
 * d = d - a * b.  Used as the MO_16 expander of gen_gvec_mls.
 * The product is written back into a, so the input a is clobbered.
 */
static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 prod = a;      /* same TCG value as a, reused as scratch */

    gen_helper_neon_mul_u16(prod, a, b);
    gen_helper_neon_sub_u16(d, d, prod);
}
86609a52d85SRichard Henderson 
/*
 * 32-bit multiply-accumulate: d = d + a * b.
 * The product is written back into a, so the input a is clobbered.
 */
static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 prod = a;      /* same TCG value as a, reused as scratch */

    tcg_gen_mul_i32(prod, a, b);
    tcg_gen_add_i32(d, d, prod);
}
87209a52d85SRichard Henderson 
/*
 * 32-bit multiply-subtract: d = d - a * b.
 * The product is written back into a, so the input a is clobbered.
 */
static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 prod = a;      /* same TCG value as a, reused as scratch */

    tcg_gen_mul_i32(prod, a, b);
    tcg_gen_sub_i32(d, d, prod);
}
87809a52d85SRichard Henderson 
/*
 * 64-bit multiply-accumulate: d = d + a * b.
 * The product is written back into a, so the input a is clobbered.
 */
static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 prod = a;      /* same TCG value as a, reused as scratch */

    tcg_gen_mul_i64(prod, a, b);
    tcg_gen_add_i64(d, d, prod);
}
88409a52d85SRichard Henderson 
/*
 * 64-bit multiply-subtract: d = d - a * b.
 * The product is written back into a, so the input a is clobbered.
 */
static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 prod = a;      /* same TCG value as a, reused as scratch */

    tcg_gen_mul_i64(prod, a, b);
    tcg_gen_sub_i64(d, d, prod);
}
89009a52d85SRichard Henderson 
/*
 * Host-vector multiply-accumulate: d = d + a * b per element of size vece.
 * The product is written back into a, so the input a is clobbered.
 */
static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec prod = a;      /* same TCG value as a, reused as scratch */

    tcg_gen_mul_vec(vece, prod, a, b);
    tcg_gen_add_vec(vece, d, d, prod);
}
89609a52d85SRichard Henderson 
/*
 * Host-vector multiply-subtract: d = d - a * b per element of size vece.
 * The product is written back into a, so the input a is clobbered.
 */
static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec prod = a;      /* same TCG value as a, reused as scratch */

    tcg_gen_mul_vec(vece, prod, a, b);
    tcg_gen_sub_vec(vece, d, d, prod);
}
90209a52d85SRichard Henderson 
90309a52d85SRichard Henderson /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
90409a52d85SRichard Henderson  * these tables are shared with AArch64 which does support them.
90509a52d85SRichard Henderson  */
gen_gvec_mla(unsigned vece,uint32_t rd_ofs,uint32_t rn_ofs,uint32_t rm_ofs,uint32_t opr_sz,uint32_t max_sz)90609a52d85SRichard Henderson void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
90709a52d85SRichard Henderson                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
90809a52d85SRichard Henderson {
90909a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
91009a52d85SRichard Henderson         INDEX_op_mul_vec, INDEX_op_add_vec, 0
91109a52d85SRichard Henderson     };
91209a52d85SRichard Henderson     static const GVecGen3 ops[4] = {
91309a52d85SRichard Henderson         { .fni4 = gen_mla8_i32,
91409a52d85SRichard Henderson           .fniv = gen_mla_vec,
91509a52d85SRichard Henderson           .load_dest = true,
91609a52d85SRichard Henderson           .opt_opc = vecop_list,
91709a52d85SRichard Henderson           .vece = MO_8 },
91809a52d85SRichard Henderson         { .fni4 = gen_mla16_i32,
91909a52d85SRichard Henderson           .fniv = gen_mla_vec,
92009a52d85SRichard Henderson           .load_dest = true,
92109a52d85SRichard Henderson           .opt_opc = vecop_list,
92209a52d85SRichard Henderson           .vece = MO_16 },
92309a52d85SRichard Henderson         { .fni4 = gen_mla32_i32,
92409a52d85SRichard Henderson           .fniv = gen_mla_vec,
92509a52d85SRichard Henderson           .load_dest = true,
92609a52d85SRichard Henderson           .opt_opc = vecop_list,
92709a52d85SRichard Henderson           .vece = MO_32 },
92809a52d85SRichard Henderson         { .fni8 = gen_mla64_i64,
92909a52d85SRichard Henderson           .fniv = gen_mla_vec,
93009a52d85SRichard Henderson           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
93109a52d85SRichard Henderson           .load_dest = true,
93209a52d85SRichard Henderson           .opt_opc = vecop_list,
93309a52d85SRichard Henderson           .vece = MO_64 },
93409a52d85SRichard Henderson     };
93509a52d85SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
93609a52d85SRichard Henderson }
93709a52d85SRichard Henderson 
gen_gvec_mls(unsigned vece,uint32_t rd_ofs,uint32_t rn_ofs,uint32_t rm_ofs,uint32_t opr_sz,uint32_t max_sz)93809a52d85SRichard Henderson void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
93909a52d85SRichard Henderson                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
94009a52d85SRichard Henderson {
94109a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
94209a52d85SRichard Henderson         INDEX_op_mul_vec, INDEX_op_sub_vec, 0
94309a52d85SRichard Henderson     };
94409a52d85SRichard Henderson     static const GVecGen3 ops[4] = {
94509a52d85SRichard Henderson         { .fni4 = gen_mls8_i32,
94609a52d85SRichard Henderson           .fniv = gen_mls_vec,
94709a52d85SRichard Henderson           .load_dest = true,
94809a52d85SRichard Henderson           .opt_opc = vecop_list,
94909a52d85SRichard Henderson           .vece = MO_8 },
95009a52d85SRichard Henderson         { .fni4 = gen_mls16_i32,
95109a52d85SRichard Henderson           .fniv = gen_mls_vec,
95209a52d85SRichard Henderson           .load_dest = true,
95309a52d85SRichard Henderson           .opt_opc = vecop_list,
95409a52d85SRichard Henderson           .vece = MO_16 },
95509a52d85SRichard Henderson         { .fni4 = gen_mls32_i32,
95609a52d85SRichard Henderson           .fniv = gen_mls_vec,
95709a52d85SRichard Henderson           .load_dest = true,
95809a52d85SRichard Henderson           .opt_opc = vecop_list,
95909a52d85SRichard Henderson           .vece = MO_32 },
96009a52d85SRichard Henderson         { .fni8 = gen_mls64_i64,
96109a52d85SRichard Henderson           .fniv = gen_mls_vec,
96209a52d85SRichard Henderson           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
96309a52d85SRichard Henderson           .load_dest = true,
96409a52d85SRichard Henderson           .opt_opc = vecop_list,
96509a52d85SRichard Henderson           .vece = MO_64 },
96609a52d85SRichard Henderson     };
96709a52d85SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
96809a52d85SRichard Henderson }
96909a52d85SRichard Henderson 
97009a52d85SRichard Henderson /* CMTST : test is "if (X & Y != 0)". */
gen_cmtst_i32(TCGv_i32 d,TCGv_i32 a,TCGv_i32 b)97109a52d85SRichard Henderson static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
97209a52d85SRichard Henderson {
973013506e0SRichard Henderson     tcg_gen_negsetcond_i32(TCG_COND_TSTNE, d, a, b);
97409a52d85SRichard Henderson }
97509a52d85SRichard Henderson 
/*
 * CMTST, 64-bit form: the TSTNE condition tests a & b against zero and
 * the negated setcond stores the outcome in d as -1 (true) or 0 (false).
 * Not static: also usable from outside this file.
 */
void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_negsetcond_i64(TCG_COND_TSTNE, d, a, b);
}
98009a52d85SRichard Henderson 
/*
 * CMTST, host-vector form: a single vector compare with the TSTNE
 * condition, i.e. a per-element test of (a & b) != 0 written to d.
 */
static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_cmp_vec(TCG_COND_TSTNE, vece, d, a, b);
}
98509a52d85SRichard Henderson 
gen_gvec_cmtst(unsigned vece,uint32_t rd_ofs,uint32_t rn_ofs,uint32_t rm_ofs,uint32_t opr_sz,uint32_t max_sz)98609a52d85SRichard Henderson void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
98709a52d85SRichard Henderson                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
98809a52d85SRichard Henderson {
98909a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
99009a52d85SRichard Henderson     static const GVecGen3 ops[4] = {
99109a52d85SRichard Henderson         { .fni4 = gen_helper_neon_tst_u8,
99209a52d85SRichard Henderson           .fniv = gen_cmtst_vec,
99309a52d85SRichard Henderson           .opt_opc = vecop_list,
99409a52d85SRichard Henderson           .vece = MO_8 },
99509a52d85SRichard Henderson         { .fni4 = gen_helper_neon_tst_u16,
99609a52d85SRichard Henderson           .fniv = gen_cmtst_vec,
99709a52d85SRichard Henderson           .opt_opc = vecop_list,
99809a52d85SRichard Henderson           .vece = MO_16 },
99909a52d85SRichard Henderson         { .fni4 = gen_cmtst_i32,
100009a52d85SRichard Henderson           .fniv = gen_cmtst_vec,
100109a52d85SRichard Henderson           .opt_opc = vecop_list,
100209a52d85SRichard Henderson           .vece = MO_32 },
100309a52d85SRichard Henderson         { .fni8 = gen_cmtst_i64,
100409a52d85SRichard Henderson           .fniv = gen_cmtst_vec,
100509a52d85SRichard Henderson           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
100609a52d85SRichard Henderson           .opt_opc = vecop_list,
100709a52d85SRichard Henderson           .vece = MO_64 },
100809a52d85SRichard Henderson     };
100909a52d85SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
101009a52d85SRichard Henderson }
101109a52d85SRichard Henderson 
/*
 * 32-bit scalar USHL: shift @src by the signed byte held in the low
 * 8 bits of @shift.  A count accepted by the first select yields the
 * left shift, a count whose negation is accepted by the second select
 * yields the logical right shift, and anything else yields zero.
 */
void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
{
    TCGv_i32 shl_res = tcg_temp_new_i32();
    TCGv_i32 shr_res = tcg_temp_new_i32();
    TCGv_i32 l_count = tcg_temp_new_i32();
    TCGv_i32 r_count = tcg_temp_new_i32();
    TCGv_i32 c_zero = tcg_constant_i32(0);
    TCGv_i32 c_width = tcg_constant_i32(32);

    /* Left count is the sign-extended low byte; right count its negation. */
    tcg_gen_ext8s_i32(l_count, shift);
    tcg_gen_neg_i32(r_count, l_count);

    /*
     * Both shifts are emitted unconditionally.  TCG promises that an
     * out-of-range count gives an unspecified value rather than
     * undefined behaviour (no trap), so bad results are simply
     * filtered out by the selects below.
     */
    tcg_gen_shl_i32(shl_res, src, l_count);
    tcg_gen_shr_i32(shr_res, src, r_count);

    /* dst = (l_count <u 32) ? shl_res : 0 */
    tcg_gen_movcond_i32(TCG_COND_LTU, dst, l_count, c_width, shl_res, c_zero);
    /* dst = (r_count <u 32) ? shr_res : dst */
    tcg_gen_movcond_i32(TCG_COND_LTU, dst, r_count, c_width, shr_res, dst);
}
103309a52d85SRichard Henderson 
/*
 * 64-bit scalar USHL: shift @src by the signed byte held in the low
 * 8 bits of @shift.  A count accepted by the first select yields the
 * left shift, a count whose negation is accepted by the second select
 * yields the logical right shift, and anything else yields zero.
 */
void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
{
    TCGv_i64 shl_res = tcg_temp_new_i64();
    TCGv_i64 shr_res = tcg_temp_new_i64();
    TCGv_i64 l_count = tcg_temp_new_i64();
    TCGv_i64 r_count = tcg_temp_new_i64();
    TCGv_i64 c_zero = tcg_constant_i64(0);
    TCGv_i64 c_width = tcg_constant_i64(64);

    /* Left count is the sign-extended low byte; right count its negation. */
    tcg_gen_ext8s_i64(l_count, shift);
    tcg_gen_neg_i64(r_count, l_count);

    /*
     * Both shifts are emitted unconditionally.  TCG promises that an
     * out-of-range count gives an unspecified value rather than
     * undefined behaviour (no trap), so bad results are simply
     * filtered out by the selects below.
     */
    tcg_gen_shl_i64(shl_res, src, l_count);
    tcg_gen_shr_i64(shr_res, src, r_count);

    /* dst = (l_count <u 64) ? shl_res : 0 */
    tcg_gen_movcond_i64(TCG_COND_LTU, dst, l_count, c_width, shl_res, c_zero);
    /* dst = (r_count <u 64) ? shr_res : dst */
    tcg_gen_movcond_i64(TCG_COND_LTU, dst, r_count, c_width, shr_res, dst);
}
105509a52d85SRichard Henderson 
/*
 * Host-vector USHL.  Each element of @shift supplies a count in its
 * low byte; the left-shift and logical-right-shift candidates are
 * both computed, out-of-range candidates are replaced by zero, and
 * the two are OR-ed together into @dst.
 */
static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
                         TCGv_vec src, TCGv_vec shift)
{
    TCGv_vec shl_res = tcg_temp_new_vec_matching(dst);
    TCGv_vec shr_res = tcg_temp_new_vec_matching(dst);
    TCGv_vec l_count = tcg_temp_new_vec_matching(dst);
    TCGv_vec r_count = tcg_temp_new_vec_matching(dst);
    TCGv_vec c_width, c_zero;

    /* Right count is the negated shift; left count is the shift itself. */
    tcg_gen_neg_vec(vece, r_count, shift);
    if (vece != MO_8) {
        /* Wider elements: only the low byte of each count is used. */
        TCGv_vec byte_mask = tcg_constant_vec_matching(dst, vece, 0xff);
        tcg_gen_and_vec(vece, l_count, shift, byte_mask);
        tcg_gen_and_vec(vece, r_count, r_count, byte_mask);
    } else {
        tcg_gen_mov_vec(l_count, shift);
    }

    /*
     * TCG guarantees out-of-range shift counts give unspecified
     * results rather than undefined behaviour (no trap); such
     * results are thrown away below.
     */
    tcg_gen_shlv_vec(vece, shl_res, src, l_count);
    tcg_gen_shrv_vec(vece, shr_res, src, r_count);

    /*
     * GE (signed) versus GEU (unsigned) is chosen with the x86_64
     * host's instructions in mind.  With MO_8 the full byte is
     * significant, hence the unsigned compare; for wider elements the
     * counts were masked to a byte above, so a signed compare gives
     * the same answer.  Other tcg hosts have a full set of comparisons
     * and do not care.
     */
    c_zero = tcg_constant_vec_matching(dst, vece, 0);
    c_width = tcg_constant_vec_matching(dst, vece, 8 << vece);
    if (vece != MO_8) {
        tcg_gen_cmpsel_vec(TCG_COND_GE, vece, shl_res,
                           l_count, c_width, c_zero, shl_res);
        tcg_gen_cmpsel_vec(TCG_COND_GE, vece, shr_res,
                           r_count, c_width, c_zero, shr_res);
    } else {
        tcg_gen_cmpsel_vec(TCG_COND_GEU, vece, shl_res,
                           l_count, c_width, c_zero, shl_res);
        tcg_gen_cmpsel_vec(TCG_COND_GEU, vece, shr_res,
                           r_count, c_width, c_zero, shr_res);
    }
    tcg_gen_or_vec(vece, dst, shl_res, shr_res);
}
110009a52d85SRichard Henderson 
gen_gvec_ushl(unsigned vece,uint32_t rd_ofs,uint32_t rn_ofs,uint32_t rm_ofs,uint32_t opr_sz,uint32_t max_sz)110109a52d85SRichard Henderson void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
110209a52d85SRichard Henderson                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
110309a52d85SRichard Henderson {
110409a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
110509a52d85SRichard Henderson         INDEX_op_neg_vec, INDEX_op_shlv_vec,
1106c17e35b8SRichard Henderson         INDEX_op_shrv_vec, INDEX_op_cmpsel_vec, 0
110709a52d85SRichard Henderson     };
110809a52d85SRichard Henderson     static const GVecGen3 ops[4] = {
110909a52d85SRichard Henderson         { .fniv = gen_ushl_vec,
111009a52d85SRichard Henderson           .fno = gen_helper_gvec_ushl_b,
111109a52d85SRichard Henderson           .opt_opc = vecop_list,
111209a52d85SRichard Henderson           .vece = MO_8 },
111309a52d85SRichard Henderson         { .fniv = gen_ushl_vec,
111409a52d85SRichard Henderson           .fno = gen_helper_gvec_ushl_h,
111509a52d85SRichard Henderson           .opt_opc = vecop_list,
111609a52d85SRichard Henderson           .vece = MO_16 },
111709a52d85SRichard Henderson         { .fni4 = gen_ushl_i32,
111809a52d85SRichard Henderson           .fniv = gen_ushl_vec,
111909a52d85SRichard Henderson           .opt_opc = vecop_list,
112009a52d85SRichard Henderson           .vece = MO_32 },
112109a52d85SRichard Henderson         { .fni8 = gen_ushl_i64,
112209a52d85SRichard Henderson           .fniv = gen_ushl_vec,
112309a52d85SRichard Henderson           .opt_opc = vecop_list,
112409a52d85SRichard Henderson           .vece = MO_64 },
112509a52d85SRichard Henderson     };
112609a52d85SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
112709a52d85SRichard Henderson }
112809a52d85SRichard Henderson 
/*
 * 32-bit scalar SSHL: shift @src by the signed byte held in the low
 * 8 bits of @shift.  A negative count selects an arithmetic right
 * shift with the count clamped to 31; otherwise the left shift is
 * used, replaced by zero when the count exceeds 31.
 */
void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
{
    TCGv_i32 shl_res = tcg_temp_new_i32();
    TCGv_i32 sar_res = tcg_temp_new_i32();
    TCGv_i32 l_count = tcg_temp_new_i32();
    TCGv_i32 r_count = tcg_temp_new_i32();
    TCGv_i32 c_zero = tcg_constant_i32(0);
    TCGv_i32 c_top = tcg_constant_i32(31);

    /*
     * TCG guarantees out-of-range shift counts give unspecified
     * results rather than undefined behaviour (no trap), so the left
     * shift is issued blindly and corrected afterwards.
     */
    tcg_gen_ext8s_i32(l_count, shift);
    tcg_gen_neg_i32(r_count, l_count);
    tcg_gen_shl_i32(shl_res, src, l_count);

    /* Clamp the right count to 31 before the arithmetic shift. */
    tcg_gen_umin_i32(r_count, r_count, c_top);
    tcg_gen_sar_i32(sar_res, src, r_count);

    /* shl_res = (l_count <=u 31) ? shl_res : 0 */
    tcg_gen_movcond_i32(TCG_COND_LEU, shl_res, l_count, c_top, shl_res, c_zero);
    /* dst = (l_count < 0) ? sar_res : shl_res */
    tcg_gen_movcond_i32(TCG_COND_LT, dst, l_count, c_zero, sar_res, shl_res);
}
115109a52d85SRichard Henderson 
/*
 * 64-bit scalar SSHL: shift @src by the signed byte held in the low
 * 8 bits of @shift.  A negative count selects an arithmetic right
 * shift with the count clamped to 63; otherwise the left shift is
 * used, replaced by zero when the count exceeds 63.
 */
void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
{
    TCGv_i64 shl_res = tcg_temp_new_i64();
    TCGv_i64 sar_res = tcg_temp_new_i64();
    TCGv_i64 l_count = tcg_temp_new_i64();
    TCGv_i64 r_count = tcg_temp_new_i64();
    TCGv_i64 c_zero = tcg_constant_i64(0);
    TCGv_i64 c_top = tcg_constant_i64(63);

    /*
     * TCG guarantees out-of-range shift counts give unspecified
     * results rather than undefined behaviour (no trap), so the left
     * shift is issued blindly and corrected afterwards.
     */
    tcg_gen_ext8s_i64(l_count, shift);
    tcg_gen_neg_i64(r_count, l_count);
    tcg_gen_shl_i64(shl_res, src, l_count);

    /* Clamp the right count to 63 before the arithmetic shift. */
    tcg_gen_umin_i64(r_count, r_count, c_top);
    tcg_gen_sar_i64(sar_res, src, r_count);

    /* shl_res = (l_count <=u 63) ? shl_res : 0 */
    tcg_gen_movcond_i64(TCG_COND_LEU, shl_res, l_count, c_top, shl_res, c_zero);
    /* dst = (l_count < 0) ? sar_res : shl_res */
    tcg_gen_movcond_i64(TCG_COND_LT, dst, l_count, c_zero, sar_res, shl_res);
}
117409a52d85SRichard Henderson 
/*
 * Host-vector SSHL.  Each element of @shift supplies a count in its
 * low byte.  The left-shift and arithmetic-right-shift candidates are
 * both computed and the one matching the sign of the count is written
 * to @dst.
 */
static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
                         TCGv_vec src, TCGv_vec shift)
{
    TCGv_vec shl_res = tcg_temp_new_vec_matching(dst);
    TCGv_vec sar_res = tcg_temp_new_vec_matching(dst);
    TCGv_vec l_count = tcg_temp_new_vec_matching(dst);
    TCGv_vec r_count = tcg_temp_new_vec_matching(dst);
    TCGv_vec c_top, c_zero;

    /*
     * TCG guarantees out-of-range shift counts give unspecified
     * results rather than undefined behaviour (no trap); such
     * results are thrown away below.
     */
    tcg_gen_neg_vec(vece, r_count, shift);
    if (vece != MO_8) {
        /* Wider elements: only the low byte of each count is used. */
        TCGv_vec byte_mask = tcg_constant_vec_matching(dst, vece, 0xff);
        tcg_gen_and_vec(vece, l_count, shift, byte_mask);
        tcg_gen_and_vec(vece, r_count, r_count, byte_mask);
    } else {
        tcg_gen_mov_vec(l_count, shift);
    }

    /* Clamp the right count so an oversized right shift still yields -1. */
    c_top = tcg_constant_vec_matching(dst, vece, (8 << vece) - 1);
    tcg_gen_umin_vec(vece, r_count, r_count, c_top);

    tcg_gen_shlv_vec(vece, shl_res, src, l_count);
    tcg_gen_sarv_vec(vece, sar_res, src, r_count);

    /* Keep the left shift only while its count is in range. */
    c_zero = tcg_constant_vec_matching(dst, vece, 0);
    tcg_gen_cmpsel_vec(TCG_COND_GT, vece, shl_res,
                       l_count, c_top, c_zero, shl_res);

    /*
     * Pick the direction.  A byte element can be tested against zero
     * directly; a wider element was masked to 0..255 above, so its
     * sign is recovered by comparing against 0x80 (note the swapped
     * select operands in that case).
     */
    if (vece != MO_8) {
        TCGv_vec sign_bit = tcg_constant_vec_matching(dst, vece, 0x80);
        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst,
                           l_count, sign_bit, shl_res, sar_res);
    } else {
        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst,
                           l_count, c_zero, sar_res, shl_res);
    }
}
121709a52d85SRichard Henderson 
gen_gvec_sshl(unsigned vece,uint32_t rd_ofs,uint32_t rn_ofs,uint32_t rm_ofs,uint32_t opr_sz,uint32_t max_sz)121809a52d85SRichard Henderson void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
121909a52d85SRichard Henderson                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
122009a52d85SRichard Henderson {
122109a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
122209a52d85SRichard Henderson         INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
1223ee36a772SRichard Henderson         INDEX_op_sarv_vec, INDEX_op_cmpsel_vec, 0
122409a52d85SRichard Henderson     };
122509a52d85SRichard Henderson     static const GVecGen3 ops[4] = {
122609a52d85SRichard Henderson         { .fniv = gen_sshl_vec,
122709a52d85SRichard Henderson           .fno = gen_helper_gvec_sshl_b,
122809a52d85SRichard Henderson           .opt_opc = vecop_list,
122909a52d85SRichard Henderson           .vece = MO_8 },
123009a52d85SRichard Henderson         { .fniv = gen_sshl_vec,
123109a52d85SRichard Henderson           .fno = gen_helper_gvec_sshl_h,
123209a52d85SRichard Henderson           .opt_opc = vecop_list,
123309a52d85SRichard Henderson           .vece = MO_16 },
123409a52d85SRichard Henderson         { .fni4 = gen_sshl_i32,
123509a52d85SRichard Henderson           .fniv = gen_sshl_vec,
123609a52d85SRichard Henderson           .opt_opc = vecop_list,
123709a52d85SRichard Henderson           .vece = MO_32 },
123809a52d85SRichard Henderson         { .fni8 = gen_sshl_i64,
123909a52d85SRichard Henderson           .fniv = gen_sshl_vec,
124009a52d85SRichard Henderson           .opt_opc = vecop_list,
124109a52d85SRichard Henderson           .vece = MO_64 },
124209a52d85SRichard Henderson     };
124309a52d85SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
124409a52d85SRichard Henderson }
124509a52d85SRichard Henderson 
gen_gvec_srshl(unsigned vece,uint32_t rd_ofs,uint32_t rn_ofs,uint32_t rm_ofs,uint32_t opr_sz,uint32_t max_sz)1246940392c8SRichard Henderson void gen_gvec_srshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1247940392c8SRichard Henderson                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1248940392c8SRichard Henderson {
1249940392c8SRichard Henderson     static gen_helper_gvec_3 * const fns[] = {
1250940392c8SRichard Henderson         gen_helper_gvec_srshl_b, gen_helper_gvec_srshl_h,
1251940392c8SRichard Henderson         gen_helper_gvec_srshl_s, gen_helper_gvec_srshl_d,
1252940392c8SRichard Henderson     };
1253940392c8SRichard Henderson     tcg_debug_assert(vece <= MO_64);
1254940392c8SRichard Henderson     tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
1255940392c8SRichard Henderson }
1256940392c8SRichard Henderson 
gen_gvec_urshl(unsigned vece,uint32_t rd_ofs,uint32_t rn_ofs,uint32_t rm_ofs,uint32_t opr_sz,uint32_t max_sz)1257940392c8SRichard Henderson void gen_gvec_urshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1258940392c8SRichard Henderson                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1259940392c8SRichard Henderson {
1260940392c8SRichard Henderson     static gen_helper_gvec_3 * const fns[] = {
1261940392c8SRichard Henderson         gen_helper_gvec_urshl_b, gen_helper_gvec_urshl_h,
1262940392c8SRichard Henderson         gen_helper_gvec_urshl_s, gen_helper_gvec_urshl_d,
1263940392c8SRichard Henderson     };
1264940392c8SRichard Henderson     tcg_debug_assert(vece <= MO_64);
1265940392c8SRichard Henderson     tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
1266940392c8SRichard Henderson }
1267940392c8SRichard Henderson 
gen_neon_sqshl(unsigned vece,uint32_t rd_ofs,uint32_t rn_ofs,uint32_t rm_ofs,uint32_t opr_sz,uint32_t max_sz)1268e72a6878SRichard Henderson void gen_neon_sqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1269e72a6878SRichard Henderson                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1270e72a6878SRichard Henderson {
1271e72a6878SRichard Henderson     static gen_helper_gvec_3_ptr * const fns[] = {
1272e72a6878SRichard Henderson         gen_helper_neon_sqshl_b, gen_helper_neon_sqshl_h,
1273e72a6878SRichard Henderson         gen_helper_neon_sqshl_s, gen_helper_neon_sqshl_d,
1274e72a6878SRichard Henderson     };
1275e72a6878SRichard Henderson     tcg_debug_assert(vece <= MO_64);
1276e72a6878SRichard Henderson     tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
1277e72a6878SRichard Henderson                        opr_sz, max_sz, 0, fns[vece]);
1278e72a6878SRichard Henderson }
1279e72a6878SRichard Henderson 
gen_neon_uqshl(unsigned vece,uint32_t rd_ofs,uint32_t rn_ofs,uint32_t rm_ofs,uint32_t opr_sz,uint32_t max_sz)1280e72a6878SRichard Henderson void gen_neon_uqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1281e72a6878SRichard Henderson                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1282e72a6878SRichard Henderson {
1283e72a6878SRichard Henderson     static gen_helper_gvec_3_ptr * const fns[] = {
1284e72a6878SRichard Henderson         gen_helper_neon_uqshl_b, gen_helper_neon_uqshl_h,
1285e72a6878SRichard Henderson         gen_helper_neon_uqshl_s, gen_helper_neon_uqshl_d,
1286e72a6878SRichard Henderson     };
1287e72a6878SRichard Henderson     tcg_debug_assert(vece <= MO_64);
1288e72a6878SRichard Henderson     tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
1289e72a6878SRichard Henderson                        opr_sz, max_sz, 0, fns[vece]);
1290e72a6878SRichard Henderson }
1291e72a6878SRichard Henderson 
gen_neon_sqrshl(unsigned vece,uint32_t rd_ofs,uint32_t rn_ofs,uint32_t rm_ofs,uint32_t opr_sz,uint32_t max_sz)1292cef9d54fSRichard Henderson void gen_neon_sqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1293cef9d54fSRichard Henderson                      uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1294cef9d54fSRichard Henderson {
1295cef9d54fSRichard Henderson     static gen_helper_gvec_3_ptr * const fns[] = {
1296cef9d54fSRichard Henderson         gen_helper_neon_sqrshl_b, gen_helper_neon_sqrshl_h,
1297cef9d54fSRichard Henderson         gen_helper_neon_sqrshl_s, gen_helper_neon_sqrshl_d,
1298cef9d54fSRichard Henderson     };
1299cef9d54fSRichard Henderson     tcg_debug_assert(vece <= MO_64);
1300cef9d54fSRichard Henderson     tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
1301cef9d54fSRichard Henderson                        opr_sz, max_sz, 0, fns[vece]);
1302cef9d54fSRichard Henderson }
1303cef9d54fSRichard Henderson 
gen_neon_uqrshl(unsigned vece,uint32_t rd_ofs,uint32_t rn_ofs,uint32_t rm_ofs,uint32_t opr_sz,uint32_t max_sz)1304cef9d54fSRichard Henderson void gen_neon_uqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1305cef9d54fSRichard Henderson                      uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1306cef9d54fSRichard Henderson {
1307cef9d54fSRichard Henderson     static gen_helper_gvec_3_ptr * const fns[] = {
1308cef9d54fSRichard Henderson         gen_helper_neon_uqrshl_b, gen_helper_neon_uqrshl_h,
1309cef9d54fSRichard Henderson         gen_helper_neon_uqrshl_s, gen_helper_neon_uqrshl_d,
1310cef9d54fSRichard Henderson     };
1311cef9d54fSRichard Henderson     tcg_debug_assert(vece <= MO_64);
1312cef9d54fSRichard Henderson     tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
1313cef9d54fSRichard Henderson                        opr_sz, max_sz, 0, fns[vece]);
1314cef9d54fSRichard Henderson }
1315cef9d54fSRichard Henderson 
gen_neon_sqshli(unsigned vece,uint32_t rd_ofs,uint32_t rn_ofs,int64_t c,uint32_t opr_sz,uint32_t max_sz)1316*ef2b80ebSRichard Henderson void gen_neon_sqshli(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1317*ef2b80ebSRichard Henderson                      int64_t c, uint32_t opr_sz, uint32_t max_sz)
1318*ef2b80ebSRichard Henderson {
1319*ef2b80ebSRichard Henderson     static gen_helper_gvec_2_ptr * const fns[] = {
1320*ef2b80ebSRichard Henderson         gen_helper_neon_sqshli_b, gen_helper_neon_sqshli_h,
1321*ef2b80ebSRichard Henderson         gen_helper_neon_sqshli_s, gen_helper_neon_sqshli_d,
1322*ef2b80ebSRichard Henderson     };
1323*ef2b80ebSRichard Henderson     tcg_debug_assert(vece <= MO_64);
1324*ef2b80ebSRichard Henderson     tcg_debug_assert(c >= 0 && c <= (8 << vece));
1325*ef2b80ebSRichard Henderson     tcg_gen_gvec_2_ptr(rd_ofs, rn_ofs, tcg_env, opr_sz, max_sz, c, fns[vece]);
1326*ef2b80ebSRichard Henderson }
1327*ef2b80ebSRichard Henderson 
gen_neon_uqshli(unsigned vece,uint32_t rd_ofs,uint32_t rn_ofs,int64_t c,uint32_t opr_sz,uint32_t max_sz)1328*ef2b80ebSRichard Henderson void gen_neon_uqshli(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1329*ef2b80ebSRichard Henderson                      int64_t c, uint32_t opr_sz, uint32_t max_sz)
1330*ef2b80ebSRichard Henderson {
1331*ef2b80ebSRichard Henderson     static gen_helper_gvec_2_ptr * const fns[] = {
1332*ef2b80ebSRichard Henderson         gen_helper_neon_uqshli_b, gen_helper_neon_uqshli_h,
1333*ef2b80ebSRichard Henderson         gen_helper_neon_uqshli_s, gen_helper_neon_uqshli_d,
1334*ef2b80ebSRichard Henderson     };
1335*ef2b80ebSRichard Henderson     tcg_debug_assert(vece <= MO_64);
1336*ef2b80ebSRichard Henderson     tcg_debug_assert(c >= 0 && c <= (8 << vece));
1337*ef2b80ebSRichard Henderson     tcg_gen_gvec_2_ptr(rd_ofs, rn_ofs, tcg_env, opr_sz, max_sz, c, fns[vece]);
1338*ef2b80ebSRichard Henderson }
1339*ef2b80ebSRichard Henderson 
gen_neon_sqshlui(unsigned vece,uint32_t rd_ofs,uint32_t rn_ofs,int64_t c,uint32_t opr_sz,uint32_t max_sz)1340*ef2b80ebSRichard Henderson void gen_neon_sqshlui(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1341*ef2b80ebSRichard Henderson                       int64_t c, uint32_t opr_sz, uint32_t max_sz)
1342*ef2b80ebSRichard Henderson {
1343*ef2b80ebSRichard Henderson     static gen_helper_gvec_2_ptr * const fns[] = {
1344*ef2b80ebSRichard Henderson         gen_helper_neon_sqshlui_b, gen_helper_neon_sqshlui_h,
1345*ef2b80ebSRichard Henderson         gen_helper_neon_sqshlui_s, gen_helper_neon_sqshlui_d,
1346*ef2b80ebSRichard Henderson     };
1347*ef2b80ebSRichard Henderson     tcg_debug_assert(vece <= MO_64);
1348*ef2b80ebSRichard Henderson     tcg_debug_assert(c >= 0 && c <= (8 << vece));
1349*ef2b80ebSRichard Henderson     tcg_gen_gvec_2_ptr(rd_ofs, rn_ofs, tcg_env, opr_sz, max_sz, c, fns[vece]);
1350*ef2b80ebSRichard Henderson }
1351*ef2b80ebSRichard Henderson 
/*
 * Unsigned saturating add for an element of size @esz carried in a
 * 64-bit temporary.  @res receives min(a + b, element maximum); any
 * bits in which the clamped and unclamped sums differ are OR-ed into
 * @qc, so @qc becomes non-zero when saturation occurred.
 * NOTE(review): presumably @a and @b are zero-extended and @esz is
 * narrower than MO_64 (per the _bhs suffix) -- confirm with callers.
 */
void gen_uqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    /* All-ones value of the element width. */
    uint64_t elem_max = MAKE_64BIT_MASK(0, 8 << esz);
    TCGv_i64 sum = tcg_temp_new_i64();

    tcg_gen_add_i64(sum, a, b);
    tcg_gen_umin_i64(res, sum, tcg_constant_i64(elem_max));

    /* Non-zero difference between raw and clamped sum flags saturation. */
    tcg_gen_xor_i64(sum, sum, res);
    tcg_gen_or_i64(qc, qc, sum);
}
1362f4fa83d6SRichard Henderson 
/*
 * 64-bit unsigned saturating add.  The wrapped sum is compared with
 * @a: if it is smaller (unsigned) the addition carried out, and @res
 * becomes UINT64_MAX; otherwise @res is the sum.  Bits in which the
 * wrapped and final results differ are OR-ed into @qc.
 */
void gen_uqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 sum = tcg_temp_new_i64();

    tcg_gen_add_i64(sum, a, b);

    /* res = (sum <u a) ? UINT64_MAX : sum */
    tcg_gen_movcond_i64(TCG_COND_LTU, res, sum, a,
                        tcg_constant_i64(UINT64_MAX), sum);

    /* Non-zero difference between raw and final result flags saturation. */
    tcg_gen_xor_i64(sum, sum, res);
    tcg_gen_or_i64(qc, qc, sum);
}
1373f4fa83d6SRichard Henderson 
/*
 * Host-vector unsigned saturating add with saturation tracking.
 * @t receives the saturating sum of @a and @b; the bits in which it
 * differs from the plain (wrapping) sum are OR-ed into @qc.
 */
static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec wrapped = tcg_temp_new_vec_matching(t);

    /* The wrapping sum is taken first, before @t is written. */
    tcg_gen_add_vec(vece, wrapped, a, b);
    tcg_gen_usadd_vec(vece, t, a, b);

    tcg_gen_xor_vec(vece, wrapped, wrapped, t);
    tcg_gen_or_vec(vece, qc, qc, wrapped);
}
138309a52d85SRichard Henderson 
gen_gvec_uqadd_qc(unsigned vece,uint32_t rd_ofs,uint32_t rn_ofs,uint32_t rm_ofs,uint32_t opr_sz,uint32_t max_sz)138409a52d85SRichard Henderson void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
138509a52d85SRichard Henderson                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
138609a52d85SRichard Henderson {
138709a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
138876f4a8aeSRichard Henderson         INDEX_op_usadd_vec, INDEX_op_add_vec, 0
138909a52d85SRichard Henderson     };
139009a52d85SRichard Henderson     static const GVecGen4 ops[4] = {
139109a52d85SRichard Henderson         { .fniv = gen_uqadd_vec,
139209a52d85SRichard Henderson           .fno = gen_helper_gvec_uqadd_b,
139309a52d85SRichard Henderson           .write_aofs = true,
139409a52d85SRichard Henderson           .opt_opc = vecop_list,
139509a52d85SRichard Henderson           .vece = MO_8 },
139609a52d85SRichard Henderson         { .fniv = gen_uqadd_vec,
139709a52d85SRichard Henderson           .fno = gen_helper_gvec_uqadd_h,
139809a52d85SRichard Henderson           .write_aofs = true,
139909a52d85SRichard Henderson           .opt_opc = vecop_list,
140009a52d85SRichard Henderson           .vece = MO_16 },
140109a52d85SRichard Henderson         { .fniv = gen_uqadd_vec,
140209a52d85SRichard Henderson           .fno = gen_helper_gvec_uqadd_s,
140309a52d85SRichard Henderson           .write_aofs = true,
140409a52d85SRichard Henderson           .opt_opc = vecop_list,
140509a52d85SRichard Henderson           .vece = MO_32 },
140609a52d85SRichard Henderson         { .fniv = gen_uqadd_vec,
1407f4fa83d6SRichard Henderson           .fni8 = gen_uqadd_d,
140809a52d85SRichard Henderson           .fno = gen_helper_gvec_uqadd_d,
140909a52d85SRichard Henderson           .write_aofs = true,
141009a52d85SRichard Henderson           .opt_opc = vecop_list,
141109a52d85SRichard Henderson           .vece = MO_64 },
141209a52d85SRichard Henderson     };
141301d5665bSRichard Henderson 
141401d5665bSRichard Henderson     tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
141509a52d85SRichard Henderson     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
141609a52d85SRichard Henderson                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
141709a52d85SRichard Henderson }
141809a52d85SRichard Henderson 
/*
 * Signed saturating add for an element of size @esz carried in a
 * 64-bit temporary.  The 64-bit sum of @a and @b is clamped to the
 * element's signed range and stored in @res; any bits in which the
 * clamped and unclamped sums differ are OR-ed into @qc.
 * NOTE(review): presumably @a and @b are sign-extended and @esz is
 * narrower than MO_64 (per the _bhs suffix) -- confirm with callers.
 */
void gen_sqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    /* Largest and smallest signed values of the element width. */
    int64_t elem_max = MAKE_64BIT_MASK(0, (8 << esz) - 1);
    int64_t elem_min = -1ll - elem_max;
    TCGv_i64 sum = tcg_temp_new_i64();

    tcg_gen_add_i64(sum, a, b);

    /* Clamp from above, then from below. */
    tcg_gen_smin_i64(res, sum, tcg_constant_i64(elem_max));
    tcg_gen_smax_i64(res, res, tcg_constant_i64(elem_min));

    /* Non-zero difference between raw and clamped sum flags saturation. */
    tcg_gen_xor_i64(sum, sum, res);
    tcg_gen_or_i64(qc, qc, sum);
}
1431f4fa83d6SRichard Henderson 
/*
 * 64-bit signed saturating add.  @res receives a + b, or the
 * saturated value on signed overflow.  Bits in which the wrapped and
 * final results differ are OR-ed into @qc.
 */
void gen_sqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 sum = tcg_temp_new_i64();
    TCGv_i64 ovf = tcg_temp_new_i64();
    TCGv_i64 sat = tcg_temp_new_i64();

    tcg_gen_add_i64(sum, a, b);

    /*
     * Overflow indicator, in the sign bit of 'ovf':
     * (sum ^ a) & ~(a ^ b), i.e. the operands agree in sign but the
     * sum does not.  'sat' is borrowed as scratch for (sum ^ a).
     */
    tcg_gen_xor_i64(ovf, a, b);
    tcg_gen_xor_i64(sat, sum, a);
    tcg_gen_andc_i64(ovf, sat, ovf);

    /*
     * Saturated value: (a >> 63) ^ INT64_MAX, which is INT64_MAX for
     * non-negative a and INT64_MIN for negative a.
     */
    tcg_gen_sari_i64(sat, a, 63);
    tcg_gen_xori_i64(sat, sat, INT64_MAX);

    /* res = (ovf < 0) ? sat : sum */
    tcg_gen_movcond_i64(TCG_COND_LT, res, ovf, tcg_constant_i64(0), sat, sum);

    /* Non-zero difference between raw and final result flags saturation. */
    tcg_gen_xor_i64(sum, sum, res);
    tcg_gen_or_i64(qc, qc, sum);
}
1453f4fa83d6SRichard Henderson 
/*
 * Vector expansion of the signed saturating add.
 *
 * t receives the result of tcg_gen_ssadd_vec() on a and b.  The lanes in
 * which that result differs from the plain tcg_gen_add_vec() sum are
 * OR-ed into qc.
 */
static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec wrapped = tcg_temp_new_vec_matching(t);

    /* Compute the plain sum before t is written. */
    tcg_gen_add_vec(vece, wrapped, a, b);
    tcg_gen_ssadd_vec(vece, t, a, b);

    /* Non-zero bits mark lanes where the two results disagree. */
    tcg_gen_xor_vec(vece, wrapped, wrapped, t);
    tcg_gen_or_vec(vece, qc, qc, wrapped);
}
146309a52d85SRichard Henderson 
gen_gvec_sqadd_qc(unsigned vece,uint32_t rd_ofs,uint32_t rn_ofs,uint32_t rm_ofs,uint32_t opr_sz,uint32_t max_sz)146409a52d85SRichard Henderson void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
146509a52d85SRichard Henderson                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
146609a52d85SRichard Henderson {
146709a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
146876f4a8aeSRichard Henderson         INDEX_op_ssadd_vec, INDEX_op_add_vec, 0
146909a52d85SRichard Henderson     };
147009a52d85SRichard Henderson     static const GVecGen4 ops[4] = {
147109a52d85SRichard Henderson         { .fniv = gen_sqadd_vec,
147209a52d85SRichard Henderson           .fno = gen_helper_gvec_sqadd_b,
147309a52d85SRichard Henderson           .opt_opc = vecop_list,
147409a52d85SRichard Henderson           .write_aofs = true,
147509a52d85SRichard Henderson           .vece = MO_8 },
147609a52d85SRichard Henderson         { .fniv = gen_sqadd_vec,
147709a52d85SRichard Henderson           .fno = gen_helper_gvec_sqadd_h,
147809a52d85SRichard Henderson           .opt_opc = vecop_list,
147909a52d85SRichard Henderson           .write_aofs = true,
148009a52d85SRichard Henderson           .vece = MO_16 },
148109a52d85SRichard Henderson         { .fniv = gen_sqadd_vec,
148209a52d85SRichard Henderson           .fno = gen_helper_gvec_sqadd_s,
148309a52d85SRichard Henderson           .opt_opc = vecop_list,
148409a52d85SRichard Henderson           .write_aofs = true,
148509a52d85SRichard Henderson           .vece = MO_32 },
148609a52d85SRichard Henderson         { .fniv = gen_sqadd_vec,
1487f4fa83d6SRichard Henderson           .fni8 = gen_sqadd_d,
148809a52d85SRichard Henderson           .fno = gen_helper_gvec_sqadd_d,
148909a52d85SRichard Henderson           .opt_opc = vecop_list,
149009a52d85SRichard Henderson           .write_aofs = true,
149109a52d85SRichard Henderson           .vece = MO_64 },
149209a52d85SRichard Henderson     };
149301d5665bSRichard Henderson 
149401d5665bSRichard Henderson     tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
149509a52d85SRichard Henderson     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
149609a52d85SRichard Henderson                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
149709a52d85SRichard Henderson }
149809a52d85SRichard Henderson 
/*
 * Emit an unsigned saturating subtract for a sub-64-bit element carried
 * in 64-bit temporaries.
 *
 * The 64-bit difference a - b is clamped below at zero (signed max with 0)
 * and stored in res.  The bits in which the clamped value differs from the
 * raw difference are OR-ed into qc.  esz is not used by this expansion.
 * NOTE(review): this presumably expects a and b to be zero-extended
 * element values -- confirm against callers.
 */
void gen_uqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    TCGv_i64 diff = tcg_temp_new_i64();

    /* Raw difference; negative when b exceeds a. */
    tcg_gen_sub_i64(diff, a, b);

    /* Clamp negative results to zero. */
    tcg_gen_smax_i64(res, diff, tcg_constant_i64(0));

    /* Record any change made by the clamp. */
    tcg_gen_xor_i64(diff, diff, res);
    tcg_gen_or_i64(qc, qc, diff);
}
1508f4fa83d6SRichard Henderson 
/*
 * Emit an unsigned saturating 64-bit subtract: res is zero when a is below
 * b (unsigned compare), otherwise a - b.
 *
 * The bits in which res differs from the wrapped difference are OR-ed
 * into qc.
 */
void gen_uqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 diff = tcg_temp_new_i64();

    /* Wrapped difference. */
    tcg_gen_sub_i64(diff, a, b);

    /* Underflow (a <u b) saturates to zero. */
    tcg_gen_movcond_i64(TCG_COND_LTU, res, a, b, tcg_constant_i64(0), diff);

    /* Fold the wrapped-vs-saturated difference into qc. */
    tcg_gen_xor_i64(diff, diff, res);
    tcg_gen_or_i64(qc, qc, diff);
}
1518f4fa83d6SRichard Henderson 
/*
 * Vector expansion of the unsigned saturating subtract.
 *
 * t receives the result of tcg_gen_ussub_vec() on a and b.  The lanes in
 * which that result differs from the plain tcg_gen_sub_vec() difference
 * are OR-ed into qc.
 */
static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec wrapped = tcg_temp_new_vec_matching(t);

    /* Compute the plain difference before t is written. */
    tcg_gen_sub_vec(vece, wrapped, a, b);
    tcg_gen_ussub_vec(vece, t, a, b);

    /* Non-zero bits mark lanes where the two results disagree. */
    tcg_gen_xor_vec(vece, wrapped, wrapped, t);
    tcg_gen_or_vec(vece, qc, qc, wrapped);
}
152809a52d85SRichard Henderson 
gen_gvec_uqsub_qc(unsigned vece,uint32_t rd_ofs,uint32_t rn_ofs,uint32_t rm_ofs,uint32_t opr_sz,uint32_t max_sz)152909a52d85SRichard Henderson void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
153009a52d85SRichard Henderson                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
153109a52d85SRichard Henderson {
153209a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
153376f4a8aeSRichard Henderson         INDEX_op_ussub_vec, INDEX_op_sub_vec, 0
153409a52d85SRichard Henderson     };
153509a52d85SRichard Henderson     static const GVecGen4 ops[4] = {
153609a52d85SRichard Henderson         { .fniv = gen_uqsub_vec,
153709a52d85SRichard Henderson           .fno = gen_helper_gvec_uqsub_b,
153809a52d85SRichard Henderson           .opt_opc = vecop_list,
153909a52d85SRichard Henderson           .write_aofs = true,
154009a52d85SRichard Henderson           .vece = MO_8 },
154109a52d85SRichard Henderson         { .fniv = gen_uqsub_vec,
154209a52d85SRichard Henderson           .fno = gen_helper_gvec_uqsub_h,
154309a52d85SRichard Henderson           .opt_opc = vecop_list,
154409a52d85SRichard Henderson           .write_aofs = true,
154509a52d85SRichard Henderson           .vece = MO_16 },
154609a52d85SRichard Henderson         { .fniv = gen_uqsub_vec,
154709a52d85SRichard Henderson           .fno = gen_helper_gvec_uqsub_s,
154809a52d85SRichard Henderson           .opt_opc = vecop_list,
154909a52d85SRichard Henderson           .write_aofs = true,
155009a52d85SRichard Henderson           .vece = MO_32 },
155109a52d85SRichard Henderson         { .fniv = gen_uqsub_vec,
1552f4fa83d6SRichard Henderson           .fni8 = gen_uqsub_d,
155309a52d85SRichard Henderson           .fno = gen_helper_gvec_uqsub_d,
155409a52d85SRichard Henderson           .opt_opc = vecop_list,
155509a52d85SRichard Henderson           .write_aofs = true,
155609a52d85SRichard Henderson           .vece = MO_64 },
155709a52d85SRichard Henderson     };
155801d5665bSRichard Henderson 
155901d5665bSRichard Henderson     tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
156009a52d85SRichard Henderson     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
156109a52d85SRichard Henderson                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
156209a52d85SRichard Henderson }
156309a52d85SRichard Henderson 
/*
 * Emit a signed saturating subtract for an element of (8 << esz) bits that
 * is carried in 64-bit temporaries.
 *
 * The full 64-bit difference a - b is clamped to the signed range of the
 * element and stored in res.  The bits in which the clamped value differs
 * from the raw difference are OR-ed into qc.
 * NOTE(review): this presumably expects a and b to already be sign-extended
 * element values -- confirm against callers.
 */
void gen_sqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    const int ebits = 8 << esz;
    const int64_t sat_hi = MAKE_64BIT_MASK(0, ebits - 1);
    const int64_t sat_lo = -1ll - sat_hi;
    TCGv_i64 raw = tcg_temp_new_i64();

    /* Unclamped difference. */
    tcg_gen_sub_i64(raw, a, b);

    /* Clamp into [sat_lo, sat_hi]. */
    tcg_gen_smin_i64(res, raw, tcg_constant_i64(sat_hi));
    tcg_gen_smax_i64(res, res, tcg_constant_i64(sat_lo));

    /* Accumulate the raw-vs-clamped difference into the saturation flag. */
    tcg_gen_xor_i64(raw, raw, res);
    tcg_gen_or_i64(qc, qc, raw);
}
1576f4fa83d6SRichard Henderson 
/*
 * Emit a signed saturating 64-bit subtract: res = sat(a - b).
 *
 * The bits in which the saturated result differs from the wrapped
 * difference are OR-ed into qc.
 */
void gen_sqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 diff = tcg_temp_new_i64();
    TCGv_i64 ovf = tcg_temp_new_i64();
    TCGv_i64 aux = tcg_temp_new_i64();

    /* Wrapped (modulo 2^64) difference. */
    tcg_gen_sub_i64(diff, a, b);

    /*
     * Sign bit of ovf is set when the operands have different signs and
     * the difference's sign differs from a: ovf = (a ^ b) & (diff ^ a).
     */
    tcg_gen_xor_i64(ovf, a, b);
    tcg_gen_xor_i64(aux, diff, a);
    tcg_gen_and_i64(ovf, ovf, aux);

    /*
     * Saturation value, reusing aux: INT64_MAX when a is non-negative,
     * INT64_MIN when a is negative.
     */
    tcg_gen_sari_i64(aux, a, 63);
    tcg_gen_xori_i64(aux, aux, INT64_MAX);

    /* Select the saturation value when ovf is negative, else the result. */
    tcg_gen_movcond_i64(TCG_COND_LT, res, ovf, tcg_constant_i64(0), aux, diff);

    /* Fold any difference between the wrapped result and res into qc. */
    tcg_gen_xor_i64(diff, diff, res);
    tcg_gen_or_i64(qc, qc, diff);
}
1598f4fa83d6SRichard Henderson 
/*
 * Vector expansion of the signed saturating subtract.
 *
 * t receives the result of tcg_gen_sssub_vec() on a and b.  The lanes in
 * which that result differs from the plain tcg_gen_sub_vec() difference
 * are OR-ed into qc.
 */
static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec wrapped = tcg_temp_new_vec_matching(t);

    /* Compute the plain difference before t is written. */
    tcg_gen_sub_vec(vece, wrapped, a, b);
    tcg_gen_sssub_vec(vece, t, a, b);

    /* Non-zero bits mark lanes where the two results disagree. */
    tcg_gen_xor_vec(vece, wrapped, wrapped, t);
    tcg_gen_or_vec(vece, qc, qc, wrapped);
}
160809a52d85SRichard Henderson 
gen_gvec_sqsub_qc(unsigned vece,uint32_t rd_ofs,uint32_t rn_ofs,uint32_t rm_ofs,uint32_t opr_sz,uint32_t max_sz)160909a52d85SRichard Henderson void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
161009a52d85SRichard Henderson                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
161109a52d85SRichard Henderson {
161209a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
161376f4a8aeSRichard Henderson         INDEX_op_sssub_vec, INDEX_op_sub_vec, 0
161409a52d85SRichard Henderson     };
161509a52d85SRichard Henderson     static const GVecGen4 ops[4] = {
161609a52d85SRichard Henderson         { .fniv = gen_sqsub_vec,
161709a52d85SRichard Henderson           .fno = gen_helper_gvec_sqsub_b,
161809a52d85SRichard Henderson           .opt_opc = vecop_list,
161909a52d85SRichard Henderson           .write_aofs = true,
162009a52d85SRichard Henderson           .vece = MO_8 },
162109a52d85SRichard Henderson         { .fniv = gen_sqsub_vec,
162209a52d85SRichard Henderson           .fno = gen_helper_gvec_sqsub_h,
162309a52d85SRichard Henderson           .opt_opc = vecop_list,
162409a52d85SRichard Henderson           .write_aofs = true,
162509a52d85SRichard Henderson           .vece = MO_16 },
162609a52d85SRichard Henderson         { .fniv = gen_sqsub_vec,
162709a52d85SRichard Henderson           .fno = gen_helper_gvec_sqsub_s,
162809a52d85SRichard Henderson           .opt_opc = vecop_list,
162909a52d85SRichard Henderson           .write_aofs = true,
163009a52d85SRichard Henderson           .vece = MO_32 },
163109a52d85SRichard Henderson         { .fniv = gen_sqsub_vec,
1632f4fa83d6SRichard Henderson           .fni8 = gen_sqsub_d,
163309a52d85SRichard Henderson           .fno = gen_helper_gvec_sqsub_d,
163409a52d85SRichard Henderson           .opt_opc = vecop_list,
163509a52d85SRichard Henderson           .write_aofs = true,
163609a52d85SRichard Henderson           .vece = MO_64 },
163709a52d85SRichard Henderson     };
163801d5665bSRichard Henderson 
163901d5665bSRichard Henderson     tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
164009a52d85SRichard Henderson     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
164109a52d85SRichard Henderson                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
164209a52d85SRichard Henderson }
164309a52d85SRichard Henderson 
/*
 * Signed absolute difference on 32-bit values: d = |a - b|.
 *
 * Both a - b and b - a are generated, then a signed compare of a against
 * b selects whichever of the two is the non-negative one.
 */
static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 a_minus_b = tcg_temp_new_i32();

    tcg_gen_sub_i32(a_minus_b, a, b);
    tcg_gen_sub_i32(d, b, a);

    /* a < b (signed): keep b - a already in d; otherwise take a - b. */
    tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, a_minus_b);
}
165209a52d85SRichard Henderson 
/*
 * Signed absolute difference on 64-bit values: d = |a - b|.
 *
 * Both a - b and b - a are generated, then a signed compare of a against
 * b selects whichever of the two is the non-negative one.
 */
static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 a_minus_b = tcg_temp_new_i64();

    tcg_gen_sub_i64(a_minus_b, a, b);
    tcg_gen_sub_i64(d, b, a);

    /* a < b (signed): keep b - a already in d; otherwise take a - b. */
    tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, a_minus_b);
}
166109a52d85SRichard Henderson 
/*
 * Vector signed absolute difference: d = smax(a, b) - smin(a, b),
 * evaluated per element of size vece.
 */
static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec lo = tcg_temp_new_vec_matching(d);

    /* The minimum goes to a scratch vector so d can hold the maximum. */
    tcg_gen_smin_vec(vece, lo, a, b);
    tcg_gen_smax_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, lo);
}
167009a52d85SRichard Henderson 
gen_gvec_sabd(unsigned vece,uint32_t rd_ofs,uint32_t rn_ofs,uint32_t rm_ofs,uint32_t opr_sz,uint32_t max_sz)167109a52d85SRichard Henderson void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
167209a52d85SRichard Henderson                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
167309a52d85SRichard Henderson {
167409a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
167509a52d85SRichard Henderson         INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
167609a52d85SRichard Henderson     };
167709a52d85SRichard Henderson     static const GVecGen3 ops[4] = {
167809a52d85SRichard Henderson         { .fniv = gen_sabd_vec,
167909a52d85SRichard Henderson           .fno = gen_helper_gvec_sabd_b,
168009a52d85SRichard Henderson           .opt_opc = vecop_list,
168109a52d85SRichard Henderson           .vece = MO_8 },
168209a52d85SRichard Henderson         { .fniv = gen_sabd_vec,
168309a52d85SRichard Henderson           .fno = gen_helper_gvec_sabd_h,
168409a52d85SRichard Henderson           .opt_opc = vecop_list,
168509a52d85SRichard Henderson           .vece = MO_16 },
168609a52d85SRichard Henderson         { .fni4 = gen_sabd_i32,
168709a52d85SRichard Henderson           .fniv = gen_sabd_vec,
168809a52d85SRichard Henderson           .fno = gen_helper_gvec_sabd_s,
168909a52d85SRichard Henderson           .opt_opc = vecop_list,
169009a52d85SRichard Henderson           .vece = MO_32 },
169109a52d85SRichard Henderson         { .fni8 = gen_sabd_i64,
169209a52d85SRichard Henderson           .fniv = gen_sabd_vec,
169309a52d85SRichard Henderson           .fno = gen_helper_gvec_sabd_d,
169409a52d85SRichard Henderson           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
169509a52d85SRichard Henderson           .opt_opc = vecop_list,
169609a52d85SRichard Henderson           .vece = MO_64 },
169709a52d85SRichard Henderson     };
169809a52d85SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
169909a52d85SRichard Henderson }
170009a52d85SRichard Henderson 
/*
 * Unsigned absolute difference on 32-bit values: d = |a - b|.
 *
 * Both a - b and b - a are generated, then an unsigned compare of a
 * against b selects the one that did not wrap.
 */
static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 a_minus_b = tcg_temp_new_i32();

    tcg_gen_sub_i32(a_minus_b, a, b);
    tcg_gen_sub_i32(d, b, a);

    /* a < b (unsigned): keep b - a already in d; otherwise take a - b. */
    tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, a_minus_b);
}
170909a52d85SRichard Henderson 
/*
 * Unsigned absolute difference on 64-bit values: d = |a - b|.
 *
 * Both a - b and b - a are generated, then an unsigned compare of a
 * against b selects the one that did not wrap.
 */
static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 a_minus_b = tcg_temp_new_i64();

    tcg_gen_sub_i64(a_minus_b, a, b);
    tcg_gen_sub_i64(d, b, a);

    /* a < b (unsigned): keep b - a already in d; otherwise take a - b. */
    tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, a_minus_b);
}
171809a52d85SRichard Henderson 
/*
 * Vector unsigned absolute difference: d = umax(a, b) - umin(a, b),
 * evaluated per element of size vece.
 */
static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec lo = tcg_temp_new_vec_matching(d);

    /* The minimum goes to a scratch vector so d can hold the maximum. */
    tcg_gen_umin_vec(vece, lo, a, b);
    tcg_gen_umax_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, lo);
}
172709a52d85SRichard Henderson 
gen_gvec_uabd(unsigned vece,uint32_t rd_ofs,uint32_t rn_ofs,uint32_t rm_ofs,uint32_t opr_sz,uint32_t max_sz)172809a52d85SRichard Henderson void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
172909a52d85SRichard Henderson                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
173009a52d85SRichard Henderson {
173109a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
173209a52d85SRichard Henderson         INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
173309a52d85SRichard Henderson     };
173409a52d85SRichard Henderson     static const GVecGen3 ops[4] = {
173509a52d85SRichard Henderson         { .fniv = gen_uabd_vec,
173609a52d85SRichard Henderson           .fno = gen_helper_gvec_uabd_b,
173709a52d85SRichard Henderson           .opt_opc = vecop_list,
173809a52d85SRichard Henderson           .vece = MO_8 },
173909a52d85SRichard Henderson         { .fniv = gen_uabd_vec,
174009a52d85SRichard Henderson           .fno = gen_helper_gvec_uabd_h,
174109a52d85SRichard Henderson           .opt_opc = vecop_list,
174209a52d85SRichard Henderson           .vece = MO_16 },
174309a52d85SRichard Henderson         { .fni4 = gen_uabd_i32,
174409a52d85SRichard Henderson           .fniv = gen_uabd_vec,
174509a52d85SRichard Henderson           .fno = gen_helper_gvec_uabd_s,
174609a52d85SRichard Henderson           .opt_opc = vecop_list,
174709a52d85SRichard Henderson           .vece = MO_32 },
174809a52d85SRichard Henderson         { .fni8 = gen_uabd_i64,
174909a52d85SRichard Henderson           .fniv = gen_uabd_vec,
175009a52d85SRichard Henderson           .fno = gen_helper_gvec_uabd_d,
175109a52d85SRichard Henderson           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
175209a52d85SRichard Henderson           .opt_opc = vecop_list,
175309a52d85SRichard Henderson           .vece = MO_64 },
175409a52d85SRichard Henderson     };
175509a52d85SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
175609a52d85SRichard Henderson }
175709a52d85SRichard Henderson 
/*
 * Signed absolute difference and accumulate, 32-bit: d += |a - b|.
 * d is both read and written.
 */
static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 absdiff = tcg_temp_new_i32();

    /* Compute into a scratch temp so the accumulator stays intact. */
    gen_sabd_i32(absdiff, a, b);
    tcg_gen_add_i32(d, d, absdiff);
}
176409a52d85SRichard Henderson 
/*
 * Signed absolute difference and accumulate, 64-bit: d += |a - b|.
 * d is both read and written.
 */
static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 absdiff = tcg_temp_new_i64();

    /* Compute into a scratch temp so the accumulator stays intact. */
    gen_sabd_i64(absdiff, a, b);
    tcg_gen_add_i64(d, d, absdiff);
}
177109a52d85SRichard Henderson 
/*
 * Vector signed absolute difference and accumulate: d += |a - b| per
 * element of size vece.  d is both read and written.
 */
static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec absdiff = tcg_temp_new_vec_matching(d);

    /* Compute into a scratch vector so the accumulator stays intact. */
    gen_sabd_vec(vece, absdiff, a, b);
    tcg_gen_add_vec(vece, d, d, absdiff);
}
177809a52d85SRichard Henderson 
gen_gvec_saba(unsigned vece,uint32_t rd_ofs,uint32_t rn_ofs,uint32_t rm_ofs,uint32_t opr_sz,uint32_t max_sz)177909a52d85SRichard Henderson void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
178009a52d85SRichard Henderson                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
178109a52d85SRichard Henderson {
178209a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
178309a52d85SRichard Henderson         INDEX_op_sub_vec, INDEX_op_add_vec,
178409a52d85SRichard Henderson         INDEX_op_smin_vec, INDEX_op_smax_vec, 0
178509a52d85SRichard Henderson     };
178609a52d85SRichard Henderson     static const GVecGen3 ops[4] = {
178709a52d85SRichard Henderson         { .fniv = gen_saba_vec,
178809a52d85SRichard Henderson           .fno = gen_helper_gvec_saba_b,
178909a52d85SRichard Henderson           .opt_opc = vecop_list,
179009a52d85SRichard Henderson           .load_dest = true,
179109a52d85SRichard Henderson           .vece = MO_8 },
179209a52d85SRichard Henderson         { .fniv = gen_saba_vec,
179309a52d85SRichard Henderson           .fno = gen_helper_gvec_saba_h,
179409a52d85SRichard Henderson           .opt_opc = vecop_list,
179509a52d85SRichard Henderson           .load_dest = true,
179609a52d85SRichard Henderson           .vece = MO_16 },
179709a52d85SRichard Henderson         { .fni4 = gen_saba_i32,
179809a52d85SRichard Henderson           .fniv = gen_saba_vec,
179909a52d85SRichard Henderson           .fno = gen_helper_gvec_saba_s,
180009a52d85SRichard Henderson           .opt_opc = vecop_list,
180109a52d85SRichard Henderson           .load_dest = true,
180209a52d85SRichard Henderson           .vece = MO_32 },
180309a52d85SRichard Henderson         { .fni8 = gen_saba_i64,
180409a52d85SRichard Henderson           .fniv = gen_saba_vec,
180509a52d85SRichard Henderson           .fno = gen_helper_gvec_saba_d,
180609a52d85SRichard Henderson           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
180709a52d85SRichard Henderson           .opt_opc = vecop_list,
180809a52d85SRichard Henderson           .load_dest = true,
180909a52d85SRichard Henderson           .vece = MO_64 },
181009a52d85SRichard Henderson     };
181109a52d85SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
181209a52d85SRichard Henderson }
181309a52d85SRichard Henderson 
/*
 * Unsigned absolute difference and accumulate, 32-bit: d += |a - b|.
 * d is both read and written.
 */
static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 absdiff = tcg_temp_new_i32();

    /* Compute into a scratch temp so the accumulator stays intact. */
    gen_uabd_i32(absdiff, a, b);
    tcg_gen_add_i32(d, d, absdiff);
}
182009a52d85SRichard Henderson 
/*
 * Unsigned absolute difference and accumulate, 64-bit: d += |a - b|.
 * d is both read and written.
 */
static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 absdiff = tcg_temp_new_i64();

    /* Compute into a scratch temp so the accumulator stays intact. */
    gen_uabd_i64(absdiff, a, b);
    tcg_gen_add_i64(d, d, absdiff);
}
182709a52d85SRichard Henderson 
/*
 * Vector unsigned absolute difference and accumulate: d += |a - b| per
 * element of size vece.  d is both read and written.
 */
static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec absdiff = tcg_temp_new_vec_matching(d);

    /* Compute into a scratch vector so the accumulator stays intact. */
    gen_uabd_vec(vece, absdiff, a, b);
    tcg_gen_add_vec(vece, d, d, absdiff);
}
183409a52d85SRichard Henderson 
gen_gvec_uaba(unsigned vece,uint32_t rd_ofs,uint32_t rn_ofs,uint32_t rm_ofs,uint32_t opr_sz,uint32_t max_sz)183509a52d85SRichard Henderson void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
183609a52d85SRichard Henderson                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
183709a52d85SRichard Henderson {
183809a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
183909a52d85SRichard Henderson         INDEX_op_sub_vec, INDEX_op_add_vec,
184009a52d85SRichard Henderson         INDEX_op_umin_vec, INDEX_op_umax_vec, 0
184109a52d85SRichard Henderson     };
184209a52d85SRichard Henderson     static const GVecGen3 ops[4] = {
184309a52d85SRichard Henderson         { .fniv = gen_uaba_vec,
184409a52d85SRichard Henderson           .fno = gen_helper_gvec_uaba_b,
184509a52d85SRichard Henderson           .opt_opc = vecop_list,
184609a52d85SRichard Henderson           .load_dest = true,
184709a52d85SRichard Henderson           .vece = MO_8 },
184809a52d85SRichard Henderson         { .fniv = gen_uaba_vec,
184909a52d85SRichard Henderson           .fno = gen_helper_gvec_uaba_h,
185009a52d85SRichard Henderson           .opt_opc = vecop_list,
185109a52d85SRichard Henderson           .load_dest = true,
185209a52d85SRichard Henderson           .vece = MO_16 },
185309a52d85SRichard Henderson         { .fni4 = gen_uaba_i32,
185409a52d85SRichard Henderson           .fniv = gen_uaba_vec,
185509a52d85SRichard Henderson           .fno = gen_helper_gvec_uaba_s,
185609a52d85SRichard Henderson           .opt_opc = vecop_list,
185709a52d85SRichard Henderson           .load_dest = true,
185809a52d85SRichard Henderson           .vece = MO_32 },
185909a52d85SRichard Henderson         { .fni8 = gen_uaba_i64,
186009a52d85SRichard Henderson           .fniv = gen_uaba_vec,
186109a52d85SRichard Henderson           .fno = gen_helper_gvec_uaba_d,
186209a52d85SRichard Henderson           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
186309a52d85SRichard Henderson           .opt_opc = vecop_list,
186409a52d85SRichard Henderson           .load_dest = true,
186509a52d85SRichard Henderson           .vece = MO_64 },
186609a52d85SRichard Henderson     };
186709a52d85SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
186809a52d85SRichard Henderson }
1869a7e4eec6SRichard Henderson 
gen_gvec_addp(unsigned vece,uint32_t rd_ofs,uint32_t rn_ofs,uint32_t rm_ofs,uint32_t opr_sz,uint32_t max_sz)1870a7e4eec6SRichard Henderson void gen_gvec_addp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1871a7e4eec6SRichard Henderson                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1872a7e4eec6SRichard Henderson {
1873a7e4eec6SRichard Henderson     static gen_helper_gvec_3 * const fns[4] = {
1874a7e4eec6SRichard Henderson         gen_helper_gvec_addp_b,
1875a7e4eec6SRichard Henderson         gen_helper_gvec_addp_h,
1876a7e4eec6SRichard Henderson         gen_helper_gvec_addp_s,
1877a7e4eec6SRichard Henderson         gen_helper_gvec_addp_d,
1878a7e4eec6SRichard Henderson     };
1879a7e4eec6SRichard Henderson     tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
1880a7e4eec6SRichard Henderson }
188128b5451bSRichard Henderson 
gen_gvec_smaxp(unsigned vece,uint32_t rd_ofs,uint32_t rn_ofs,uint32_t rm_ofs,uint32_t opr_sz,uint32_t max_sz)188228b5451bSRichard Henderson void gen_gvec_smaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
188328b5451bSRichard Henderson                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
188428b5451bSRichard Henderson {
188528b5451bSRichard Henderson     static gen_helper_gvec_3 * const fns[4] = {
188628b5451bSRichard Henderson         gen_helper_gvec_smaxp_b,
188728b5451bSRichard Henderson         gen_helper_gvec_smaxp_h,
188828b5451bSRichard Henderson         gen_helper_gvec_smaxp_s,
188928b5451bSRichard Henderson     };
189028b5451bSRichard Henderson     tcg_debug_assert(vece <= MO_32);
189128b5451bSRichard Henderson     tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
189228b5451bSRichard Henderson }
189328b5451bSRichard Henderson 
gen_gvec_sminp(unsigned vece,uint32_t rd_ofs,uint32_t rn_ofs,uint32_t rm_ofs,uint32_t opr_sz,uint32_t max_sz)189428b5451bSRichard Henderson void gen_gvec_sminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
189528b5451bSRichard Henderson                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
189628b5451bSRichard Henderson {
189728b5451bSRichard Henderson     static gen_helper_gvec_3 * const fns[4] = {
189828b5451bSRichard Henderson         gen_helper_gvec_sminp_b,
189928b5451bSRichard Henderson         gen_helper_gvec_sminp_h,
190028b5451bSRichard Henderson         gen_helper_gvec_sminp_s,
190128b5451bSRichard Henderson     };
190228b5451bSRichard Henderson     tcg_debug_assert(vece <= MO_32);
190328b5451bSRichard Henderson     tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
190428b5451bSRichard Henderson }
190528b5451bSRichard Henderson 
/*
 * Expand a three-operand gvec operation through the out-of-line
 * gen_helper_gvec_umaxp_{b,h,s} helpers, chosen by element size VECE.
 * Only MO_8, MO_16 and MO_32 have a helper; the fourth table slot is
 * left NULL.  The offsets and sizes are forwarded unchanged, with 0
 * passed in the argument slot that precedes the helper.
 */
void gen_gvec_umaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const helpers[4] = {
        [MO_8]  = gen_helper_gvec_umaxp_b,
        [MO_16] = gen_helper_gvec_umaxp_h,
        [MO_32] = gen_helper_gvec_umaxp_s,
    };
    gen_helper_gvec_3 *fn;

    tcg_debug_assert(vece <= MO_32);
    fn = helpers[vece];
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fn);
}
191728b5451bSRichard Henderson 
/*
 * Expand a three-operand gvec operation through the out-of-line
 * gen_helper_gvec_uminp_{b,h,s} helpers, chosen by element size VECE.
 * Only MO_8, MO_16 and MO_32 have a helper; the fourth table slot is
 * left NULL.  The offsets and sizes are forwarded unchanged, with 0
 * passed in the argument slot that precedes the helper.
 */
void gen_gvec_uminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const helpers[4] = {
        [MO_8]  = gen_helper_gvec_uminp_b,
        [MO_16] = gen_helper_gvec_uminp_h,
        [MO_32] = gen_helper_gvec_uminp_s,
    };
    gen_helper_gvec_3 *fn;

    tcg_debug_assert(vece <= MO_32);
    fn = helpers[vece];
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fn);
}
1929203aca91SRichard Henderson 
/*
 * Signed halving add on the 8-bit lanes packed in a 64-bit value:
 *     d = (a >> 1) + (b >> 1) + (a & b & 1)
 * using arithmetic shifts.  Each operand is halved before the add, and
 * the last term restores the carry that is dropped when both low bits
 * are set.  Note that A and B are overwritten with their halved values.
 */
static void gen_shadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 carry = tcg_temp_new_i64();

    /* carry = a & b & 1 per lane; taken before a and b are modified. */
    tcg_gen_and_i64(carry, a, b);
    tcg_gen_andi_i64(carry, carry, dup_const(MO_8, 1));

    /* Halve both inputs in place. */
    tcg_gen_vec_sar8i_i64(a, a, 1);
    tcg_gen_vec_sar8i_i64(b, b, 1);

    /* d = a/2 + b/2 + carry */
    tcg_gen_vec_add8_i64(d, a, b);
    tcg_gen_vec_add8_i64(d, d, carry);
}
1941203aca91SRichard Henderson 
/*
 * Signed halving add on the 16-bit lanes packed in a 64-bit value:
 *     d = (a >> 1) + (b >> 1) + (a & b & 1)
 * using arithmetic shifts.  Each operand is halved before the add, and
 * the last term restores the carry that is dropped when both low bits
 * are set.  Note that A and B are overwritten with their halved values.
 */
static void gen_shadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 carry = tcg_temp_new_i64();

    /* carry = a & b & 1 per lane; taken before a and b are modified. */
    tcg_gen_and_i64(carry, a, b);
    tcg_gen_andi_i64(carry, carry, dup_const(MO_16, 1));

    /* Halve both inputs in place. */
    tcg_gen_vec_sar16i_i64(a, a, 1);
    tcg_gen_vec_sar16i_i64(b, b, 1);

    /* d = a/2 + b/2 + carry */
    tcg_gen_vec_add16_i64(d, a, b);
    tcg_gen_vec_add16_i64(d, d, carry);
}
1953203aca91SRichard Henderson 
/*
 * Signed halving add of two 32-bit values:
 *     d = (a >> 1) + (b >> 1) + (a & b & 1)
 * using arithmetic shifts.  Each operand is halved before the add, and
 * the last term restores the carry that is dropped when both low bits
 * are set.  Note that A and B are overwritten with their halved values.
 */
static void gen_shadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 carry = tcg_temp_new_i32();

    /* carry = a & b & 1; taken before a and b are modified. */
    tcg_gen_and_i32(carry, a, b);
    tcg_gen_andi_i32(carry, carry, 1);

    /* Halve both inputs in place. */
    tcg_gen_sari_i32(a, a, 1);
    tcg_gen_sari_i32(b, b, 1);

    /* d = a/2 + b/2 + carry */
    tcg_gen_add_i32(d, a, b);
    tcg_gen_add_i32(d, d, carry);
}
1965203aca91SRichard Henderson 
/*
 * Host-vector form of the signed halving add, for elements of size VECE:
 *     d = (a >> 1) + (b >> 1) + (a & b & 1)
 * with arithmetic shifts.  A and B are overwritten with their halved
 * values.
 */
static void gen_shadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec carry = tcg_temp_new_vec_matching(d);
    TCGv_vec one = tcg_constant_vec_matching(d, vece, 1);

    /* Record a & b before the shifts discard the low bits. */
    tcg_gen_and_vec(vece, carry, a, b);
    tcg_gen_sari_vec(vece, a, a, 1);
    tcg_gen_sari_vec(vece, b, b, 1);
    /* Keep only bit 0 of each element. */
    tcg_gen_and_vec(vece, carry, carry, one);
    tcg_gen_add_vec(vece, d, a, b);
    tcg_gen_add_vec(vece, d, d, carry);
}
1977203aca91SRichard Henderson 
/*
 * Expand a signed halving add over gvec operands.  VECE selects the
 * element size (MO_8, MO_16 or MO_32 only); the offsets and sizes are
 * forwarded unchanged to tcg_gen_gvec_3.
 */
void gen_gvec_shadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    /* Vector opcodes that gen_shadd_vec relies on. */
    static const TCGOpcode vec_opcodes[] = {
        INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    /* One expander description per supported element size. */
    static const GVecGen3 ops[] = {
        [MO_8] = {
            .vece = MO_8,
            .fni8 = gen_shadd8_i64,
            .fniv = gen_shadd_vec,
            .opt_opc = vec_opcodes,
        },
        [MO_16] = {
            .vece = MO_16,
            .fni8 = gen_shadd16_i64,
            .fniv = gen_shadd_vec,
            .opt_opc = vec_opcodes,
        },
        [MO_32] = {
            .vece = MO_32,
            .fni4 = gen_shadd_i32,
            .fniv = gen_shadd_vec,
            .opt_opc = vec_opcodes,
        },
    };

    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}
2001203aca91SRichard Henderson 
/*
 * Unsigned halving add on the 8-bit lanes packed in a 64-bit value:
 *     d = (a >> 1) + (b >> 1) + (a & b & 1)
 * using logical shifts.  Each operand is halved before the add, and
 * the last term restores the carry that is dropped when both low bits
 * are set.  Note that A and B are overwritten with their halved values.
 */
static void gen_uhadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 carry = tcg_temp_new_i64();

    /* carry = a & b & 1 per lane; taken before a and b are modified. */
    tcg_gen_and_i64(carry, a, b);
    tcg_gen_andi_i64(carry, carry, dup_const(MO_8, 1));

    /* Halve both inputs in place. */
    tcg_gen_vec_shr8i_i64(a, a, 1);
    tcg_gen_vec_shr8i_i64(b, b, 1);

    /* d = a/2 + b/2 + carry */
    tcg_gen_vec_add8_i64(d, a, b);
    tcg_gen_vec_add8_i64(d, d, carry);
}
2013203aca91SRichard Henderson 
/*
 * Unsigned halving add on the 16-bit lanes packed in a 64-bit value:
 *     d = (a >> 1) + (b >> 1) + (a & b & 1)
 * using logical shifts.  Each operand is halved before the add, and
 * the last term restores the carry that is dropped when both low bits
 * are set.  Note that A and B are overwritten with their halved values.
 */
static void gen_uhadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 carry = tcg_temp_new_i64();

    /* carry = a & b & 1 per lane; taken before a and b are modified. */
    tcg_gen_and_i64(carry, a, b);
    tcg_gen_andi_i64(carry, carry, dup_const(MO_16, 1));

    /* Halve both inputs in place. */
    tcg_gen_vec_shr16i_i64(a, a, 1);
    tcg_gen_vec_shr16i_i64(b, b, 1);

    /* d = a/2 + b/2 + carry */
    tcg_gen_vec_add16_i64(d, a, b);
    tcg_gen_vec_add16_i64(d, d, carry);
}
2025203aca91SRichard Henderson 
/*
 * Unsigned halving add of two 32-bit values:
 *     d = (a >> 1) + (b >> 1) + (a & b & 1)
 * using logical shifts.  Each operand is halved before the add, and
 * the last term restores the carry that is dropped when both low bits
 * are set.  Note that A and B are overwritten with their halved values.
 */
static void gen_uhadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 carry = tcg_temp_new_i32();

    /* carry = a & b & 1; taken before a and b are modified. */
    tcg_gen_and_i32(carry, a, b);
    tcg_gen_andi_i32(carry, carry, 1);

    /* Halve both inputs in place. */
    tcg_gen_shri_i32(a, a, 1);
    tcg_gen_shri_i32(b, b, 1);

    /* d = a/2 + b/2 + carry */
    tcg_gen_add_i32(d, a, b);
    tcg_gen_add_i32(d, d, carry);
}
2037203aca91SRichard Henderson 
/*
 * Host-vector form of the unsigned halving add, for elements of size
 * VECE:
 *     d = (a >> 1) + (b >> 1) + (a & b & 1)
 * with logical shifts.  A and B are overwritten with their halved
 * values.
 */
static void gen_uhadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec carry = tcg_temp_new_vec_matching(d);
    TCGv_vec one = tcg_constant_vec_matching(d, vece, 1);

    /* Record a & b before the shifts discard the low bits. */
    tcg_gen_and_vec(vece, carry, a, b);
    tcg_gen_shri_vec(vece, a, a, 1);
    tcg_gen_shri_vec(vece, b, b, 1);
    /* Keep only bit 0 of each element. */
    tcg_gen_and_vec(vece, carry, carry, one);
    tcg_gen_add_vec(vece, d, a, b);
    tcg_gen_add_vec(vece, d, d, carry);
}
2049203aca91SRichard Henderson 
/*
 * Expand an unsigned halving add over gvec operands.  VECE selects the
 * element size (MO_8, MO_16 or MO_32 only); the offsets and sizes are
 * forwarded unchanged to tcg_gen_gvec_3.
 */
void gen_gvec_uhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    /* Vector opcodes that gen_uhadd_vec relies on. */
    static const TCGOpcode vec_opcodes[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    /* One expander description per supported element size. */
    static const GVecGen3 ops[] = {
        [MO_8] = {
            .vece = MO_8,
            .fni8 = gen_uhadd8_i64,
            .fniv = gen_uhadd_vec,
            .opt_opc = vec_opcodes,
        },
        [MO_16] = {
            .vece = MO_16,
            .fni8 = gen_uhadd16_i64,
            .fniv = gen_uhadd_vec,
            .opt_opc = vec_opcodes,
        },
        [MO_32] = {
            .vece = MO_32,
            .fni4 = gen_uhadd_i32,
            .fniv = gen_uhadd_vec,
            .opt_opc = vec_opcodes,
        },
    };

    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}
207334c0d865SRichard Henderson 
/*
 * Signed halving subtract on the 8-bit lanes packed in a 64-bit value:
 *     d = (a >> 1) - (b >> 1) - (~a & b & 1)
 * using arithmetic shifts.  The last term is the borrow needed when the
 * low bit of A is clear and the low bit of B is set.  Note that A and B
 * are overwritten with their halved values.
 */
static void gen_shsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 borrow = tcg_temp_new_i64();

    /* borrow = b & ~a & 1 per lane; taken before a and b are modified. */
    tcg_gen_andc_i64(borrow, b, a);
    tcg_gen_andi_i64(borrow, borrow, dup_const(MO_8, 1));

    /* Halve both inputs in place. */
    tcg_gen_vec_sar8i_i64(a, a, 1);
    tcg_gen_vec_sar8i_i64(b, b, 1);

    /* d = a/2 - b/2 - borrow */
    tcg_gen_vec_sub8_i64(d, a, b);
    tcg_gen_vec_sub8_i64(d, d, borrow);
}
208534c0d865SRichard Henderson 
/*
 * Signed halving subtract on the 16-bit lanes packed in a 64-bit value:
 *     d = (a >> 1) - (b >> 1) - (~a & b & 1)
 * using arithmetic shifts.  The last term is the borrow needed when the
 * low bit of A is clear and the low bit of B is set.  Note that A and B
 * are overwritten with their halved values.
 */
static void gen_shsub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 borrow = tcg_temp_new_i64();

    /* borrow = b & ~a & 1 per lane; taken before a and b are modified. */
    tcg_gen_andc_i64(borrow, b, a);
    tcg_gen_andi_i64(borrow, borrow, dup_const(MO_16, 1));

    /* Halve both inputs in place. */
    tcg_gen_vec_sar16i_i64(a, a, 1);
    tcg_gen_vec_sar16i_i64(b, b, 1);

    /* d = a/2 - b/2 - borrow */
    tcg_gen_vec_sub16_i64(d, a, b);
    tcg_gen_vec_sub16_i64(d, d, borrow);
}
209734c0d865SRichard Henderson 
/*
 * Signed halving subtract of two 32-bit values:
 *     d = (a >> 1) - (b >> 1) - (~a & b & 1)
 * using arithmetic shifts.  The last term is the borrow needed when the
 * low bit of A is clear and the low bit of B is set.  Note that A and B
 * are overwritten with their halved values.
 */
static void gen_shsub_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 borrow = tcg_temp_new_i32();

    /* borrow = b & ~a & 1; taken before a and b are modified. */
    tcg_gen_andc_i32(borrow, b, a);
    tcg_gen_andi_i32(borrow, borrow, 1);

    /* Halve both inputs in place. */
    tcg_gen_sari_i32(a, a, 1);
    tcg_gen_sari_i32(b, b, 1);

    /* d = a/2 - b/2 - borrow */
    tcg_gen_sub_i32(d, a, b);
    tcg_gen_sub_i32(d, d, borrow);
}
210934c0d865SRichard Henderson 
/*
 * Host-vector form of the signed halving subtract, for elements of size
 * VECE:
 *     d = (a >> 1) - (b >> 1) - (~a & b & 1)
 * with arithmetic shifts.  A and B are overwritten with their halved
 * values.
 */
static void gen_shsub_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec borrow = tcg_temp_new_vec_matching(d);
    TCGv_vec one = tcg_constant_vec_matching(d, vece, 1);

    /* Record b & ~a before the shifts discard the low bits. */
    tcg_gen_andc_vec(vece, borrow, b, a);
    tcg_gen_sari_vec(vece, a, a, 1);
    tcg_gen_sari_vec(vece, b, b, 1);
    /* Keep only bit 0 of each element. */
    tcg_gen_and_vec(vece, borrow, borrow, one);
    tcg_gen_sub_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, borrow);
}
212134c0d865SRichard Henderson 
/*
 * Expand a signed halving subtract over gvec operands.  VECE selects
 * the element size (MO_8, MO_16 or MO_32 only); the offsets and sizes
 * are forwarded unchanged to tcg_gen_gvec_3.
 */
void gen_gvec_shsub(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    /* Vector opcodes that gen_shsub_vec relies on. */
    static const TCGOpcode vec_opcodes[] = {
        INDEX_op_sari_vec, INDEX_op_sub_vec, 0
    };
    /*
     * One expander description per supported element size; the fourth
     * slot is left zero-initialised.
     */
    static const GVecGen3 ops[4] = {
        [MO_8] = {
            .vece = MO_8,
            .fni8 = gen_shsub8_i64,
            .fniv = gen_shsub_vec,
            .opt_opc = vec_opcodes,
        },
        [MO_16] = {
            .vece = MO_16,
            .fni8 = gen_shsub16_i64,
            .fniv = gen_shsub_vec,
            .opt_opc = vec_opcodes,
        },
        [MO_32] = {
            .vece = MO_32,
            .fni4 = gen_shsub_i32,
            .fniv = gen_shsub_vec,
            .opt_opc = vec_opcodes,
        },
    };

    assert(vece <= MO_32);
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}
214534c0d865SRichard Henderson 
/*
 * Unsigned halving subtract on the 8-bit lanes packed in a 64-bit value:
 *     d = (a >> 1) - (b >> 1) - (~a & b & 1)
 * using logical shifts.  The last term is the borrow needed when the
 * low bit of A is clear and the low bit of B is set.  Note that A and B
 * are overwritten with their halved values.
 */
static void gen_uhsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 borrow = tcg_temp_new_i64();

    /* borrow = b & ~a & 1 per lane; taken before a and b are modified. */
    tcg_gen_andc_i64(borrow, b, a);
    tcg_gen_andi_i64(borrow, borrow, dup_const(MO_8, 1));

    /* Halve both inputs in place. */
    tcg_gen_vec_shr8i_i64(a, a, 1);
    tcg_gen_vec_shr8i_i64(b, b, 1);

    /* d = a/2 - b/2 - borrow */
    tcg_gen_vec_sub8_i64(d, a, b);
    tcg_gen_vec_sub8_i64(d, d, borrow);
}
215734c0d865SRichard Henderson 
/*
 * Unsigned halving subtract on the 16-bit lanes packed in a 64-bit
 * value:
 *     d = (a >> 1) - (b >> 1) - (~a & b & 1)
 * using logical shifts.  The last term is the borrow needed when the
 * low bit of A is clear and the low bit of B is set.  Note that A and B
 * are overwritten with their halved values.
 */
static void gen_uhsub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 borrow = tcg_temp_new_i64();

    /* borrow = b & ~a & 1 per lane; taken before a and b are modified. */
    tcg_gen_andc_i64(borrow, b, a);
    tcg_gen_andi_i64(borrow, borrow, dup_const(MO_16, 1));

    /* Halve both inputs in place. */
    tcg_gen_vec_shr16i_i64(a, a, 1);
    tcg_gen_vec_shr16i_i64(b, b, 1);

    /* d = a/2 - b/2 - borrow */
    tcg_gen_vec_sub16_i64(d, a, b);
    tcg_gen_vec_sub16_i64(d, d, borrow);
}
216934c0d865SRichard Henderson 
/*
 * Unsigned halving subtract of two 32-bit values:
 *     d = (a >> 1) - (b >> 1) - (~a & b & 1)
 * using logical shifts.  The last term is the borrow needed when the
 * low bit of A is clear and the low bit of B is set.  Note that A and B
 * are overwritten with their halved values.
 */
static void gen_uhsub_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 borrow = tcg_temp_new_i32();

    /* borrow = b & ~a & 1; taken before a and b are modified. */
    tcg_gen_andc_i32(borrow, b, a);
    tcg_gen_andi_i32(borrow, borrow, 1);

    /* Halve both inputs in place. */
    tcg_gen_shri_i32(a, a, 1);
    tcg_gen_shri_i32(b, b, 1);

    /* d = a/2 - b/2 - borrow */
    tcg_gen_sub_i32(d, a, b);
    tcg_gen_sub_i32(d, d, borrow);
}
218134c0d865SRichard Henderson 
/*
 * Host-vector form of the unsigned halving subtract, for elements of
 * size VECE:
 *     d = (a >> 1) - (b >> 1) - (~a & b & 1)
 * with logical shifts.  A and B are overwritten with their halved
 * values.
 */
static void gen_uhsub_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec borrow = tcg_temp_new_vec_matching(d);
    TCGv_vec one = tcg_constant_vec_matching(d, vece, 1);

    /* Record b & ~a before the shifts discard the low bits. */
    tcg_gen_andc_vec(vece, borrow, b, a);
    tcg_gen_shri_vec(vece, a, a, 1);
    tcg_gen_shri_vec(vece, b, b, 1);
    /* Keep only bit 0 of each element. */
    tcg_gen_and_vec(vece, borrow, borrow, one);
    tcg_gen_sub_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, borrow);
}
219334c0d865SRichard Henderson 
/*
 * Expand an unsigned halving subtract over gvec operands.  VECE selects
 * the element size (MO_8, MO_16 or MO_32 only); the offsets and sizes
 * are forwarded unchanged to tcg_gen_gvec_3.
 */
void gen_gvec_uhsub(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    /* Vector opcodes that gen_uhsub_vec relies on. */
    static const TCGOpcode vec_opcodes[] = {
        INDEX_op_shri_vec, INDEX_op_sub_vec, 0
    };
    /*
     * One expander description per supported element size; the fourth
     * slot is left zero-initialised.
     */
    static const GVecGen3 ops[4] = {
        [MO_8] = {
            .vece = MO_8,
            .fni8 = gen_uhsub8_i64,
            .fniv = gen_uhsub_vec,
            .opt_opc = vec_opcodes,
        },
        [MO_16] = {
            .vece = MO_16,
            .fni8 = gen_uhsub16_i64,
            .fniv = gen_uhsub_vec,
            .opt_opc = vec_opcodes,
        },
        [MO_32] = {
            .vece = MO_32,
            .fni4 = gen_uhsub_i32,
            .fniv = gen_uhsub_vec,
            .opt_opc = vec_opcodes,
        },
    };

    assert(vece <= MO_32);
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}
22178989b95eSRichard Henderson 
/*
 * Signed rounding halving add on the 8-bit lanes packed in a 64-bit
 * value:
 *     d = (a >> 1) + (b >> 1) + ((a | b) & 1)
 * using arithmetic shifts.  The last term adds one whenever either low
 * bit is set, which rounds the halved sum upwards.  Note that A and B
 * are overwritten with their halved values.
 */
static void gen_srhadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 round = tcg_temp_new_i64();

    /* round = (a | b) & 1 per lane; taken before a and b are modified. */
    tcg_gen_or_i64(round, a, b);
    tcg_gen_andi_i64(round, round, dup_const(MO_8, 1));

    /* Halve both inputs in place. */
    tcg_gen_vec_sar8i_i64(a, a, 1);
    tcg_gen_vec_sar8i_i64(b, b, 1);

    /* d = a/2 + b/2 + round */
    tcg_gen_vec_add8_i64(d, a, b);
    tcg_gen_vec_add8_i64(d, d, round);
}
22298989b95eSRichard Henderson 
/*
 * Signed rounding halving add on the 16-bit lanes packed in a 64-bit
 * value:
 *     d = (a >> 1) + (b >> 1) + ((a | b) & 1)
 * using arithmetic shifts.  The last term adds one whenever either low
 * bit is set, which rounds the halved sum upwards.  Note that A and B
 * are overwritten with their halved values.
 */
static void gen_srhadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 round = tcg_temp_new_i64();

    /* round = (a | b) & 1 per lane; taken before a and b are modified. */
    tcg_gen_or_i64(round, a, b);
    tcg_gen_andi_i64(round, round, dup_const(MO_16, 1));

    /* Halve both inputs in place. */
    tcg_gen_vec_sar16i_i64(a, a, 1);
    tcg_gen_vec_sar16i_i64(b, b, 1);

    /* d = a/2 + b/2 + round */
    tcg_gen_vec_add16_i64(d, a, b);
    tcg_gen_vec_add16_i64(d, d, round);
}
22418989b95eSRichard Henderson 
/*
 * Signed rounding halving add of two 32-bit values:
 *     d = (a >> 1) + (b >> 1) + ((a | b) & 1)
 * using arithmetic shifts.  The last term adds one whenever either low
 * bit is set, which rounds the halved sum upwards.  Note that A and B
 * are overwritten with their halved values.
 */
static void gen_srhadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 round = tcg_temp_new_i32();

    /* round = (a | b) & 1; taken before a and b are modified. */
    tcg_gen_or_i32(round, a, b);
    tcg_gen_andi_i32(round, round, 1);

    /* Halve both inputs in place. */
    tcg_gen_sari_i32(a, a, 1);
    tcg_gen_sari_i32(b, b, 1);

    /* d = a/2 + b/2 + round */
    tcg_gen_add_i32(d, a, b);
    tcg_gen_add_i32(d, d, round);
}
22538989b95eSRichard Henderson 
/*
 * Host-vector form of the signed rounding halving add, for elements of
 * size VECE:
 *     d = (a >> 1) + (b >> 1) + ((a | b) & 1)
 * with arithmetic shifts.  A and B are overwritten with their halved
 * values.
 */
static void gen_srhadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec round = tcg_temp_new_vec_matching(d);
    TCGv_vec one = tcg_constant_vec_matching(d, vece, 1);

    /* Record a | b before the shifts discard the low bits. */
    tcg_gen_or_vec(vece, round, a, b);
    tcg_gen_sari_vec(vece, a, a, 1);
    tcg_gen_sari_vec(vece, b, b, 1);
    /* Keep only bit 0 of each element. */
    tcg_gen_and_vec(vece, round, round, one);
    tcg_gen_add_vec(vece, d, a, b);
    tcg_gen_add_vec(vece, d, d, round);
}
22658989b95eSRichard Henderson 
/*
 * Expand a signed rounding halving add over gvec operands.  VECE
 * selects the element size (MO_8, MO_16 or MO_32 only); the offsets and
 * sizes are forwarded unchanged to tcg_gen_gvec_3.
 */
void gen_gvec_srhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    /* Vector opcodes that gen_srhadd_vec relies on. */
    static const TCGOpcode vec_opcodes[] = {
        INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    /* One expander description per supported element size. */
    static const GVecGen3 ops[] = {
        [MO_8] = {
            .vece = MO_8,
            .fni8 = gen_srhadd8_i64,
            .fniv = gen_srhadd_vec,
            .opt_opc = vec_opcodes,
        },
        [MO_16] = {
            .vece = MO_16,
            .fni8 = gen_srhadd16_i64,
            .fniv = gen_srhadd_vec,
            .opt_opc = vec_opcodes,
        },
        [MO_32] = {
            .vece = MO_32,
            .fni4 = gen_srhadd_i32,
            .fniv = gen_srhadd_vec,
            .opt_opc = vec_opcodes,
        },
    };

    assert(vece <= MO_32);
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}
22898989b95eSRichard Henderson 
/*
 * Unsigned rounding halving add on the 8-bit lanes packed in a 64-bit
 * value:
 *     d = (a >> 1) + (b >> 1) + ((a | b) & 1)
 * using logical shifts.  The last term adds one whenever either low bit
 * is set, which rounds the halved sum upwards.  Note that A and B are
 * overwritten with their halved values.
 */
static void gen_urhadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 round = tcg_temp_new_i64();

    /* round = (a | b) & 1 per lane; taken before a and b are modified. */
    tcg_gen_or_i64(round, a, b);
    tcg_gen_andi_i64(round, round, dup_const(MO_8, 1));

    /* Halve both inputs in place. */
    tcg_gen_vec_shr8i_i64(a, a, 1);
    tcg_gen_vec_shr8i_i64(b, b, 1);

    /* d = a/2 + b/2 + round */
    tcg_gen_vec_add8_i64(d, a, b);
    tcg_gen_vec_add8_i64(d, d, round);
}
23018989b95eSRichard Henderson 
/*
 * Unsigned rounding halving add on the 16-bit lanes packed in a 64-bit
 * value:
 *     d = (a >> 1) + (b >> 1) + ((a | b) & 1)
 * using logical shifts.  The last term adds one whenever either low bit
 * is set, which rounds the halved sum upwards.  Note that A and B are
 * overwritten with their halved values.
 */
static void gen_urhadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 round = tcg_temp_new_i64();

    /* round = (a | b) & 1 per lane; taken before a and b are modified. */
    tcg_gen_or_i64(round, a, b);
    tcg_gen_andi_i64(round, round, dup_const(MO_16, 1));

    /* Halve both inputs in place. */
    tcg_gen_vec_shr16i_i64(a, a, 1);
    tcg_gen_vec_shr16i_i64(b, b, 1);

    /* d = a/2 + b/2 + round */
    tcg_gen_vec_add16_i64(d, a, b);
    tcg_gen_vec_add16_i64(d, d, round);
}
23138989b95eSRichard Henderson 
/*
 * Unsigned rounding halving add of two 32-bit values:
 *     d = (a >> 1) + (b >> 1) + ((a | b) & 1)
 * using logical shifts.  The last term adds one whenever either low bit
 * is set, which rounds the halved sum upwards.  Note that A and B are
 * overwritten with their halved values.
 */
static void gen_urhadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 round = tcg_temp_new_i32();

    /* round = (a | b) & 1; taken before a and b are modified. */
    tcg_gen_or_i32(round, a, b);
    tcg_gen_andi_i32(round, round, 1);

    /* Halve both inputs in place. */
    tcg_gen_shri_i32(a, a, 1);
    tcg_gen_shri_i32(b, b, 1);

    /* d = a/2 + b/2 + round */
    tcg_gen_add_i32(d, a, b);
    tcg_gen_add_i32(d, d, round);
}
23258989b95eSRichard Henderson 
/*
 * Host-vector form of the unsigned rounding halving add, for elements
 * of size VECE:
 *     d = (a >> 1) + (b >> 1) + ((a | b) & 1)
 * with logical shifts.  A and B are overwritten with their halved
 * values.
 */
static void gen_urhadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec round = tcg_temp_new_vec_matching(d);
    TCGv_vec one = tcg_constant_vec_matching(d, vece, 1);

    /* Record a | b before the shifts discard the low bits. */
    tcg_gen_or_vec(vece, round, a, b);
    tcg_gen_shri_vec(vece, a, a, 1);
    tcg_gen_shri_vec(vece, b, b, 1);
    /* Keep only bit 0 of each element. */
    tcg_gen_and_vec(vece, round, round, one);
    tcg_gen_add_vec(vece, d, a, b);
    tcg_gen_add_vec(vece, d, d, round);
}
23378989b95eSRichard Henderson 
/*
 * Expand an unsigned rounding halving add over gvec operands.  VECE
 * selects the element size (MO_8, MO_16 or MO_32 only); the offsets and
 * sizes are forwarded unchanged to tcg_gen_gvec_3.
 */
void gen_gvec_urhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    /* Vector opcodes that gen_urhadd_vec relies on. */
    static const TCGOpcode vec_opcodes[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    /* One expander description per supported element size. */
    static const GVecGen3 ops[] = {
        [MO_8] = {
            .vece = MO_8,
            .fni8 = gen_urhadd8_i64,
            .fniv = gen_urhadd_vec,
            .opt_opc = vec_opcodes,
        },
        [MO_16] = {
            .vece = MO_16,
            .fni8 = gen_urhadd16_i64,
            .fniv = gen_urhadd_vec,
            .opt_opc = vec_opcodes,
        },
        [MO_32] = {
            .vece = MO_32,
            .fni4 = gen_urhadd_i32,
            .fniv = gen_urhadd_vec,
            .opt_opc = vec_opcodes,
        },
    };

    assert(vece <= MO_32);
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}
2361