xref: /openbmc/qemu/target/arm/tcg/gengvec.c (revision f4fa83d6)
109a52d85SRichard Henderson /*
209a52d85SRichard Henderson  *  ARM generic vector expansion
309a52d85SRichard Henderson  *
409a52d85SRichard Henderson  *  Copyright (c) 2003 Fabrice Bellard
509a52d85SRichard Henderson  *  Copyright (c) 2005-2007 CodeSourcery
609a52d85SRichard Henderson  *  Copyright (c) 2007 OpenedHand, Ltd.
709a52d85SRichard Henderson  *
809a52d85SRichard Henderson  * This library is free software; you can redistribute it and/or
909a52d85SRichard Henderson  * modify it under the terms of the GNU Lesser General Public
1009a52d85SRichard Henderson  * License as published by the Free Software Foundation; either
1109a52d85SRichard Henderson  * version 2.1 of the License, or (at your option) any later version.
1209a52d85SRichard Henderson  *
1309a52d85SRichard Henderson  * This library is distributed in the hope that it will be useful,
1409a52d85SRichard Henderson  * but WITHOUT ANY WARRANTY; without even the implied warranty of
1509a52d85SRichard Henderson  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
1609a52d85SRichard Henderson  * Lesser General Public License for more details.
1709a52d85SRichard Henderson  *
1809a52d85SRichard Henderson  * You should have received a copy of the GNU Lesser General Public
1909a52d85SRichard Henderson  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
2009a52d85SRichard Henderson  */
2109a52d85SRichard Henderson 
2209a52d85SRichard Henderson #include "qemu/osdep.h"
2309a52d85SRichard Henderson #include "translate.h"
2409a52d85SRichard Henderson 
2509a52d85SRichard Henderson 
2609a52d85SRichard Henderson static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
2709a52d85SRichard Henderson                             uint32_t opr_sz, uint32_t max_sz,
2809a52d85SRichard Henderson                             gen_helper_gvec_3_ptr *fn)
2909a52d85SRichard Henderson {
3009a52d85SRichard Henderson     TCGv_ptr qc_ptr = tcg_temp_new_ptr();
3109a52d85SRichard Henderson 
3201d5665bSRichard Henderson     tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
3309a52d85SRichard Henderson     tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc));
3409a52d85SRichard Henderson     tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
3509a52d85SRichard Henderson                        opr_sz, max_sz, 0, fn);
3609a52d85SRichard Henderson }
3709a52d85SRichard Henderson 
3809a52d85SRichard Henderson void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3909a52d85SRichard Henderson                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4009a52d85SRichard Henderson {
4109a52d85SRichard Henderson     static gen_helper_gvec_3_ptr * const fns[2] = {
4209a52d85SRichard Henderson         gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
4309a52d85SRichard Henderson     };
4409a52d85SRichard Henderson     tcg_debug_assert(vece >= 1 && vece <= 2);
4509a52d85SRichard Henderson     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
4609a52d85SRichard Henderson }
4709a52d85SRichard Henderson 
4809a52d85SRichard Henderson void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4909a52d85SRichard Henderson                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5009a52d85SRichard Henderson {
5109a52d85SRichard Henderson     static gen_helper_gvec_3_ptr * const fns[2] = {
5209a52d85SRichard Henderson         gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
5309a52d85SRichard Henderson     };
5409a52d85SRichard Henderson     tcg_debug_assert(vece >= 1 && vece <= 2);
5509a52d85SRichard Henderson     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
5609a52d85SRichard Henderson }
5709a52d85SRichard Henderson 
5809a52d85SRichard Henderson #define GEN_CMP0(NAME, COND)                              \
5909a52d85SRichard Henderson     void NAME(unsigned vece, uint32_t d, uint32_t m,      \
6009a52d85SRichard Henderson               uint32_t opr_sz, uint32_t max_sz)           \
6109a52d85SRichard Henderson     { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); }
6209a52d85SRichard Henderson 
6309a52d85SRichard Henderson GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ)
6409a52d85SRichard Henderson GEN_CMP0(gen_gvec_cle0, TCG_COND_LE)
6509a52d85SRichard Henderson GEN_CMP0(gen_gvec_cge0, TCG_COND_GE)
6609a52d85SRichard Henderson GEN_CMP0(gen_gvec_clt0, TCG_COND_LT)
6709a52d85SRichard Henderson GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT)
6809a52d85SRichard Henderson 
6909a52d85SRichard Henderson #undef GEN_CMP0
7009a52d85SRichard Henderson 
7109a52d85SRichard Henderson static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
7209a52d85SRichard Henderson {
7309a52d85SRichard Henderson     tcg_gen_vec_sar8i_i64(a, a, shift);
7409a52d85SRichard Henderson     tcg_gen_vec_add8_i64(d, d, a);
7509a52d85SRichard Henderson }
7609a52d85SRichard Henderson 
7709a52d85SRichard Henderson static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
7809a52d85SRichard Henderson {
7909a52d85SRichard Henderson     tcg_gen_vec_sar16i_i64(a, a, shift);
8009a52d85SRichard Henderson     tcg_gen_vec_add16_i64(d, d, a);
8109a52d85SRichard Henderson }
8209a52d85SRichard Henderson 
8309a52d85SRichard Henderson static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
8409a52d85SRichard Henderson {
8509a52d85SRichard Henderson     tcg_gen_sari_i32(a, a, shift);
8609a52d85SRichard Henderson     tcg_gen_add_i32(d, d, a);
8709a52d85SRichard Henderson }
8809a52d85SRichard Henderson 
8909a52d85SRichard Henderson static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
9009a52d85SRichard Henderson {
9109a52d85SRichard Henderson     tcg_gen_sari_i64(a, a, shift);
9209a52d85SRichard Henderson     tcg_gen_add_i64(d, d, a);
9309a52d85SRichard Henderson }
9409a52d85SRichard Henderson 
9509a52d85SRichard Henderson static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
9609a52d85SRichard Henderson {
9709a52d85SRichard Henderson     tcg_gen_sari_vec(vece, a, a, sh);
9809a52d85SRichard Henderson     tcg_gen_add_vec(vece, d, d, a);
9909a52d85SRichard Henderson }
10009a52d85SRichard Henderson 
/*
 * Expand SSRA (signed shift right and accumulate):
 * each element of size (8 << vece) gets (element >> shift), using an
 * arithmetic shift, added into the corresponding destination element.
 */
void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    /*
     * Per-element-size expanders.  load_dest is set because the
     * destination is also an accumulator input.
     */
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_ssra8_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_ssra16_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_ssra32_i32,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_ssra64_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Signed results in all sign bits.
     */
    shift = MIN(shift, (8 << vece) - 1);
    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
}
14609a52d85SRichard Henderson 
14709a52d85SRichard Henderson static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
14809a52d85SRichard Henderson {
14909a52d85SRichard Henderson     tcg_gen_vec_shr8i_i64(a, a, shift);
15009a52d85SRichard Henderson     tcg_gen_vec_add8_i64(d, d, a);
15109a52d85SRichard Henderson }
15209a52d85SRichard Henderson 
15309a52d85SRichard Henderson static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
15409a52d85SRichard Henderson {
15509a52d85SRichard Henderson     tcg_gen_vec_shr16i_i64(a, a, shift);
15609a52d85SRichard Henderson     tcg_gen_vec_add16_i64(d, d, a);
15709a52d85SRichard Henderson }
15809a52d85SRichard Henderson 
15909a52d85SRichard Henderson static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
16009a52d85SRichard Henderson {
16109a52d85SRichard Henderson     tcg_gen_shri_i32(a, a, shift);
16209a52d85SRichard Henderson     tcg_gen_add_i32(d, d, a);
16309a52d85SRichard Henderson }
16409a52d85SRichard Henderson 
16509a52d85SRichard Henderson static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
16609a52d85SRichard Henderson {
16709a52d85SRichard Henderson     tcg_gen_shri_i64(a, a, shift);
16809a52d85SRichard Henderson     tcg_gen_add_i64(d, d, a);
16909a52d85SRichard Henderson }
17009a52d85SRichard Henderson 
17109a52d85SRichard Henderson static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
17209a52d85SRichard Henderson {
17309a52d85SRichard Henderson     tcg_gen_shri_vec(vece, a, a, sh);
17409a52d85SRichard Henderson     tcg_gen_add_vec(vece, d, d, a);
17509a52d85SRichard Henderson }
17609a52d85SRichard Henderson 
/*
 * Expand USRA (unsigned shift right and accumulate):
 * each element of size (8 << vece) gets (element >> shift), using a
 * logical shift, added into the corresponding destination element.
 */
void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    /*
     * Per-element-size expanders.  load_dest is set because the
     * destination is also an accumulator input.
     */
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_usra8_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8, },
        { .fni8 = gen_usra16_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16, },
        { .fni4 = gen_usra32_i32,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32, },
        { .fni8 = gen_usra64_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64, },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Unsigned results in all zeros as input to accumulate: nop.
     */
    if (shift < (8 << vece)) {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    } else {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    }
}
22609a52d85SRichard Henderson 
22709a52d85SRichard Henderson /*
22809a52d85SRichard Henderson  * Shift one less than the requested amount, and the low bit is
22909a52d85SRichard Henderson  * the rounding bit.  For the 8 and 16-bit operations, because we
23009a52d85SRichard Henderson  * mask the low bit, we can perform a normal integer shift instead
23109a52d85SRichard Henderson  * of a vector shift.
23209a52d85SRichard Henderson  */
/*
 * Signed rounding shift right on eight 8-bit lanes:
 *   d = (a >> sh) + rounding-bit, where the rounding bit of each lane
 * is bit (sh - 1) of that lane of @a.
 */
static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    /* Full-width shift then a per-lane mask isolates each rounding bit. */
    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_sar8i_i64(d, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}
24209a52d85SRichard Henderson 
/*
 * Signed rounding shift right on four 16-bit lanes:
 *   d = (a >> sh) + rounding-bit, where the rounding bit of each lane
 * is bit (sh - 1) of that lane of @a.
 */
static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    /* Full-width shift then a per-lane mask isolates each rounding bit. */
    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_sar16i_i64(d, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}
25209a52d85SRichard Henderson 
/*
 * Signed rounding shift right on one 32-bit value:
 *   d = (a >> sh) + bit (sh - 1) of a.
 * sh == 32 is accepted and yields zero: the sign-extended result
 * (-1 or 0) plus the rounding bit (the msb) always cancels.
 */
void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t;

    /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
    if (sh == 32) {
        tcg_gen_movi_i32(d, 0);
        return;
    }
    t = tcg_temp_new_i32();
    tcg_gen_extract_i32(t, a, sh - 1, 1);
    tcg_gen_sari_i32(d, a, sh);
    tcg_gen_add_i32(d, d, t);
}
26709a52d85SRichard Henderson 
26809a52d85SRichard Henderson  void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
26909a52d85SRichard Henderson {
27009a52d85SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
27109a52d85SRichard Henderson 
27209a52d85SRichard Henderson     tcg_gen_extract_i64(t, a, sh - 1, 1);
27309a52d85SRichard Henderson     tcg_gen_sari_i64(d, a, sh);
27409a52d85SRichard Henderson     tcg_gen_add_i64(d, d, t);
27509a52d85SRichard Henderson }
27609a52d85SRichard Henderson 
27709a52d85SRichard Henderson static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
27809a52d85SRichard Henderson {
27909a52d85SRichard Henderson     TCGv_vec t = tcg_temp_new_vec_matching(d);
28009a52d85SRichard Henderson     TCGv_vec ones = tcg_temp_new_vec_matching(d);
28109a52d85SRichard Henderson 
28209a52d85SRichard Henderson     tcg_gen_shri_vec(vece, t, a, sh - 1);
28309a52d85SRichard Henderson     tcg_gen_dupi_vec(vece, ones, 1);
28409a52d85SRichard Henderson     tcg_gen_and_vec(vece, t, t, ones);
28509a52d85SRichard Henderson     tcg_gen_sari_vec(vece, d, a, sh);
28609a52d85SRichard Henderson     tcg_gen_add_vec(vece, d, d, t);
28709a52d85SRichard Henderson }
28809a52d85SRichard Henderson 
/*
 * Expand SRSHR (signed rounding shift right) for vectors:
 * each element is arithmetically shifted right by @shift with the
 * bit shifted out last added back in as rounding.
 */
void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    /* Per-element-size expanders; no load_dest, d is pure output. */
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_srshr8_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_srshr16_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_srshr32_i32,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_srshr64_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    if (shift == (8 << vece)) {
        /*
         * Shifts larger than the element size are architecturally valid.
         * Signed results in all sign bits.  With rounding, this produces
         *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
         * I.e. always zero.
         */
        tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}
33509a52d85SRichard Henderson 
33609a52d85SRichard Henderson static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
33709a52d85SRichard Henderson {
33809a52d85SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
33909a52d85SRichard Henderson 
34009a52d85SRichard Henderson     gen_srshr8_i64(t, a, sh);
34109a52d85SRichard Henderson     tcg_gen_vec_add8_i64(d, d, t);
34209a52d85SRichard Henderson }
34309a52d85SRichard Henderson 
34409a52d85SRichard Henderson static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
34509a52d85SRichard Henderson {
34609a52d85SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
34709a52d85SRichard Henderson 
34809a52d85SRichard Henderson     gen_srshr16_i64(t, a, sh);
34909a52d85SRichard Henderson     tcg_gen_vec_add16_i64(d, d, t);
35009a52d85SRichard Henderson }
35109a52d85SRichard Henderson 
35209a52d85SRichard Henderson static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
35309a52d85SRichard Henderson {
35409a52d85SRichard Henderson     TCGv_i32 t = tcg_temp_new_i32();
35509a52d85SRichard Henderson 
35609a52d85SRichard Henderson     gen_srshr32_i32(t, a, sh);
35709a52d85SRichard Henderson     tcg_gen_add_i32(d, d, t);
35809a52d85SRichard Henderson }
35909a52d85SRichard Henderson 
36009a52d85SRichard Henderson static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
36109a52d85SRichard Henderson {
36209a52d85SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
36309a52d85SRichard Henderson 
36409a52d85SRichard Henderson     gen_srshr64_i64(t, a, sh);
36509a52d85SRichard Henderson     tcg_gen_add_i64(d, d, t);
36609a52d85SRichard Henderson }
36709a52d85SRichard Henderson 
36809a52d85SRichard Henderson static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
36909a52d85SRichard Henderson {
37009a52d85SRichard Henderson     TCGv_vec t = tcg_temp_new_vec_matching(d);
37109a52d85SRichard Henderson 
37209a52d85SRichard Henderson     gen_srshr_vec(vece, t, a, sh);
37309a52d85SRichard Henderson     tcg_gen_add_vec(vece, d, d, t);
37409a52d85SRichard Henderson }
37509a52d85SRichard Henderson 
/*
 * Expand SRSRA (signed rounding shift right and accumulate):
 * as gen_gvec_srshr, but the rounded result is added into the
 * destination rather than replacing it.
 */
void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    /* Per-element-size expanders; load_dest because d accumulates. */
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_srsra8_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fni8 = gen_srsra16_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_srsra32_i32,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_srsra64_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Signed results in all sign bits.  With rounding, this produces
     *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
     * I.e. always zero.  With accumulation, this leaves D unchanged.
     */
    if (shift == (8 << vece)) {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}
42709a52d85SRichard Henderson 
/*
 * Unsigned rounding shift right on eight 8-bit lanes:
 *   d = (a >> sh) + rounding-bit, where the rounding bit of each lane
 * is bit (sh - 1) of that lane of @a.
 */
static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    /* Full-width shift then a per-lane mask isolates each rounding bit. */
    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_shr8i_i64(d, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}
43709a52d85SRichard Henderson 
/*
 * Unsigned rounding shift right on four 16-bit lanes:
 *   d = (a >> sh) + rounding-bit, where the rounding bit of each lane
 * is bit (sh - 1) of that lane of @a.
 */
static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    /* Full-width shift then a per-lane mask isolates each rounding bit. */
    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_shr16i_i64(d, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}
44709a52d85SRichard Henderson 
/*
 * Unsigned rounding shift right on one 32-bit value:
 *   d = (a >> sh) + bit (sh - 1) of a.
 * sh == 32 is accepted: the shifted value is zero, so the result is
 * just the rounding bit, i.e. bit 31 of @a.
 */
void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t;

    /* Handle shift by the input size for the benefit of trans_URSHR_ri */
    if (sh == 32) {
        tcg_gen_extract_i32(d, a, sh - 1, 1);
        return;
    }
    t = tcg_temp_new_i32();
    tcg_gen_extract_i32(t, a, sh - 1, 1);
    tcg_gen_shri_i32(d, a, sh);
    tcg_gen_add_i32(d, d, t);
}
46209a52d85SRichard Henderson 
/*
 * Unsigned rounding shift right on one 64-bit value:
 *   d = (a >> sh) + bit (sh - 1) of a.
 * NOTE(review): no special case for sh == 64 here; callers appear to
 * route that case elsewhere (see gen_ursra64_i64 below) — confirm.
 */
void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_extract_i64(t, a, sh - 1, 1);
    tcg_gen_shri_i64(d, a, sh);
    tcg_gen_add_i64(d, d, t);
}
47109a52d85SRichard Henderson 
47209a52d85SRichard Henderson static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
47309a52d85SRichard Henderson {
47409a52d85SRichard Henderson     TCGv_vec t = tcg_temp_new_vec_matching(d);
47509a52d85SRichard Henderson     TCGv_vec ones = tcg_temp_new_vec_matching(d);
47609a52d85SRichard Henderson 
47709a52d85SRichard Henderson     tcg_gen_shri_vec(vece, t, a, shift - 1);
47809a52d85SRichard Henderson     tcg_gen_dupi_vec(vece, ones, 1);
47909a52d85SRichard Henderson     tcg_gen_and_vec(vece, t, t, ones);
48009a52d85SRichard Henderson     tcg_gen_shri_vec(vece, d, a, shift);
48109a52d85SRichard Henderson     tcg_gen_add_vec(vece, d, d, t);
48209a52d85SRichard Henderson }
48309a52d85SRichard Henderson 
/*
 * Expand URSHR (unsigned rounding shift right) for vectors:
 * each element is logically shifted right by @shift with the bit
 * shifted out last added back in as rounding.
 */
void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    /* Per-element-size expanders; no load_dest, d is pure output. */
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_urshr8_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_urshr16_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_urshr32_i32,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_urshr64_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    if (shift == (8 << vece)) {
        /*
         * Shifts larger than the element size are architecturally valid.
         * Unsigned results in zero.  With rounding, this produces a
         * copy of the most significant bit.
         */
        tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}
52909a52d85SRichard Henderson 
53009a52d85SRichard Henderson static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
53109a52d85SRichard Henderson {
53209a52d85SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
53309a52d85SRichard Henderson 
53409a52d85SRichard Henderson     if (sh == 8) {
53509a52d85SRichard Henderson         tcg_gen_vec_shr8i_i64(t, a, 7);
53609a52d85SRichard Henderson     } else {
53709a52d85SRichard Henderson         gen_urshr8_i64(t, a, sh);
53809a52d85SRichard Henderson     }
53909a52d85SRichard Henderson     tcg_gen_vec_add8_i64(d, d, t);
54009a52d85SRichard Henderson }
54109a52d85SRichard Henderson 
54209a52d85SRichard Henderson static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
54309a52d85SRichard Henderson {
54409a52d85SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
54509a52d85SRichard Henderson 
54609a52d85SRichard Henderson     if (sh == 16) {
54709a52d85SRichard Henderson         tcg_gen_vec_shr16i_i64(t, a, 15);
54809a52d85SRichard Henderson     } else {
54909a52d85SRichard Henderson         gen_urshr16_i64(t, a, sh);
55009a52d85SRichard Henderson     }
55109a52d85SRichard Henderson     tcg_gen_vec_add16_i64(d, d, t);
55209a52d85SRichard Henderson }
55309a52d85SRichard Henderson 
55409a52d85SRichard Henderson static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
55509a52d85SRichard Henderson {
55609a52d85SRichard Henderson     TCGv_i32 t = tcg_temp_new_i32();
55709a52d85SRichard Henderson 
55809a52d85SRichard Henderson     if (sh == 32) {
55909a52d85SRichard Henderson         tcg_gen_shri_i32(t, a, 31);
56009a52d85SRichard Henderson     } else {
56109a52d85SRichard Henderson         gen_urshr32_i32(t, a, sh);
56209a52d85SRichard Henderson     }
56309a52d85SRichard Henderson     tcg_gen_add_i32(d, d, t);
56409a52d85SRichard Henderson }
56509a52d85SRichard Henderson 
56609a52d85SRichard Henderson static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
56709a52d85SRichard Henderson {
56809a52d85SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
56909a52d85SRichard Henderson 
57009a52d85SRichard Henderson     if (sh == 64) {
57109a52d85SRichard Henderson         tcg_gen_shri_i64(t, a, 63);
57209a52d85SRichard Henderson     } else {
57309a52d85SRichard Henderson         gen_urshr64_i64(t, a, sh);
57409a52d85SRichard Henderson     }
57509a52d85SRichard Henderson     tcg_gen_add_i64(d, d, t);
57609a52d85SRichard Henderson }
57709a52d85SRichard Henderson 
57809a52d85SRichard Henderson static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
57909a52d85SRichard Henderson {
58009a52d85SRichard Henderson     TCGv_vec t = tcg_temp_new_vec_matching(d);
58109a52d85SRichard Henderson 
58209a52d85SRichard Henderson     if (sh == (8 << vece)) {
58309a52d85SRichard Henderson         tcg_gen_shri_vec(vece, t, a, sh - 1);
58409a52d85SRichard Henderson     } else {
58509a52d85SRichard Henderson         gen_urshr_vec(vece, t, a, sh);
58609a52d85SRichard Henderson     }
58709a52d85SRichard Henderson     tcg_gen_add_vec(vece, d, d, t);
58809a52d85SRichard Henderson }
58909a52d85SRichard Henderson 
/*
 * Expand URSRA: per element, accumulate into rd the unsigned rounding
 * right shift of rm (d += urshr(m, shift)).
 */
void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_ursra8_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_b,
          .opt_opc = vecop_list,
          .load_dest = true,    /* accumulating: d is read as well as written */
          .vece = MO_8 },
        { .fni8 = gen_ursra16_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_ursra32_i32,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_ursra64_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /* shift == esize is valid here; the per-element fns special-case it. */
    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
}
63009a52d85SRichard Henderson 
63109a52d85SRichard Henderson static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
63209a52d85SRichard Henderson {
63309a52d85SRichard Henderson     uint64_t mask = dup_const(MO_8, 0xff >> shift);
63409a52d85SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
63509a52d85SRichard Henderson 
63609a52d85SRichard Henderson     tcg_gen_shri_i64(t, a, shift);
63709a52d85SRichard Henderson     tcg_gen_andi_i64(t, t, mask);
63809a52d85SRichard Henderson     tcg_gen_andi_i64(d, d, ~mask);
63909a52d85SRichard Henderson     tcg_gen_or_i64(d, d, t);
64009a52d85SRichard Henderson }
64109a52d85SRichard Henderson 
64209a52d85SRichard Henderson static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
64309a52d85SRichard Henderson {
64409a52d85SRichard Henderson     uint64_t mask = dup_const(MO_16, 0xffff >> shift);
64509a52d85SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
64609a52d85SRichard Henderson 
64709a52d85SRichard Henderson     tcg_gen_shri_i64(t, a, shift);
64809a52d85SRichard Henderson     tcg_gen_andi_i64(t, t, mask);
64909a52d85SRichard Henderson     tcg_gen_andi_i64(d, d, ~mask);
65009a52d85SRichard Henderson     tcg_gen_or_i64(d, d, t);
65109a52d85SRichard Henderson }
65209a52d85SRichard Henderson 
/*
 * SRI for one 32-bit element: deposit a >> shift into bits
 * [0, 32 - shift) of d.  Only reached with shift < 32;
 * gen_gvec_sri handles shift == esize separately.
 */
static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);      /* clobbers a (scratch temp) */
    tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
}
65809a52d85SRichard Henderson 
/*
 * SRI for one 64-bit element: deposit a >> shift into bits
 * [0, 64 - shift) of d.  Only reached with shift < 64;
 * gen_gvec_sri handles shift == esize separately.
 */
static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);      /* clobbers a (scratch temp) */
    tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
}
66409a52d85SRichard Henderson 
/*
 * SRI on vector elements: insert a >> sh into the low bits of each
 * element of d, preserving the top sh bits of d.
 */
static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec m = tcg_temp_new_vec_matching(d);

    /* m = mask of the sh most significant bits of each element. */
    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
    tcg_gen_shri_vec(vece, t, a, sh);
    tcg_gen_and_vec(vece, d, d, m);
    tcg_gen_or_vec(vece, d, d, t);
}
67509a52d85SRichard Henderson 
67609a52d85SRichard Henderson void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
67709a52d85SRichard Henderson                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
67809a52d85SRichard Henderson {
67909a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
68009a52d85SRichard Henderson     const GVecGen2i ops[4] = {
68109a52d85SRichard Henderson         { .fni8 = gen_shr8_ins_i64,
68209a52d85SRichard Henderson           .fniv = gen_shr_ins_vec,
68309a52d85SRichard Henderson           .fno = gen_helper_gvec_sri_b,
68409a52d85SRichard Henderson           .load_dest = true,
68509a52d85SRichard Henderson           .opt_opc = vecop_list,
68609a52d85SRichard Henderson           .vece = MO_8 },
68709a52d85SRichard Henderson         { .fni8 = gen_shr16_ins_i64,
68809a52d85SRichard Henderson           .fniv = gen_shr_ins_vec,
68909a52d85SRichard Henderson           .fno = gen_helper_gvec_sri_h,
69009a52d85SRichard Henderson           .load_dest = true,
69109a52d85SRichard Henderson           .opt_opc = vecop_list,
69209a52d85SRichard Henderson           .vece = MO_16 },
69309a52d85SRichard Henderson         { .fni4 = gen_shr32_ins_i32,
69409a52d85SRichard Henderson           .fniv = gen_shr_ins_vec,
69509a52d85SRichard Henderson           .fno = gen_helper_gvec_sri_s,
69609a52d85SRichard Henderson           .load_dest = true,
69709a52d85SRichard Henderson           .opt_opc = vecop_list,
69809a52d85SRichard Henderson           .vece = MO_32 },
69909a52d85SRichard Henderson         { .fni8 = gen_shr64_ins_i64,
70009a52d85SRichard Henderson           .fniv = gen_shr_ins_vec,
70109a52d85SRichard Henderson           .fno = gen_helper_gvec_sri_d,
70209a52d85SRichard Henderson           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
70309a52d85SRichard Henderson           .load_dest = true,
70409a52d85SRichard Henderson           .opt_opc = vecop_list,
70509a52d85SRichard Henderson           .vece = MO_64 },
70609a52d85SRichard Henderson     };
70709a52d85SRichard Henderson 
70809a52d85SRichard Henderson     /* tszimm encoding produces immediates in the range [1..esize]. */
70909a52d85SRichard Henderson     tcg_debug_assert(shift > 0);
71009a52d85SRichard Henderson     tcg_debug_assert(shift <= (8 << vece));
71109a52d85SRichard Henderson 
71209a52d85SRichard Henderson     /* Shift of esize leaves destination unchanged. */
71309a52d85SRichard Henderson     if (shift < (8 << vece)) {
71409a52d85SRichard Henderson         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
71509a52d85SRichard Henderson     } else {
71609a52d85SRichard Henderson         /* Nop, but we do need to clear the tail. */
71709a52d85SRichard Henderson         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
71809a52d85SRichard Henderson     }
71909a52d85SRichard Henderson }
72009a52d85SRichard Henderson 
72109a52d85SRichard Henderson static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
72209a52d85SRichard Henderson {
72309a52d85SRichard Henderson     uint64_t mask = dup_const(MO_8, 0xff << shift);
72409a52d85SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
72509a52d85SRichard Henderson 
72609a52d85SRichard Henderson     tcg_gen_shli_i64(t, a, shift);
72709a52d85SRichard Henderson     tcg_gen_andi_i64(t, t, mask);
72809a52d85SRichard Henderson     tcg_gen_andi_i64(d, d, ~mask);
72909a52d85SRichard Henderson     tcg_gen_or_i64(d, d, t);
73009a52d85SRichard Henderson }
73109a52d85SRichard Henderson 
73209a52d85SRichard Henderson static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
73309a52d85SRichard Henderson {
73409a52d85SRichard Henderson     uint64_t mask = dup_const(MO_16, 0xffff << shift);
73509a52d85SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
73609a52d85SRichard Henderson 
73709a52d85SRichard Henderson     tcg_gen_shli_i64(t, a, shift);
73809a52d85SRichard Henderson     tcg_gen_andi_i64(t, t, mask);
73909a52d85SRichard Henderson     tcg_gen_andi_i64(d, d, ~mask);
74009a52d85SRichard Henderson     tcg_gen_or_i64(d, d, t);
74109a52d85SRichard Henderson }
74209a52d85SRichard Henderson 
/*
 * SLI for one 32-bit element: deposit a into bits [shift, 32) of d.
 * Only reached with shift > 0; gen_gvec_sli handles shift == 0 as a
 * plain move.
 */
static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
}
74709a52d85SRichard Henderson 
/*
 * SLI for one 64-bit element: deposit a into bits [shift, 64) of d.
 * Only reached with shift > 0; gen_gvec_sli handles shift == 0 as a
 * plain move.
 */
static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
}
75209a52d85SRichard Henderson 
/*
 * SLI on vector elements: insert a << sh into the high bits of each
 * element of d, preserving the low sh bits of d.
 */
static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec m = tcg_temp_new_vec_matching(d);

    /* m = mask of the sh least significant bits of each element. */
    tcg_gen_shli_vec(vece, t, a, sh);
    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
    tcg_gen_and_vec(vece, d, d, m);
    tcg_gen_or_vec(vece, d, d, t);
}
76309a52d85SRichard Henderson 
76409a52d85SRichard Henderson void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
76509a52d85SRichard Henderson                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
76609a52d85SRichard Henderson {
76709a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
76809a52d85SRichard Henderson     const GVecGen2i ops[4] = {
76909a52d85SRichard Henderson         { .fni8 = gen_shl8_ins_i64,
77009a52d85SRichard Henderson           .fniv = gen_shl_ins_vec,
77109a52d85SRichard Henderson           .fno = gen_helper_gvec_sli_b,
77209a52d85SRichard Henderson           .load_dest = true,
77309a52d85SRichard Henderson           .opt_opc = vecop_list,
77409a52d85SRichard Henderson           .vece = MO_8 },
77509a52d85SRichard Henderson         { .fni8 = gen_shl16_ins_i64,
77609a52d85SRichard Henderson           .fniv = gen_shl_ins_vec,
77709a52d85SRichard Henderson           .fno = gen_helper_gvec_sli_h,
77809a52d85SRichard Henderson           .load_dest = true,
77909a52d85SRichard Henderson           .opt_opc = vecop_list,
78009a52d85SRichard Henderson           .vece = MO_16 },
78109a52d85SRichard Henderson         { .fni4 = gen_shl32_ins_i32,
78209a52d85SRichard Henderson           .fniv = gen_shl_ins_vec,
78309a52d85SRichard Henderson           .fno = gen_helper_gvec_sli_s,
78409a52d85SRichard Henderson           .load_dest = true,
78509a52d85SRichard Henderson           .opt_opc = vecop_list,
78609a52d85SRichard Henderson           .vece = MO_32 },
78709a52d85SRichard Henderson         { .fni8 = gen_shl64_ins_i64,
78809a52d85SRichard Henderson           .fniv = gen_shl_ins_vec,
78909a52d85SRichard Henderson           .fno = gen_helper_gvec_sli_d,
79009a52d85SRichard Henderson           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
79109a52d85SRichard Henderson           .load_dest = true,
79209a52d85SRichard Henderson           .opt_opc = vecop_list,
79309a52d85SRichard Henderson           .vece = MO_64 },
79409a52d85SRichard Henderson     };
79509a52d85SRichard Henderson 
79609a52d85SRichard Henderson     /* tszimm encoding produces immediates in the range [0..esize-1]. */
79709a52d85SRichard Henderson     tcg_debug_assert(shift >= 0);
79809a52d85SRichard Henderson     tcg_debug_assert(shift < (8 << vece));
79909a52d85SRichard Henderson 
80009a52d85SRichard Henderson     if (shift == 0) {
80109a52d85SRichard Henderson         tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
80209a52d85SRichard Henderson     } else {
80309a52d85SRichard Henderson         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
80409a52d85SRichard Henderson     }
80509a52d85SRichard Henderson }
80609a52d85SRichard Henderson 
/* d += a * b for packed u8 lanes; 'a' is clobbered as scratch. */
static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u8(a, a, b);
    gen_helper_neon_add_u8(d, d, a);
}
81209a52d85SRichard Henderson 
/* d -= a * b for packed u8 lanes; 'a' is clobbered as scratch. */
static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u8(a, a, b);
    gen_helper_neon_sub_u8(d, d, a);
}
81809a52d85SRichard Henderson 
/* d += a * b for packed u16 lanes; 'a' is clobbered as scratch. */
static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u16(a, a, b);
    gen_helper_neon_add_u16(d, d, a);
}
82409a52d85SRichard Henderson 
/* d -= a * b for packed u16 lanes; 'a' is clobbered as scratch. */
static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u16(a, a, b);
    gen_helper_neon_sub_u16(d, d, a);
}
83009a52d85SRichard Henderson 
/* d += a * b for one 32-bit lane; 'a' is clobbered as scratch. */
static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mul_i32(a, a, b);
    tcg_gen_add_i32(d, d, a);
}
83609a52d85SRichard Henderson 
/* d -= a * b for one 32-bit lane; 'a' is clobbered as scratch. */
static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mul_i32(a, a, b);
    tcg_gen_sub_i32(d, d, a);
}
84209a52d85SRichard Henderson 
/* d += a * b for one 64-bit lane; 'a' is clobbered as scratch. */
static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mul_i64(a, a, b);
    tcg_gen_add_i64(d, d, a);
}
84809a52d85SRichard Henderson 
/* d -= a * b for one 64-bit lane; 'a' is clobbered as scratch. */
static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mul_i64(a, a, b);
    tcg_gen_sub_i64(d, d, a);
}
85409a52d85SRichard Henderson 
/* Vector d += a * b; 'a' is clobbered as scratch. */
static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_mul_vec(vece, a, a, b);
    tcg_gen_add_vec(vece, d, d, a);
}
86009a52d85SRichard Henderson 
/* Vector d -= a * b; 'a' is clobbered as scratch. */
static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_mul_vec(vece, a, a, b);
    tcg_gen_sub_vec(vece, d, d, a);
}
86609a52d85SRichard Henderson 
/*
 * Expand MLA: per element, rd += rn * rm.
 *
 * Note that while NEON does not support VMLA and VMLS as 64-bit ops,
 * these tables are shared with AArch64 which does support them.
 */
void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_mul_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_mla8_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,    /* accumulating: d is read as well */
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_mla16_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_mla32_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_mla64_i64,
          .fniv = gen_mla_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}
90109a52d85SRichard Henderson 
/* Expand MLS: per element, rd -= rn * rm. */
void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_mul_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_mls8_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,    /* accumulating: d is read as well */
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_mls16_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_mls32_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_mls64_i64,
          .fniv = gen_mls_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}
93309a52d85SRichard Henderson 
/* CMTST : test is "if (X & Y != 0)". */
static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_and_i32(d, a, b);
    /* negsetcond produces all-ones for true, zero for false. */
    tcg_gen_negsetcond_i32(TCG_COND_NE, d, d, tcg_constant_i32(0));
}
94009a52d85SRichard Henderson 
/* 64-bit CMTST: d = (a & b) != 0 ? -1 : 0. */
void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_and_i64(d, a, b);
    /* negsetcond produces all-ones for true, zero for false. */
    tcg_gen_negsetcond_i64(TCG_COND_NE, d, d, tcg_constant_i64(0));
}
94609a52d85SRichard Henderson 
/* Vector CMTST: per element, d = (a & b) != 0 ? all-ones : 0. */
static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_and_vec(vece, d, a, b);
    tcg_gen_dupi_vec(vece, a, 0);       /* reuse 'a' as a zero scratch */
    tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
}
95309a52d85SRichard Henderson 
/* Expand CMTST: per element, rd = (rn & rm) != 0 ? all-ones : 0. */
void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
    static const GVecGen3 ops[4] = {
        /* For u8/u16, the out-of-line Neon helpers serve as .fni4. */
        { .fni4 = gen_helper_neon_tst_u8,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_helper_neon_tst_u16,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_cmtst_i32,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_cmtst_i64,
          .fniv = gen_cmtst_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}
97909a52d85SRichard Henderson 
/*
 * USHL for one 32-bit element: shift src by the signed count in the
 * low byte of 'shift'.  Positive counts shift left, negative counts
 * shift right; counts of magnitude >= 32 produce zero.
 */
void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
{
    TCGv_i32 lval = tcg_temp_new_i32();
    TCGv_i32 rval = tcg_temp_new_i32();
    TCGv_i32 lsh = tcg_temp_new_i32();
    TCGv_i32 rsh = tcg_temp_new_i32();
    TCGv_i32 zero = tcg_constant_i32(0);
    TCGv_i32 max = tcg_constant_i32(32);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i32(lsh, shift);      /* count = signed low byte */
    tcg_gen_neg_i32(rsh, lsh);          /* right-shift count for lsh < 0 */
    tcg_gen_shl_i32(lval, src, lsh);
    tcg_gen_shr_i32(rval, src, rsh);
    /* Unsigned compares: negative counts are huge and thus rejected. */
    tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
    tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
}
100109a52d85SRichard Henderson 
/*
 * USHL for one 64-bit element: shift src by the signed count in the
 * low byte of 'shift'.  Positive counts shift left, negative counts
 * shift right; counts of magnitude >= 64 produce zero.
 */
void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
{
    TCGv_i64 lval = tcg_temp_new_i64();
    TCGv_i64 rval = tcg_temp_new_i64();
    TCGv_i64 lsh = tcg_temp_new_i64();
    TCGv_i64 rsh = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_constant_i64(0);
    TCGv_i64 max = tcg_constant_i64(64);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i64(lsh, shift);      /* count = signed low byte */
    tcg_gen_neg_i64(rsh, lsh);          /* right-shift count for lsh < 0 */
    tcg_gen_shl_i64(lval, src, lsh);
    tcg_gen_shr_i64(rval, src, rsh);
    /* Unsigned compares: negative counts are huge and thus rejected. */
    tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
    tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
}
102309a52d85SRichard Henderson 
/*
 * Vector USHL: per element, shift src by the signed count in the low
 * byte of the corresponding element of 'shift'; negative counts shift
 * right, and counts of magnitude >= esize produce zero.
 */
static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
                         TCGv_vec src, TCGv_vec shift)
{
    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec msk, max;

    tcg_gen_neg_vec(vece, rsh, shift);
    if (vece == MO_8) {
        /* The whole byte element is the shift count already. */
        tcg_gen_mov_vec(lsh, shift);
    } else {
        /* Isolate the low byte of each element as the shift count. */
        msk = tcg_temp_new_vec_matching(dst);
        tcg_gen_dupi_vec(vece, msk, 0xff);
        tcg_gen_and_vec(vece, lsh, shift, msk);
        tcg_gen_and_vec(vece, rsh, rsh, msk);
    }

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_shlv_vec(vece, lval, src, lsh);
    tcg_gen_shrv_vec(vece, rval, src, rsh);

    max = tcg_temp_new_vec_matching(dst);
    tcg_gen_dupi_vec(vece, max, 8 << vece);

    /*
     * The choice of LT (signed) and GEU (unsigned) are biased toward
     * the instructions of the x86_64 host.  For MO_8, the whole byte
     * is significant so we must use an unsigned compare; otherwise we
     * have already masked to a byte and so a signed compare works.
     * Other tcg hosts have a full set of comparisons and do not care.
     */
    if (vece == MO_8) {
        tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
        tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
        tcg_gen_andc_vec(vece, lval, lval, lsh);
        tcg_gen_andc_vec(vece, rval, rval, rsh);
    } else {
        tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
        tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
        tcg_gen_and_vec(vece, lval, lval, lsh);
        tcg_gen_and_vec(vece, rval, rval, rsh);
    }
    /* At most one of lval/rval survived the masking; combine them. */
    tcg_gen_or_vec(vece, dst, lval, rval);
}
107409a52d85SRichard Henderson 
107509a52d85SRichard Henderson void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
107609a52d85SRichard Henderson                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
107709a52d85SRichard Henderson {
107809a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
107909a52d85SRichard Henderson         INDEX_op_neg_vec, INDEX_op_shlv_vec,
108009a52d85SRichard Henderson         INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
108109a52d85SRichard Henderson     };
108209a52d85SRichard Henderson     static const GVecGen3 ops[4] = {
108309a52d85SRichard Henderson         { .fniv = gen_ushl_vec,
108409a52d85SRichard Henderson           .fno = gen_helper_gvec_ushl_b,
108509a52d85SRichard Henderson           .opt_opc = vecop_list,
108609a52d85SRichard Henderson           .vece = MO_8 },
108709a52d85SRichard Henderson         { .fniv = gen_ushl_vec,
108809a52d85SRichard Henderson           .fno = gen_helper_gvec_ushl_h,
108909a52d85SRichard Henderson           .opt_opc = vecop_list,
109009a52d85SRichard Henderson           .vece = MO_16 },
109109a52d85SRichard Henderson         { .fni4 = gen_ushl_i32,
109209a52d85SRichard Henderson           .fniv = gen_ushl_vec,
109309a52d85SRichard Henderson           .opt_opc = vecop_list,
109409a52d85SRichard Henderson           .vece = MO_32 },
109509a52d85SRichard Henderson         { .fni8 = gen_ushl_i64,
109609a52d85SRichard Henderson           .fniv = gen_ushl_vec,
109709a52d85SRichard Henderson           .opt_opc = vecop_list,
109809a52d85SRichard Henderson           .vece = MO_64 },
109909a52d85SRichard Henderson     };
110009a52d85SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
110109a52d85SRichard Henderson }
110209a52d85SRichard Henderson 
110309a52d85SRichard Henderson void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
110409a52d85SRichard Henderson {
110509a52d85SRichard Henderson     TCGv_i32 lval = tcg_temp_new_i32();
110609a52d85SRichard Henderson     TCGv_i32 rval = tcg_temp_new_i32();
110709a52d85SRichard Henderson     TCGv_i32 lsh = tcg_temp_new_i32();
110809a52d85SRichard Henderson     TCGv_i32 rsh = tcg_temp_new_i32();
110909a52d85SRichard Henderson     TCGv_i32 zero = tcg_constant_i32(0);
111009a52d85SRichard Henderson     TCGv_i32 max = tcg_constant_i32(31);
111109a52d85SRichard Henderson 
111209a52d85SRichard Henderson     /*
111309a52d85SRichard Henderson      * Rely on the TCG guarantee that out of range shifts produce
111409a52d85SRichard Henderson      * unspecified results, not undefined behaviour (i.e. no trap).
111509a52d85SRichard Henderson      * Discard out-of-range results after the fact.
111609a52d85SRichard Henderson      */
111709a52d85SRichard Henderson     tcg_gen_ext8s_i32(lsh, shift);
111809a52d85SRichard Henderson     tcg_gen_neg_i32(rsh, lsh);
111909a52d85SRichard Henderson     tcg_gen_shl_i32(lval, src, lsh);
112009a52d85SRichard Henderson     tcg_gen_umin_i32(rsh, rsh, max);
112109a52d85SRichard Henderson     tcg_gen_sar_i32(rval, src, rsh);
112209a52d85SRichard Henderson     tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
112309a52d85SRichard Henderson     tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
112409a52d85SRichard Henderson }
112509a52d85SRichard Henderson 
112609a52d85SRichard Henderson void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
112709a52d85SRichard Henderson {
112809a52d85SRichard Henderson     TCGv_i64 lval = tcg_temp_new_i64();
112909a52d85SRichard Henderson     TCGv_i64 rval = tcg_temp_new_i64();
113009a52d85SRichard Henderson     TCGv_i64 lsh = tcg_temp_new_i64();
113109a52d85SRichard Henderson     TCGv_i64 rsh = tcg_temp_new_i64();
113209a52d85SRichard Henderson     TCGv_i64 zero = tcg_constant_i64(0);
113309a52d85SRichard Henderson     TCGv_i64 max = tcg_constant_i64(63);
113409a52d85SRichard Henderson 
113509a52d85SRichard Henderson     /*
113609a52d85SRichard Henderson      * Rely on the TCG guarantee that out of range shifts produce
113709a52d85SRichard Henderson      * unspecified results, not undefined behaviour (i.e. no trap).
113809a52d85SRichard Henderson      * Discard out-of-range results after the fact.
113909a52d85SRichard Henderson      */
114009a52d85SRichard Henderson     tcg_gen_ext8s_i64(lsh, shift);
114109a52d85SRichard Henderson     tcg_gen_neg_i64(rsh, lsh);
114209a52d85SRichard Henderson     tcg_gen_shl_i64(lval, src, lsh);
114309a52d85SRichard Henderson     tcg_gen_umin_i64(rsh, rsh, max);
114409a52d85SRichard Henderson     tcg_gen_sar_i64(rval, src, rsh);
114509a52d85SRichard Henderson     tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
114609a52d85SRichard Henderson     tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
114709a52d85SRichard Henderson }
114809a52d85SRichard Henderson 
/*
 * Signed shift by signed per-element amount, vector expansion.
 * Positive counts shift left, negative counts shift right
 * arithmetically; only the low byte of each shift element is used
 * for sizes wider than a byte.
 */
static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
                         TCGv_vec src, TCGv_vec shift)
{
    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec tmp = tcg_temp_new_vec_matching(dst);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_neg_vec(vece, rsh, shift);
    if (vece == MO_8) {
        /* For bytes the whole element already is the shift count. */
        tcg_gen_mov_vec(lsh, shift);
    } else {
        /* Wider elements: keep only the low byte of each count. */
        tcg_gen_dupi_vec(vece, tmp, 0xff);
        tcg_gen_and_vec(vece, lsh, shift, tmp);
        tcg_gen_and_vec(vece, rsh, rsh, tmp);
    }

    /* Bound rsh so out of bound right shift gets -1.  */
    tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
    tcg_gen_umin_vec(vece, rsh, rsh, tmp);
    /* tmp becomes all-ones in lanes whose left shift is out of range. */
    tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);

    tcg_gen_shlv_vec(vece, lval, src, lsh);
    tcg_gen_sarv_vec(vece, rval, src, rsh);

    /* Select in-bound left shift.  */
    tcg_gen_andc_vec(vece, lval, lval, tmp);

    /* Select between left and right shift.  */
    if (vece == MO_8) {
        /* Byte counts are properly signed: negative means shift right. */
        tcg_gen_dupi_vec(vece, tmp, 0);
        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
    } else {
        /*
         * lsh was masked to [0, 255], so a "negative" byte count is
         * any value >= 0x80; lsh < 0x80 selects the left shift.
         */
        tcg_gen_dupi_vec(vece, tmp, 0x80);
        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
    }
}
119209a52d85SRichard Henderson 
119309a52d85SRichard Henderson void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
119409a52d85SRichard Henderson                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
119509a52d85SRichard Henderson {
119609a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
119709a52d85SRichard Henderson         INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
119809a52d85SRichard Henderson         INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
119909a52d85SRichard Henderson     };
120009a52d85SRichard Henderson     static const GVecGen3 ops[4] = {
120109a52d85SRichard Henderson         { .fniv = gen_sshl_vec,
120209a52d85SRichard Henderson           .fno = gen_helper_gvec_sshl_b,
120309a52d85SRichard Henderson           .opt_opc = vecop_list,
120409a52d85SRichard Henderson           .vece = MO_8 },
120509a52d85SRichard Henderson         { .fniv = gen_sshl_vec,
120609a52d85SRichard Henderson           .fno = gen_helper_gvec_sshl_h,
120709a52d85SRichard Henderson           .opt_opc = vecop_list,
120809a52d85SRichard Henderson           .vece = MO_16 },
120909a52d85SRichard Henderson         { .fni4 = gen_sshl_i32,
121009a52d85SRichard Henderson           .fniv = gen_sshl_vec,
121109a52d85SRichard Henderson           .opt_opc = vecop_list,
121209a52d85SRichard Henderson           .vece = MO_32 },
121309a52d85SRichard Henderson         { .fni8 = gen_sshl_i64,
121409a52d85SRichard Henderson           .fniv = gen_sshl_vec,
121509a52d85SRichard Henderson           .opt_opc = vecop_list,
121609a52d85SRichard Henderson           .vece = MO_64 },
121709a52d85SRichard Henderson     };
121809a52d85SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
121909a52d85SRichard Henderson }
122009a52d85SRichard Henderson 
1221*f4fa83d6SRichard Henderson void gen_uqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
1222*f4fa83d6SRichard Henderson {
1223*f4fa83d6SRichard Henderson     uint64_t max = MAKE_64BIT_MASK(0, 8 << esz);
1224*f4fa83d6SRichard Henderson     TCGv_i64 tmp = tcg_temp_new_i64();
1225*f4fa83d6SRichard Henderson 
1226*f4fa83d6SRichard Henderson     tcg_gen_add_i64(tmp, a, b);
1227*f4fa83d6SRichard Henderson     tcg_gen_umin_i64(res, tmp, tcg_constant_i64(max));
1228*f4fa83d6SRichard Henderson     tcg_gen_xor_i64(tmp, tmp, res);
1229*f4fa83d6SRichard Henderson     tcg_gen_or_i64(qc, qc, tmp);
1230*f4fa83d6SRichard Henderson }
1231*f4fa83d6SRichard Henderson 
1232*f4fa83d6SRichard Henderson void gen_uqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
1233*f4fa83d6SRichard Henderson {
1234*f4fa83d6SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
1235*f4fa83d6SRichard Henderson 
1236*f4fa83d6SRichard Henderson     tcg_gen_add_i64(t, a, b);
1237*f4fa83d6SRichard Henderson     tcg_gen_movcond_i64(TCG_COND_LTU, res, t, a,
1238*f4fa83d6SRichard Henderson                         tcg_constant_i64(UINT64_MAX), t);
1239*f4fa83d6SRichard Henderson     tcg_gen_xor_i64(t, t, res);
1240*f4fa83d6SRichard Henderson     tcg_gen_or_i64(qc, qc, t);
1241*f4fa83d6SRichard Henderson }
1242*f4fa83d6SRichard Henderson 
124376f4a8aeSRichard Henderson static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
124409a52d85SRichard Henderson                           TCGv_vec a, TCGv_vec b)
124509a52d85SRichard Henderson {
124609a52d85SRichard Henderson     TCGv_vec x = tcg_temp_new_vec_matching(t);
124709a52d85SRichard Henderson     tcg_gen_add_vec(vece, x, a, b);
124809a52d85SRichard Henderson     tcg_gen_usadd_vec(vece, t, a, b);
124976f4a8aeSRichard Henderson     tcg_gen_xor_vec(vece, x, x, t);
125076f4a8aeSRichard Henderson     tcg_gen_or_vec(vece, qc, qc, x);
125109a52d85SRichard Henderson }
125209a52d85SRichard Henderson 
125309a52d85SRichard Henderson void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
125409a52d85SRichard Henderson                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
125509a52d85SRichard Henderson {
125609a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
125776f4a8aeSRichard Henderson         INDEX_op_usadd_vec, INDEX_op_add_vec, 0
125809a52d85SRichard Henderson     };
125909a52d85SRichard Henderson     static const GVecGen4 ops[4] = {
126009a52d85SRichard Henderson         { .fniv = gen_uqadd_vec,
126109a52d85SRichard Henderson           .fno = gen_helper_gvec_uqadd_b,
126209a52d85SRichard Henderson           .write_aofs = true,
126309a52d85SRichard Henderson           .opt_opc = vecop_list,
126409a52d85SRichard Henderson           .vece = MO_8 },
126509a52d85SRichard Henderson         { .fniv = gen_uqadd_vec,
126609a52d85SRichard Henderson           .fno = gen_helper_gvec_uqadd_h,
126709a52d85SRichard Henderson           .write_aofs = true,
126809a52d85SRichard Henderson           .opt_opc = vecop_list,
126909a52d85SRichard Henderson           .vece = MO_16 },
127009a52d85SRichard Henderson         { .fniv = gen_uqadd_vec,
127109a52d85SRichard Henderson           .fno = gen_helper_gvec_uqadd_s,
127209a52d85SRichard Henderson           .write_aofs = true,
127309a52d85SRichard Henderson           .opt_opc = vecop_list,
127409a52d85SRichard Henderson           .vece = MO_32 },
127509a52d85SRichard Henderson         { .fniv = gen_uqadd_vec,
1276*f4fa83d6SRichard Henderson           .fni8 = gen_uqadd_d,
127709a52d85SRichard Henderson           .fno = gen_helper_gvec_uqadd_d,
127809a52d85SRichard Henderson           .write_aofs = true,
127909a52d85SRichard Henderson           .opt_opc = vecop_list,
128009a52d85SRichard Henderson           .vece = MO_64 },
128109a52d85SRichard Henderson     };
128201d5665bSRichard Henderson 
128301d5665bSRichard Henderson     tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
128409a52d85SRichard Henderson     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
128509a52d85SRichard Henderson                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
128609a52d85SRichard Henderson }
128709a52d85SRichard Henderson 
1288*f4fa83d6SRichard Henderson void gen_sqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
1289*f4fa83d6SRichard Henderson {
1290*f4fa83d6SRichard Henderson     int64_t max = MAKE_64BIT_MASK(0, (8 << esz) - 1);
1291*f4fa83d6SRichard Henderson     int64_t min = -1ll - max;
1292*f4fa83d6SRichard Henderson     TCGv_i64 tmp = tcg_temp_new_i64();
1293*f4fa83d6SRichard Henderson 
1294*f4fa83d6SRichard Henderson     tcg_gen_add_i64(tmp, a, b);
1295*f4fa83d6SRichard Henderson     tcg_gen_smin_i64(res, tmp, tcg_constant_i64(max));
1296*f4fa83d6SRichard Henderson     tcg_gen_smax_i64(res, res, tcg_constant_i64(min));
1297*f4fa83d6SRichard Henderson     tcg_gen_xor_i64(tmp, tmp, res);
1298*f4fa83d6SRichard Henderson     tcg_gen_or_i64(qc, qc, tmp);
1299*f4fa83d6SRichard Henderson }
1300*f4fa83d6SRichard Henderson 
1301*f4fa83d6SRichard Henderson void gen_sqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
1302*f4fa83d6SRichard Henderson {
1303*f4fa83d6SRichard Henderson     TCGv_i64 t0 = tcg_temp_new_i64();
1304*f4fa83d6SRichard Henderson     TCGv_i64 t1 = tcg_temp_new_i64();
1305*f4fa83d6SRichard Henderson     TCGv_i64 t2 = tcg_temp_new_i64();
1306*f4fa83d6SRichard Henderson 
1307*f4fa83d6SRichard Henderson     tcg_gen_add_i64(t0, a, b);
1308*f4fa83d6SRichard Henderson 
1309*f4fa83d6SRichard Henderson     /* Compute signed overflow indication into T1 */
1310*f4fa83d6SRichard Henderson     tcg_gen_xor_i64(t1, a, b);
1311*f4fa83d6SRichard Henderson     tcg_gen_xor_i64(t2, t0, a);
1312*f4fa83d6SRichard Henderson     tcg_gen_andc_i64(t1, t2, t1);
1313*f4fa83d6SRichard Henderson 
1314*f4fa83d6SRichard Henderson     /* Compute saturated value into T2 */
1315*f4fa83d6SRichard Henderson     tcg_gen_sari_i64(t2, a, 63);
1316*f4fa83d6SRichard Henderson     tcg_gen_xori_i64(t2, t2, INT64_MAX);
1317*f4fa83d6SRichard Henderson 
1318*f4fa83d6SRichard Henderson     tcg_gen_movcond_i64(TCG_COND_LT, res, t1, tcg_constant_i64(0), t2, t0);
1319*f4fa83d6SRichard Henderson     tcg_gen_xor_i64(t0, t0, res);
1320*f4fa83d6SRichard Henderson     tcg_gen_or_i64(qc, qc, t0);
1321*f4fa83d6SRichard Henderson }
1322*f4fa83d6SRichard Henderson 
132376f4a8aeSRichard Henderson static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
132409a52d85SRichard Henderson                           TCGv_vec a, TCGv_vec b)
132509a52d85SRichard Henderson {
132609a52d85SRichard Henderson     TCGv_vec x = tcg_temp_new_vec_matching(t);
132709a52d85SRichard Henderson     tcg_gen_add_vec(vece, x, a, b);
132809a52d85SRichard Henderson     tcg_gen_ssadd_vec(vece, t, a, b);
132976f4a8aeSRichard Henderson     tcg_gen_xor_vec(vece, x, x, t);
133076f4a8aeSRichard Henderson     tcg_gen_or_vec(vece, qc, qc, x);
133109a52d85SRichard Henderson }
133209a52d85SRichard Henderson 
133309a52d85SRichard Henderson void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
133409a52d85SRichard Henderson                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
133509a52d85SRichard Henderson {
133609a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
133776f4a8aeSRichard Henderson         INDEX_op_ssadd_vec, INDEX_op_add_vec, 0
133809a52d85SRichard Henderson     };
133909a52d85SRichard Henderson     static const GVecGen4 ops[4] = {
134009a52d85SRichard Henderson         { .fniv = gen_sqadd_vec,
134109a52d85SRichard Henderson           .fno = gen_helper_gvec_sqadd_b,
134209a52d85SRichard Henderson           .opt_opc = vecop_list,
134309a52d85SRichard Henderson           .write_aofs = true,
134409a52d85SRichard Henderson           .vece = MO_8 },
134509a52d85SRichard Henderson         { .fniv = gen_sqadd_vec,
134609a52d85SRichard Henderson           .fno = gen_helper_gvec_sqadd_h,
134709a52d85SRichard Henderson           .opt_opc = vecop_list,
134809a52d85SRichard Henderson           .write_aofs = true,
134909a52d85SRichard Henderson           .vece = MO_16 },
135009a52d85SRichard Henderson         { .fniv = gen_sqadd_vec,
135109a52d85SRichard Henderson           .fno = gen_helper_gvec_sqadd_s,
135209a52d85SRichard Henderson           .opt_opc = vecop_list,
135309a52d85SRichard Henderson           .write_aofs = true,
135409a52d85SRichard Henderson           .vece = MO_32 },
135509a52d85SRichard Henderson         { .fniv = gen_sqadd_vec,
1356*f4fa83d6SRichard Henderson           .fni8 = gen_sqadd_d,
135709a52d85SRichard Henderson           .fno = gen_helper_gvec_sqadd_d,
135809a52d85SRichard Henderson           .opt_opc = vecop_list,
135909a52d85SRichard Henderson           .write_aofs = true,
136009a52d85SRichard Henderson           .vece = MO_64 },
136109a52d85SRichard Henderson     };
136201d5665bSRichard Henderson 
136301d5665bSRichard Henderson     tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
136409a52d85SRichard Henderson     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
136509a52d85SRichard Henderson                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
136609a52d85SRichard Henderson }
136709a52d85SRichard Henderson 
1368*f4fa83d6SRichard Henderson void gen_uqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
1369*f4fa83d6SRichard Henderson {
1370*f4fa83d6SRichard Henderson     TCGv_i64 tmp = tcg_temp_new_i64();
1371*f4fa83d6SRichard Henderson 
1372*f4fa83d6SRichard Henderson     tcg_gen_sub_i64(tmp, a, b);
1373*f4fa83d6SRichard Henderson     tcg_gen_smax_i64(res, tmp, tcg_constant_i64(0));
1374*f4fa83d6SRichard Henderson     tcg_gen_xor_i64(tmp, tmp, res);
1375*f4fa83d6SRichard Henderson     tcg_gen_or_i64(qc, qc, tmp);
1376*f4fa83d6SRichard Henderson }
1377*f4fa83d6SRichard Henderson 
1378*f4fa83d6SRichard Henderson void gen_uqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
1379*f4fa83d6SRichard Henderson {
1380*f4fa83d6SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
1381*f4fa83d6SRichard Henderson 
1382*f4fa83d6SRichard Henderson     tcg_gen_sub_i64(t, a, b);
1383*f4fa83d6SRichard Henderson     tcg_gen_movcond_i64(TCG_COND_LTU, res, a, b, tcg_constant_i64(0), t);
1384*f4fa83d6SRichard Henderson     tcg_gen_xor_i64(t, t, res);
1385*f4fa83d6SRichard Henderson     tcg_gen_or_i64(qc, qc, t);
1386*f4fa83d6SRichard Henderson }
1387*f4fa83d6SRichard Henderson 
138876f4a8aeSRichard Henderson static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
138909a52d85SRichard Henderson                           TCGv_vec a, TCGv_vec b)
139009a52d85SRichard Henderson {
139109a52d85SRichard Henderson     TCGv_vec x = tcg_temp_new_vec_matching(t);
139209a52d85SRichard Henderson     tcg_gen_sub_vec(vece, x, a, b);
139309a52d85SRichard Henderson     tcg_gen_ussub_vec(vece, t, a, b);
139476f4a8aeSRichard Henderson     tcg_gen_xor_vec(vece, x, x, t);
139576f4a8aeSRichard Henderson     tcg_gen_or_vec(vece, qc, qc, x);
139609a52d85SRichard Henderson }
139709a52d85SRichard Henderson 
139809a52d85SRichard Henderson void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
139909a52d85SRichard Henderson                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
140009a52d85SRichard Henderson {
140109a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
140276f4a8aeSRichard Henderson         INDEX_op_ussub_vec, INDEX_op_sub_vec, 0
140309a52d85SRichard Henderson     };
140409a52d85SRichard Henderson     static const GVecGen4 ops[4] = {
140509a52d85SRichard Henderson         { .fniv = gen_uqsub_vec,
140609a52d85SRichard Henderson           .fno = gen_helper_gvec_uqsub_b,
140709a52d85SRichard Henderson           .opt_opc = vecop_list,
140809a52d85SRichard Henderson           .write_aofs = true,
140909a52d85SRichard Henderson           .vece = MO_8 },
141009a52d85SRichard Henderson         { .fniv = gen_uqsub_vec,
141109a52d85SRichard Henderson           .fno = gen_helper_gvec_uqsub_h,
141209a52d85SRichard Henderson           .opt_opc = vecop_list,
141309a52d85SRichard Henderson           .write_aofs = true,
141409a52d85SRichard Henderson           .vece = MO_16 },
141509a52d85SRichard Henderson         { .fniv = gen_uqsub_vec,
141609a52d85SRichard Henderson           .fno = gen_helper_gvec_uqsub_s,
141709a52d85SRichard Henderson           .opt_opc = vecop_list,
141809a52d85SRichard Henderson           .write_aofs = true,
141909a52d85SRichard Henderson           .vece = MO_32 },
142009a52d85SRichard Henderson         { .fniv = gen_uqsub_vec,
1421*f4fa83d6SRichard Henderson           .fni8 = gen_uqsub_d,
142209a52d85SRichard Henderson           .fno = gen_helper_gvec_uqsub_d,
142309a52d85SRichard Henderson           .opt_opc = vecop_list,
142409a52d85SRichard Henderson           .write_aofs = true,
142509a52d85SRichard Henderson           .vece = MO_64 },
142609a52d85SRichard Henderson     };
142701d5665bSRichard Henderson 
142801d5665bSRichard Henderson     tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
142909a52d85SRichard Henderson     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
143009a52d85SRichard Henderson                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
143109a52d85SRichard Henderson }
143209a52d85SRichard Henderson 
1433*f4fa83d6SRichard Henderson void gen_sqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
1434*f4fa83d6SRichard Henderson {
1435*f4fa83d6SRichard Henderson     int64_t max = MAKE_64BIT_MASK(0, (8 << esz) - 1);
1436*f4fa83d6SRichard Henderson     int64_t min = -1ll - max;
1437*f4fa83d6SRichard Henderson     TCGv_i64 tmp = tcg_temp_new_i64();
1438*f4fa83d6SRichard Henderson 
1439*f4fa83d6SRichard Henderson     tcg_gen_sub_i64(tmp, a, b);
1440*f4fa83d6SRichard Henderson     tcg_gen_smin_i64(res, tmp, tcg_constant_i64(max));
1441*f4fa83d6SRichard Henderson     tcg_gen_smax_i64(res, res, tcg_constant_i64(min));
1442*f4fa83d6SRichard Henderson     tcg_gen_xor_i64(tmp, tmp, res);
1443*f4fa83d6SRichard Henderson     tcg_gen_or_i64(qc, qc, tmp);
1444*f4fa83d6SRichard Henderson }
1445*f4fa83d6SRichard Henderson 
1446*f4fa83d6SRichard Henderson void gen_sqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
1447*f4fa83d6SRichard Henderson {
1448*f4fa83d6SRichard Henderson     TCGv_i64 t0 = tcg_temp_new_i64();
1449*f4fa83d6SRichard Henderson     TCGv_i64 t1 = tcg_temp_new_i64();
1450*f4fa83d6SRichard Henderson     TCGv_i64 t2 = tcg_temp_new_i64();
1451*f4fa83d6SRichard Henderson 
1452*f4fa83d6SRichard Henderson     tcg_gen_sub_i64(t0, a, b);
1453*f4fa83d6SRichard Henderson 
1454*f4fa83d6SRichard Henderson     /* Compute signed overflow indication into T1 */
1455*f4fa83d6SRichard Henderson     tcg_gen_xor_i64(t1, a, b);
1456*f4fa83d6SRichard Henderson     tcg_gen_xor_i64(t2, t0, a);
1457*f4fa83d6SRichard Henderson     tcg_gen_and_i64(t1, t1, t2);
1458*f4fa83d6SRichard Henderson 
1459*f4fa83d6SRichard Henderson     /* Compute saturated value into T2 */
1460*f4fa83d6SRichard Henderson     tcg_gen_sari_i64(t2, a, 63);
1461*f4fa83d6SRichard Henderson     tcg_gen_xori_i64(t2, t2, INT64_MAX);
1462*f4fa83d6SRichard Henderson 
1463*f4fa83d6SRichard Henderson     tcg_gen_movcond_i64(TCG_COND_LT, res, t1, tcg_constant_i64(0), t2, t0);
1464*f4fa83d6SRichard Henderson     tcg_gen_xor_i64(t0, t0, res);
1465*f4fa83d6SRichard Henderson     tcg_gen_or_i64(qc, qc, t0);
1466*f4fa83d6SRichard Henderson }
1467*f4fa83d6SRichard Henderson 
146876f4a8aeSRichard Henderson static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
146909a52d85SRichard Henderson                           TCGv_vec a, TCGv_vec b)
147009a52d85SRichard Henderson {
147109a52d85SRichard Henderson     TCGv_vec x = tcg_temp_new_vec_matching(t);
147209a52d85SRichard Henderson     tcg_gen_sub_vec(vece, x, a, b);
147309a52d85SRichard Henderson     tcg_gen_sssub_vec(vece, t, a, b);
147476f4a8aeSRichard Henderson     tcg_gen_xor_vec(vece, x, x, t);
147576f4a8aeSRichard Henderson     tcg_gen_or_vec(vece, qc, qc, x);
147609a52d85SRichard Henderson }
147709a52d85SRichard Henderson 
147809a52d85SRichard Henderson void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
147909a52d85SRichard Henderson                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
148009a52d85SRichard Henderson {
148109a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
148276f4a8aeSRichard Henderson         INDEX_op_sssub_vec, INDEX_op_sub_vec, 0
148309a52d85SRichard Henderson     };
148409a52d85SRichard Henderson     static const GVecGen4 ops[4] = {
148509a52d85SRichard Henderson         { .fniv = gen_sqsub_vec,
148609a52d85SRichard Henderson           .fno = gen_helper_gvec_sqsub_b,
148709a52d85SRichard Henderson           .opt_opc = vecop_list,
148809a52d85SRichard Henderson           .write_aofs = true,
148909a52d85SRichard Henderson           .vece = MO_8 },
149009a52d85SRichard Henderson         { .fniv = gen_sqsub_vec,
149109a52d85SRichard Henderson           .fno = gen_helper_gvec_sqsub_h,
149209a52d85SRichard Henderson           .opt_opc = vecop_list,
149309a52d85SRichard Henderson           .write_aofs = true,
149409a52d85SRichard Henderson           .vece = MO_16 },
149509a52d85SRichard Henderson         { .fniv = gen_sqsub_vec,
149609a52d85SRichard Henderson           .fno = gen_helper_gvec_sqsub_s,
149709a52d85SRichard Henderson           .opt_opc = vecop_list,
149809a52d85SRichard Henderson           .write_aofs = true,
149909a52d85SRichard Henderson           .vece = MO_32 },
150009a52d85SRichard Henderson         { .fniv = gen_sqsub_vec,
1501*f4fa83d6SRichard Henderson           .fni8 = gen_sqsub_d,
150209a52d85SRichard Henderson           .fno = gen_helper_gvec_sqsub_d,
150309a52d85SRichard Henderson           .opt_opc = vecop_list,
150409a52d85SRichard Henderson           .write_aofs = true,
150509a52d85SRichard Henderson           .vece = MO_64 },
150609a52d85SRichard Henderson     };
150701d5665bSRichard Henderson 
150801d5665bSRichard Henderson     tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
150909a52d85SRichard Henderson     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
151009a52d85SRichard Henderson                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
151109a52d85SRichard Henderson }
151209a52d85SRichard Henderson 
151309a52d85SRichard Henderson static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
151409a52d85SRichard Henderson {
151509a52d85SRichard Henderson     TCGv_i32 t = tcg_temp_new_i32();
151609a52d85SRichard Henderson 
151709a52d85SRichard Henderson     tcg_gen_sub_i32(t, a, b);
151809a52d85SRichard Henderson     tcg_gen_sub_i32(d, b, a);
151909a52d85SRichard Henderson     tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
152009a52d85SRichard Henderson }
152109a52d85SRichard Henderson 
152209a52d85SRichard Henderson static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
152309a52d85SRichard Henderson {
152409a52d85SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
152509a52d85SRichard Henderson 
152609a52d85SRichard Henderson     tcg_gen_sub_i64(t, a, b);
152709a52d85SRichard Henderson     tcg_gen_sub_i64(d, b, a);
152809a52d85SRichard Henderson     tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
152909a52d85SRichard Henderson }
153009a52d85SRichard Henderson 
153109a52d85SRichard Henderson static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
153209a52d85SRichard Henderson {
153309a52d85SRichard Henderson     TCGv_vec t = tcg_temp_new_vec_matching(d);
153409a52d85SRichard Henderson 
153509a52d85SRichard Henderson     tcg_gen_smin_vec(vece, t, a, b);
153609a52d85SRichard Henderson     tcg_gen_smax_vec(vece, d, a, b);
153709a52d85SRichard Henderson     tcg_gen_sub_vec(vece, d, d, t);
153809a52d85SRichard Henderson }
153909a52d85SRichard Henderson 
154009a52d85SRichard Henderson void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
154109a52d85SRichard Henderson                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
154209a52d85SRichard Henderson {
154309a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
154409a52d85SRichard Henderson         INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
154509a52d85SRichard Henderson     };
154609a52d85SRichard Henderson     static const GVecGen3 ops[4] = {
154709a52d85SRichard Henderson         { .fniv = gen_sabd_vec,
154809a52d85SRichard Henderson           .fno = gen_helper_gvec_sabd_b,
154909a52d85SRichard Henderson           .opt_opc = vecop_list,
155009a52d85SRichard Henderson           .vece = MO_8 },
155109a52d85SRichard Henderson         { .fniv = gen_sabd_vec,
155209a52d85SRichard Henderson           .fno = gen_helper_gvec_sabd_h,
155309a52d85SRichard Henderson           .opt_opc = vecop_list,
155409a52d85SRichard Henderson           .vece = MO_16 },
155509a52d85SRichard Henderson         { .fni4 = gen_sabd_i32,
155609a52d85SRichard Henderson           .fniv = gen_sabd_vec,
155709a52d85SRichard Henderson           .fno = gen_helper_gvec_sabd_s,
155809a52d85SRichard Henderson           .opt_opc = vecop_list,
155909a52d85SRichard Henderson           .vece = MO_32 },
156009a52d85SRichard Henderson         { .fni8 = gen_sabd_i64,
156109a52d85SRichard Henderson           .fniv = gen_sabd_vec,
156209a52d85SRichard Henderson           .fno = gen_helper_gvec_sabd_d,
156309a52d85SRichard Henderson           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
156409a52d85SRichard Henderson           .opt_opc = vecop_list,
156509a52d85SRichard Henderson           .vece = MO_64 },
156609a52d85SRichard Henderson     };
156709a52d85SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
156809a52d85SRichard Henderson }
156909a52d85SRichard Henderson 
157009a52d85SRichard Henderson static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
157109a52d85SRichard Henderson {
157209a52d85SRichard Henderson     TCGv_i32 t = tcg_temp_new_i32();
157309a52d85SRichard Henderson 
157409a52d85SRichard Henderson     tcg_gen_sub_i32(t, a, b);
157509a52d85SRichard Henderson     tcg_gen_sub_i32(d, b, a);
157609a52d85SRichard Henderson     tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
157709a52d85SRichard Henderson }
157809a52d85SRichard Henderson 
157909a52d85SRichard Henderson static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
158009a52d85SRichard Henderson {
158109a52d85SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
158209a52d85SRichard Henderson 
158309a52d85SRichard Henderson     tcg_gen_sub_i64(t, a, b);
158409a52d85SRichard Henderson     tcg_gen_sub_i64(d, b, a);
158509a52d85SRichard Henderson     tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
158609a52d85SRichard Henderson }
158709a52d85SRichard Henderson 
158809a52d85SRichard Henderson static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
158909a52d85SRichard Henderson {
159009a52d85SRichard Henderson     TCGv_vec t = tcg_temp_new_vec_matching(d);
159109a52d85SRichard Henderson 
159209a52d85SRichard Henderson     tcg_gen_umin_vec(vece, t, a, b);
159309a52d85SRichard Henderson     tcg_gen_umax_vec(vece, d, a, b);
159409a52d85SRichard Henderson     tcg_gen_sub_vec(vece, d, d, t);
159509a52d85SRichard Henderson }
159609a52d85SRichard Henderson 
159709a52d85SRichard Henderson void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
159809a52d85SRichard Henderson                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
159909a52d85SRichard Henderson {
160009a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
160109a52d85SRichard Henderson         INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
160209a52d85SRichard Henderson     };
160309a52d85SRichard Henderson     static const GVecGen3 ops[4] = {
160409a52d85SRichard Henderson         { .fniv = gen_uabd_vec,
160509a52d85SRichard Henderson           .fno = gen_helper_gvec_uabd_b,
160609a52d85SRichard Henderson           .opt_opc = vecop_list,
160709a52d85SRichard Henderson           .vece = MO_8 },
160809a52d85SRichard Henderson         { .fniv = gen_uabd_vec,
160909a52d85SRichard Henderson           .fno = gen_helper_gvec_uabd_h,
161009a52d85SRichard Henderson           .opt_opc = vecop_list,
161109a52d85SRichard Henderson           .vece = MO_16 },
161209a52d85SRichard Henderson         { .fni4 = gen_uabd_i32,
161309a52d85SRichard Henderson           .fniv = gen_uabd_vec,
161409a52d85SRichard Henderson           .fno = gen_helper_gvec_uabd_s,
161509a52d85SRichard Henderson           .opt_opc = vecop_list,
161609a52d85SRichard Henderson           .vece = MO_32 },
161709a52d85SRichard Henderson         { .fni8 = gen_uabd_i64,
161809a52d85SRichard Henderson           .fniv = gen_uabd_vec,
161909a52d85SRichard Henderson           .fno = gen_helper_gvec_uabd_d,
162009a52d85SRichard Henderson           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
162109a52d85SRichard Henderson           .opt_opc = vecop_list,
162209a52d85SRichard Henderson           .vece = MO_64 },
162309a52d85SRichard Henderson     };
162409a52d85SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
162509a52d85SRichard Henderson }
162609a52d85SRichard Henderson 
162709a52d85SRichard Henderson static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
162809a52d85SRichard Henderson {
162909a52d85SRichard Henderson     TCGv_i32 t = tcg_temp_new_i32();
163009a52d85SRichard Henderson     gen_sabd_i32(t, a, b);
163109a52d85SRichard Henderson     tcg_gen_add_i32(d, d, t);
163209a52d85SRichard Henderson }
163309a52d85SRichard Henderson 
163409a52d85SRichard Henderson static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
163509a52d85SRichard Henderson {
163609a52d85SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
163709a52d85SRichard Henderson     gen_sabd_i64(t, a, b);
163809a52d85SRichard Henderson     tcg_gen_add_i64(d, d, t);
163909a52d85SRichard Henderson }
164009a52d85SRichard Henderson 
164109a52d85SRichard Henderson static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
164209a52d85SRichard Henderson {
164309a52d85SRichard Henderson     TCGv_vec t = tcg_temp_new_vec_matching(d);
164409a52d85SRichard Henderson     gen_sabd_vec(vece, t, a, b);
164509a52d85SRichard Henderson     tcg_gen_add_vec(vece, d, d, t);
164609a52d85SRichard Henderson }
164709a52d85SRichard Henderson 
164809a52d85SRichard Henderson void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
164909a52d85SRichard Henderson                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
165009a52d85SRichard Henderson {
165109a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
165209a52d85SRichard Henderson         INDEX_op_sub_vec, INDEX_op_add_vec,
165309a52d85SRichard Henderson         INDEX_op_smin_vec, INDEX_op_smax_vec, 0
165409a52d85SRichard Henderson     };
165509a52d85SRichard Henderson     static const GVecGen3 ops[4] = {
165609a52d85SRichard Henderson         { .fniv = gen_saba_vec,
165709a52d85SRichard Henderson           .fno = gen_helper_gvec_saba_b,
165809a52d85SRichard Henderson           .opt_opc = vecop_list,
165909a52d85SRichard Henderson           .load_dest = true,
166009a52d85SRichard Henderson           .vece = MO_8 },
166109a52d85SRichard Henderson         { .fniv = gen_saba_vec,
166209a52d85SRichard Henderson           .fno = gen_helper_gvec_saba_h,
166309a52d85SRichard Henderson           .opt_opc = vecop_list,
166409a52d85SRichard Henderson           .load_dest = true,
166509a52d85SRichard Henderson           .vece = MO_16 },
166609a52d85SRichard Henderson         { .fni4 = gen_saba_i32,
166709a52d85SRichard Henderson           .fniv = gen_saba_vec,
166809a52d85SRichard Henderson           .fno = gen_helper_gvec_saba_s,
166909a52d85SRichard Henderson           .opt_opc = vecop_list,
167009a52d85SRichard Henderson           .load_dest = true,
167109a52d85SRichard Henderson           .vece = MO_32 },
167209a52d85SRichard Henderson         { .fni8 = gen_saba_i64,
167309a52d85SRichard Henderson           .fniv = gen_saba_vec,
167409a52d85SRichard Henderson           .fno = gen_helper_gvec_saba_d,
167509a52d85SRichard Henderson           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
167609a52d85SRichard Henderson           .opt_opc = vecop_list,
167709a52d85SRichard Henderson           .load_dest = true,
167809a52d85SRichard Henderson           .vece = MO_64 },
167909a52d85SRichard Henderson     };
168009a52d85SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
168109a52d85SRichard Henderson }
168209a52d85SRichard Henderson 
168309a52d85SRichard Henderson static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
168409a52d85SRichard Henderson {
168509a52d85SRichard Henderson     TCGv_i32 t = tcg_temp_new_i32();
168609a52d85SRichard Henderson     gen_uabd_i32(t, a, b);
168709a52d85SRichard Henderson     tcg_gen_add_i32(d, d, t);
168809a52d85SRichard Henderson }
168909a52d85SRichard Henderson 
169009a52d85SRichard Henderson static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
169109a52d85SRichard Henderson {
169209a52d85SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
169309a52d85SRichard Henderson     gen_uabd_i64(t, a, b);
169409a52d85SRichard Henderson     tcg_gen_add_i64(d, d, t);
169509a52d85SRichard Henderson }
169609a52d85SRichard Henderson 
169709a52d85SRichard Henderson static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
169809a52d85SRichard Henderson {
169909a52d85SRichard Henderson     TCGv_vec t = tcg_temp_new_vec_matching(d);
170009a52d85SRichard Henderson     gen_uabd_vec(vece, t, a, b);
170109a52d85SRichard Henderson     tcg_gen_add_vec(vece, d, d, t);
170209a52d85SRichard Henderson }
170309a52d85SRichard Henderson 
170409a52d85SRichard Henderson void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
170509a52d85SRichard Henderson                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
170609a52d85SRichard Henderson {
170709a52d85SRichard Henderson     static const TCGOpcode vecop_list[] = {
170809a52d85SRichard Henderson         INDEX_op_sub_vec, INDEX_op_add_vec,
170909a52d85SRichard Henderson         INDEX_op_umin_vec, INDEX_op_umax_vec, 0
171009a52d85SRichard Henderson     };
171109a52d85SRichard Henderson     static const GVecGen3 ops[4] = {
171209a52d85SRichard Henderson         { .fniv = gen_uaba_vec,
171309a52d85SRichard Henderson           .fno = gen_helper_gvec_uaba_b,
171409a52d85SRichard Henderson           .opt_opc = vecop_list,
171509a52d85SRichard Henderson           .load_dest = true,
171609a52d85SRichard Henderson           .vece = MO_8 },
171709a52d85SRichard Henderson         { .fniv = gen_uaba_vec,
171809a52d85SRichard Henderson           .fno = gen_helper_gvec_uaba_h,
171909a52d85SRichard Henderson           .opt_opc = vecop_list,
172009a52d85SRichard Henderson           .load_dest = true,
172109a52d85SRichard Henderson           .vece = MO_16 },
172209a52d85SRichard Henderson         { .fni4 = gen_uaba_i32,
172309a52d85SRichard Henderson           .fniv = gen_uaba_vec,
172409a52d85SRichard Henderson           .fno = gen_helper_gvec_uaba_s,
172509a52d85SRichard Henderson           .opt_opc = vecop_list,
172609a52d85SRichard Henderson           .load_dest = true,
172709a52d85SRichard Henderson           .vece = MO_32 },
172809a52d85SRichard Henderson         { .fni8 = gen_uaba_i64,
172909a52d85SRichard Henderson           .fniv = gen_uaba_vec,
173009a52d85SRichard Henderson           .fno = gen_helper_gvec_uaba_d,
173109a52d85SRichard Henderson           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
173209a52d85SRichard Henderson           .opt_opc = vecop_list,
173309a52d85SRichard Henderson           .load_dest = true,
173409a52d85SRichard Henderson           .vece = MO_64 },
173509a52d85SRichard Henderson     };
173609a52d85SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
173709a52d85SRichard Henderson }
1738a7e4eec6SRichard Henderson 
1739a7e4eec6SRichard Henderson void gen_gvec_addp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
1740a7e4eec6SRichard Henderson                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
1741a7e4eec6SRichard Henderson {
1742a7e4eec6SRichard Henderson     static gen_helper_gvec_3 * const fns[4] = {
1743a7e4eec6SRichard Henderson         gen_helper_gvec_addp_b,
1744a7e4eec6SRichard Henderson         gen_helper_gvec_addp_h,
1745a7e4eec6SRichard Henderson         gen_helper_gvec_addp_s,
1746a7e4eec6SRichard Henderson         gen_helper_gvec_addp_d,
1747a7e4eec6SRichard Henderson     };
1748a7e4eec6SRichard Henderson     tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
1749a7e4eec6SRichard Henderson }
175028b5451bSRichard Henderson 
175128b5451bSRichard Henderson void gen_gvec_smaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
175228b5451bSRichard Henderson                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
175328b5451bSRichard Henderson {
175428b5451bSRichard Henderson     static gen_helper_gvec_3 * const fns[4] = {
175528b5451bSRichard Henderson         gen_helper_gvec_smaxp_b,
175628b5451bSRichard Henderson         gen_helper_gvec_smaxp_h,
175728b5451bSRichard Henderson         gen_helper_gvec_smaxp_s,
175828b5451bSRichard Henderson     };
175928b5451bSRichard Henderson     tcg_debug_assert(vece <= MO_32);
176028b5451bSRichard Henderson     tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
176128b5451bSRichard Henderson }
176228b5451bSRichard Henderson 
176328b5451bSRichard Henderson void gen_gvec_sminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
176428b5451bSRichard Henderson                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
176528b5451bSRichard Henderson {
176628b5451bSRichard Henderson     static gen_helper_gvec_3 * const fns[4] = {
176728b5451bSRichard Henderson         gen_helper_gvec_sminp_b,
176828b5451bSRichard Henderson         gen_helper_gvec_sminp_h,
176928b5451bSRichard Henderson         gen_helper_gvec_sminp_s,
177028b5451bSRichard Henderson     };
177128b5451bSRichard Henderson     tcg_debug_assert(vece <= MO_32);
177228b5451bSRichard Henderson     tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
177328b5451bSRichard Henderson }
177428b5451bSRichard Henderson 
177528b5451bSRichard Henderson void gen_gvec_umaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
177628b5451bSRichard Henderson                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
177728b5451bSRichard Henderson {
177828b5451bSRichard Henderson     static gen_helper_gvec_3 * const fns[4] = {
177928b5451bSRichard Henderson         gen_helper_gvec_umaxp_b,
178028b5451bSRichard Henderson         gen_helper_gvec_umaxp_h,
178128b5451bSRichard Henderson         gen_helper_gvec_umaxp_s,
178228b5451bSRichard Henderson     };
178328b5451bSRichard Henderson     tcg_debug_assert(vece <= MO_32);
178428b5451bSRichard Henderson     tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
178528b5451bSRichard Henderson }
178628b5451bSRichard Henderson 
178728b5451bSRichard Henderson void gen_gvec_uminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
178828b5451bSRichard Henderson                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
178928b5451bSRichard Henderson {
179028b5451bSRichard Henderson     static gen_helper_gvec_3 * const fns[4] = {
179128b5451bSRichard Henderson         gen_helper_gvec_uminp_b,
179228b5451bSRichard Henderson         gen_helper_gvec_uminp_h,
179328b5451bSRichard Henderson         gen_helper_gvec_uminp_s,
179428b5451bSRichard Henderson     };
179528b5451bSRichard Henderson     tcg_debug_assert(vece <= MO_32);
179628b5451bSRichard Henderson     tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
179728b5451bSRichard Henderson }
1798