/*
 * ARM generic vector expansion
 *
 * Copyright (c) 2003 Fabrice Bellard
 * Copyright (c) 2005-2007 CodeSourcery
 * Copyright (c) 2007 OpenedHand, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
2009a52d85SRichard Henderson */ 2109a52d85SRichard Henderson 2209a52d85SRichard Henderson #include "qemu/osdep.h" 2309a52d85SRichard Henderson #include "translate.h" 2409a52d85SRichard Henderson 2509a52d85SRichard Henderson 2609a52d85SRichard Henderson static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, 2709a52d85SRichard Henderson uint32_t opr_sz, uint32_t max_sz, 2809a52d85SRichard Henderson gen_helper_gvec_3_ptr *fn) 2909a52d85SRichard Henderson { 3009a52d85SRichard Henderson TCGv_ptr qc_ptr = tcg_temp_new_ptr(); 3109a52d85SRichard Henderson 3201d5665bSRichard Henderson tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); 3309a52d85SRichard Henderson tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc)); 3409a52d85SRichard Henderson tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr, 3509a52d85SRichard Henderson opr_sz, max_sz, 0, fn); 3609a52d85SRichard Henderson } 3709a52d85SRichard Henderson 3809a52d85SRichard Henderson void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 3909a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 4009a52d85SRichard Henderson { 4109a52d85SRichard Henderson static gen_helper_gvec_3_ptr * const fns[2] = { 4209a52d85SRichard Henderson gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32 4309a52d85SRichard Henderson }; 4409a52d85SRichard Henderson tcg_debug_assert(vece >= 1 && vece <= 2); 4509a52d85SRichard Henderson gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); 4609a52d85SRichard Henderson } 4709a52d85SRichard Henderson 4809a52d85SRichard Henderson void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 4909a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 5009a52d85SRichard Henderson { 5109a52d85SRichard Henderson static gen_helper_gvec_3_ptr * const fns[2] = { 5209a52d85SRichard Henderson gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32 5309a52d85SRichard 
Henderson }; 5409a52d85SRichard Henderson tcg_debug_assert(vece >= 1 && vece <= 2); 5509a52d85SRichard Henderson gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); 5609a52d85SRichard Henderson } 5709a52d85SRichard Henderson 5809a52d85SRichard Henderson #define GEN_CMP0(NAME, COND) \ 5909a52d85SRichard Henderson void NAME(unsigned vece, uint32_t d, uint32_t m, \ 6009a52d85SRichard Henderson uint32_t opr_sz, uint32_t max_sz) \ 6109a52d85SRichard Henderson { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); } 6209a52d85SRichard Henderson 6309a52d85SRichard Henderson GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ) 6409a52d85SRichard Henderson GEN_CMP0(gen_gvec_cle0, TCG_COND_LE) 6509a52d85SRichard Henderson GEN_CMP0(gen_gvec_cge0, TCG_COND_GE) 6609a52d85SRichard Henderson GEN_CMP0(gen_gvec_clt0, TCG_COND_LT) 6709a52d85SRichard Henderson GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT) 6809a52d85SRichard Henderson 6909a52d85SRichard Henderson #undef GEN_CMP0 7009a52d85SRichard Henderson 7109a52d85SRichard Henderson static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 7209a52d85SRichard Henderson { 7309a52d85SRichard Henderson tcg_gen_vec_sar8i_i64(a, a, shift); 7409a52d85SRichard Henderson tcg_gen_vec_add8_i64(d, d, a); 7509a52d85SRichard Henderson } 7609a52d85SRichard Henderson 7709a52d85SRichard Henderson static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 7809a52d85SRichard Henderson { 7909a52d85SRichard Henderson tcg_gen_vec_sar16i_i64(a, a, shift); 8009a52d85SRichard Henderson tcg_gen_vec_add16_i64(d, d, a); 8109a52d85SRichard Henderson } 8209a52d85SRichard Henderson 8309a52d85SRichard Henderson static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) 8409a52d85SRichard Henderson { 8509a52d85SRichard Henderson tcg_gen_sari_i32(a, a, shift); 8609a52d85SRichard Henderson tcg_gen_add_i32(d, d, a); 8709a52d85SRichard Henderson } 8809a52d85SRichard Henderson 8909a52d85SRichard Henderson static void gen_ssra64_i64(TCGv_i64 d, 
TCGv_i64 a, int64_t shift) 9009a52d85SRichard Henderson { 9109a52d85SRichard Henderson tcg_gen_sari_i64(a, a, shift); 9209a52d85SRichard Henderson tcg_gen_add_i64(d, d, a); 9309a52d85SRichard Henderson } 9409a52d85SRichard Henderson 9509a52d85SRichard Henderson static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 9609a52d85SRichard Henderson { 9709a52d85SRichard Henderson tcg_gen_sari_vec(vece, a, a, sh); 9809a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, a); 9909a52d85SRichard Henderson } 10009a52d85SRichard Henderson 10109a52d85SRichard Henderson void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 10209a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 10309a52d85SRichard Henderson { 10409a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 10509a52d85SRichard Henderson INDEX_op_sari_vec, INDEX_op_add_vec, 0 10609a52d85SRichard Henderson }; 10709a52d85SRichard Henderson static const GVecGen2i ops[4] = { 10809a52d85SRichard Henderson { .fni8 = gen_ssra8_i64, 10909a52d85SRichard Henderson .fniv = gen_ssra_vec, 11009a52d85SRichard Henderson .fno = gen_helper_gvec_ssra_b, 11109a52d85SRichard Henderson .load_dest = true, 11209a52d85SRichard Henderson .opt_opc = vecop_list, 11309a52d85SRichard Henderson .vece = MO_8 }, 11409a52d85SRichard Henderson { .fni8 = gen_ssra16_i64, 11509a52d85SRichard Henderson .fniv = gen_ssra_vec, 11609a52d85SRichard Henderson .fno = gen_helper_gvec_ssra_h, 11709a52d85SRichard Henderson .load_dest = true, 11809a52d85SRichard Henderson .opt_opc = vecop_list, 11909a52d85SRichard Henderson .vece = MO_16 }, 12009a52d85SRichard Henderson { .fni4 = gen_ssra32_i32, 12109a52d85SRichard Henderson .fniv = gen_ssra_vec, 12209a52d85SRichard Henderson .fno = gen_helper_gvec_ssra_s, 12309a52d85SRichard Henderson .load_dest = true, 12409a52d85SRichard Henderson .opt_opc = vecop_list, 12509a52d85SRichard Henderson .vece = MO_32 }, 12609a52d85SRichard Henderson { .fni8 = 
gen_ssra64_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Signed results in all sign bits.
     */
    shift = MIN(shift, (8 << vece) - 1);
    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
}

/* Unsigned shift-and-accumulate, 8-bit lanes: d += a >> shift; clobbers a. */
static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}

/* As gen_usra8_i64, for 16-bit lanes. */
static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}

/* As gen_usra8_i64, for a single 32-bit element. */
static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}

/* As gen_usra8_i64, for a single 64-bit element. */
static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}

/* As gen_usra8_i64, using host vector operations. */
static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_shri_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}

/* Expand USRA (unsigned shift right and accumulate) for all element sizes. */
void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_usra8_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8, },
        { .fni8 = gen_usra16_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16, },
        { .fni4 = gen_usra32_i32,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32, },
        { .fni8 = gen_usra64_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64, },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Unsigned results in all zeros as input to accumulate: nop.
     */
    if (shift < (8 << vece)) {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    } else {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    }
}

/*
 * Shift one less than the requested amount, and the low bit is
 * the rounding bit.  For the 8 and 16-bit operations, because we
 * mask the low bit, we can perform a normal integer shift instead
 * of a vector shift.
 */
static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    /* t = rounding bit: bit sh-1 of each 8-bit lane of a. */
    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    /* d = (a >> sh) + rounding bit, per lane. */
    tcg_gen_vec_sar8i_i64(d, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

/* As gen_srshr8_i64, for 16-bit lanes. */
static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_sar16i_i64(d, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

/* Signed rounding shift right for a single 32-bit element. */
void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t;

    /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
    if (sh == 32) {
        tcg_gen_movi_i32(d, 0);
        return;
    }
    t = tcg_temp_new_i32();
    tcg_gen_extract_i32(t, a, sh - 1, 1);
    tcg_gen_sari_i32(d, a, sh);
    tcg_gen_add_i32(d, d, t);
}

/* Signed rounding shift right for a single 64-bit element. */
void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_extract_i64(t, a, sh - 1, 1);
    tcg_gen_sari_i64(d, a, sh);
    tcg_gen_add_i64(d, d, t);
}

/* As gen_srshr8_i64, using host vector operations. */
static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec ones = tcg_temp_new_vec_matching(d);

    tcg_gen_shri_vec(vece, t, a, sh - 1);
    tcg_gen_dupi_vec(vece, ones, 1);
    tcg_gen_and_vec(vece, t, t, ones);
    tcg_gen_sari_vec(vece, d, a, sh);
    tcg_gen_add_vec(vece, d, d, t);
}

/* Expand SRSHR (signed rounding shift right) for all element sizes. */
void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_srshr8_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_srshr16_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_srshr32_i32,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_srshr64_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    if (shift == (8 << vece)) {
        /*
         * Shifts larger than the element size are architecturally valid.
         * Signed results in all sign bits. With rounding, this produces
         *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
         * I.e. always zero.
         */
        tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

/* Signed rounding shift right and accumulate, 8-bit lanes: d += srshr(a). */
static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr8_i64(t, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

/* As gen_srsra8_i64, for 16-bit lanes. */
static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr16_i64(t, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

/* As gen_srsra8_i64, for a single 32-bit element. */
static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_srshr32_i32(t, a, sh);
    tcg_gen_add_i32(d, d, t);
}

/* As gen_srsra8_i64, for a single 64-bit element. */
static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr64_i64(t, a, sh);
    tcg_gen_add_i64(d, d, t);
}

/* As gen_srsra8_i64, using host vector operations. */
static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    gen_srshr_vec(vece, t, a, sh);
    tcg_gen_add_vec(vece, d, d, t);
}

/* Expand SRSRA (signed rounding shift right and accumulate). */
void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_srsra8_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fni8 = gen_srsra16_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_srsra32_i32,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_srsra64_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Signed results in all sign bits. With rounding, this produces
     *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
     * I.e. always zero. With accumulation, this leaves D unchanged.
     */
    if (shift == (8 << vece)) {
        /* Nop, but we do need to clear the tail.
         */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

/* Unsigned rounding shift right, 8-bit lanes: d = (a >> sh) + rounding bit. */
static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    /* t = rounding bit: bit sh-1 of each 8-bit lane of a. */
    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_shr8i_i64(d, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

/* As gen_urshr8_i64, for 16-bit lanes. */
static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_shr16i_i64(d, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

/* Unsigned rounding shift right for a single 32-bit element. */
void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t;

    /* Handle shift by the input size for the benefit of trans_URSHR_ri */
    if (sh == 32) {
        /* The shifted-out value is zero; the result is the rounding bit. */
        tcg_gen_extract_i32(d, a, sh - 1, 1);
        return;
    }
    t = tcg_temp_new_i32();
    tcg_gen_extract_i32(t, a, sh - 1, 1);
    tcg_gen_shri_i32(d, a, sh);
    tcg_gen_add_i32(d, d, t);
}

/* Unsigned rounding shift right for a single 64-bit element. */
void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_extract_i64(t, a, sh - 1, 1);
    tcg_gen_shri_i64(d, a, sh);
    tcg_gen_add_i64(d, d, t);
}

/* As gen_urshr8_i64, using host vector operations. */
static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec ones = tcg_temp_new_vec_matching(d);

    tcg_gen_shri_vec(vece, t, a, shift - 1);
    tcg_gen_dupi_vec(vece, ones, 1);
    tcg_gen_and_vec(vece, t, t, ones);
    tcg_gen_shri_vec(vece, d, a, shift);
    tcg_gen_add_vec(vece, d, d, t);
}

/* Expand URSHR (unsigned rounding shift right) for all element sizes. */
void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_urshr8_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_urshr16_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_urshr32_i32,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_urshr64_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    if (shift == (8 << vece)) {
        /*
         * Shifts larger than the element size are architecturally valid.
         * Unsigned results in zero. With rounding, this produces a
         * copy of the most significant bit.
         */
        tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

/* Unsigned rounding shift right and accumulate, 8-bit lanes. */
static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 8) {
        /* Rounding shift by the lane width: only the rounding bit remains. */
        tcg_gen_vec_shr8i_i64(t, a, 7);
    } else {
        gen_urshr8_i64(t, a, sh);
    }
    tcg_gen_vec_add8_i64(d, d, t);
}

/* As gen_ursra8_i64, for 16-bit lanes. */
static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 16) {
        tcg_gen_vec_shr16i_i64(t, a, 15);
    } else {
        gen_urshr16_i64(t, a, sh);
    }
    tcg_gen_vec_add16_i64(d, d, t);
}

/* As gen_ursra8_i64, for a single 32-bit element. */
static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t = tcg_temp_new_i32();

    if (sh == 32) {
        tcg_gen_shri_i32(t, a, 31);
    } else {
        gen_urshr32_i32(t, a, sh);
    }
    tcg_gen_add_i32(d, d, t);
}

/* As gen_ursra8_i64, for a single 64-bit element. */
static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 64) {
        tcg_gen_shri_i64(t, a, 63);
    } else {
        gen_urshr64_i64(t, a, sh);
    }
    tcg_gen_add_i64(d, d, t);
}

/* As gen_ursra8_i64, using host vector operations. */
static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    if (sh == (8 << vece)) {
        tcg_gen_shri_vec(vece, t, a, sh - 1);
    } else {
        gen_urshr_vec(vece, t, a, sh);
    }
    tcg_gen_add_vec(vece, d, d, t);
}

/* Expand URSRA (unsigned rounding shift right and accumulate). */
void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_ursra8_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_b,
          .opt_opc = vecop_list,
          .load_dest = true,
60209a52d85SRichard Henderson .vece = MO_8 }, 60309a52d85SRichard Henderson { .fni8 = gen_ursra16_i64, 60409a52d85SRichard Henderson .fniv = gen_ursra_vec, 60509a52d85SRichard Henderson .fno = gen_helper_gvec_ursra_h, 60609a52d85SRichard Henderson .opt_opc = vecop_list, 60709a52d85SRichard Henderson .load_dest = true, 60809a52d85SRichard Henderson .vece = MO_16 }, 60909a52d85SRichard Henderson { .fni4 = gen_ursra32_i32, 61009a52d85SRichard Henderson .fniv = gen_ursra_vec, 61109a52d85SRichard Henderson .fno = gen_helper_gvec_ursra_s, 61209a52d85SRichard Henderson .opt_opc = vecop_list, 61309a52d85SRichard Henderson .load_dest = true, 61409a52d85SRichard Henderson .vece = MO_32 }, 61509a52d85SRichard Henderson { .fni8 = gen_ursra64_i64, 61609a52d85SRichard Henderson .fniv = gen_ursra_vec, 61709a52d85SRichard Henderson .fno = gen_helper_gvec_ursra_d, 61809a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 61909a52d85SRichard Henderson .opt_opc = vecop_list, 62009a52d85SRichard Henderson .load_dest = true, 62109a52d85SRichard Henderson .vece = MO_64 }, 62209a52d85SRichard Henderson }; 62309a52d85SRichard Henderson 62409a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [1..esize] */ 62509a52d85SRichard Henderson tcg_debug_assert(shift > 0); 62609a52d85SRichard Henderson tcg_debug_assert(shift <= (8 << vece)); 62709a52d85SRichard Henderson 62809a52d85SRichard Henderson tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 62909a52d85SRichard Henderson } 63009a52d85SRichard Henderson 63109a52d85SRichard Henderson static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 63209a52d85SRichard Henderson { 63309a52d85SRichard Henderson uint64_t mask = dup_const(MO_8, 0xff >> shift); 63409a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 63509a52d85SRichard Henderson 63609a52d85SRichard Henderson tcg_gen_shri_i64(t, a, shift); 63709a52d85SRichard Henderson tcg_gen_andi_i64(t, t, mask); 63809a52d85SRichard 
Henderson tcg_gen_andi_i64(d, d, ~mask); 63909a52d85SRichard Henderson tcg_gen_or_i64(d, d, t); 64009a52d85SRichard Henderson } 64109a52d85SRichard Henderson 64209a52d85SRichard Henderson static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 64309a52d85SRichard Henderson { 64409a52d85SRichard Henderson uint64_t mask = dup_const(MO_16, 0xffff >> shift); 64509a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 64609a52d85SRichard Henderson 64709a52d85SRichard Henderson tcg_gen_shri_i64(t, a, shift); 64809a52d85SRichard Henderson tcg_gen_andi_i64(t, t, mask); 64909a52d85SRichard Henderson tcg_gen_andi_i64(d, d, ~mask); 65009a52d85SRichard Henderson tcg_gen_or_i64(d, d, t); 65109a52d85SRichard Henderson } 65209a52d85SRichard Henderson 65309a52d85SRichard Henderson static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) 65409a52d85SRichard Henderson { 65509a52d85SRichard Henderson tcg_gen_shri_i32(a, a, shift); 65609a52d85SRichard Henderson tcg_gen_deposit_i32(d, d, a, 0, 32 - shift); 65709a52d85SRichard Henderson } 65809a52d85SRichard Henderson 65909a52d85SRichard Henderson static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 66009a52d85SRichard Henderson { 66109a52d85SRichard Henderson tcg_gen_shri_i64(a, a, shift); 66209a52d85SRichard Henderson tcg_gen_deposit_i64(d, d, a, 0, 64 - shift); 66309a52d85SRichard Henderson } 66409a52d85SRichard Henderson 66509a52d85SRichard Henderson static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 66609a52d85SRichard Henderson { 66709a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 66809a52d85SRichard Henderson TCGv_vec m = tcg_temp_new_vec_matching(d); 66909a52d85SRichard Henderson 67009a52d85SRichard Henderson tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh)); 67109a52d85SRichard Henderson tcg_gen_shri_vec(vece, t, a, sh); 67209a52d85SRichard Henderson tcg_gen_and_vec(vece, d, d, m); 67309a52d85SRichard Henderson 
tcg_gen_or_vec(vece, d, d, t); 67409a52d85SRichard Henderson } 67509a52d85SRichard Henderson 67609a52d85SRichard Henderson void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 67709a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 67809a52d85SRichard Henderson { 67909a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 }; 68009a52d85SRichard Henderson const GVecGen2i ops[4] = { 68109a52d85SRichard Henderson { .fni8 = gen_shr8_ins_i64, 68209a52d85SRichard Henderson .fniv = gen_shr_ins_vec, 68309a52d85SRichard Henderson .fno = gen_helper_gvec_sri_b, 68409a52d85SRichard Henderson .load_dest = true, 68509a52d85SRichard Henderson .opt_opc = vecop_list, 68609a52d85SRichard Henderson .vece = MO_8 }, 68709a52d85SRichard Henderson { .fni8 = gen_shr16_ins_i64, 68809a52d85SRichard Henderson .fniv = gen_shr_ins_vec, 68909a52d85SRichard Henderson .fno = gen_helper_gvec_sri_h, 69009a52d85SRichard Henderson .load_dest = true, 69109a52d85SRichard Henderson .opt_opc = vecop_list, 69209a52d85SRichard Henderson .vece = MO_16 }, 69309a52d85SRichard Henderson { .fni4 = gen_shr32_ins_i32, 69409a52d85SRichard Henderson .fniv = gen_shr_ins_vec, 69509a52d85SRichard Henderson .fno = gen_helper_gvec_sri_s, 69609a52d85SRichard Henderson .load_dest = true, 69709a52d85SRichard Henderson .opt_opc = vecop_list, 69809a52d85SRichard Henderson .vece = MO_32 }, 69909a52d85SRichard Henderson { .fni8 = gen_shr64_ins_i64, 70009a52d85SRichard Henderson .fniv = gen_shr_ins_vec, 70109a52d85SRichard Henderson .fno = gen_helper_gvec_sri_d, 70209a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 70309a52d85SRichard Henderson .load_dest = true, 70409a52d85SRichard Henderson .opt_opc = vecop_list, 70509a52d85SRichard Henderson .vece = MO_64 }, 70609a52d85SRichard Henderson }; 70709a52d85SRichard Henderson 70809a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [1..esize]. 
*/ 70909a52d85SRichard Henderson tcg_debug_assert(shift > 0); 71009a52d85SRichard Henderson tcg_debug_assert(shift <= (8 << vece)); 71109a52d85SRichard Henderson 71209a52d85SRichard Henderson /* Shift of esize leaves destination unchanged. */ 71309a52d85SRichard Henderson if (shift < (8 << vece)) { 71409a52d85SRichard Henderson tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 71509a52d85SRichard Henderson } else { 71609a52d85SRichard Henderson /* Nop, but we do need to clear the tail. */ 71709a52d85SRichard Henderson tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz); 71809a52d85SRichard Henderson } 71909a52d85SRichard Henderson } 72009a52d85SRichard Henderson 72109a52d85SRichard Henderson static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 72209a52d85SRichard Henderson { 72309a52d85SRichard Henderson uint64_t mask = dup_const(MO_8, 0xff << shift); 72409a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 72509a52d85SRichard Henderson 72609a52d85SRichard Henderson tcg_gen_shli_i64(t, a, shift); 72709a52d85SRichard Henderson tcg_gen_andi_i64(t, t, mask); 72809a52d85SRichard Henderson tcg_gen_andi_i64(d, d, ~mask); 72909a52d85SRichard Henderson tcg_gen_or_i64(d, d, t); 73009a52d85SRichard Henderson } 73109a52d85SRichard Henderson 73209a52d85SRichard Henderson static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 73309a52d85SRichard Henderson { 73409a52d85SRichard Henderson uint64_t mask = dup_const(MO_16, 0xffff << shift); 73509a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 73609a52d85SRichard Henderson 73709a52d85SRichard Henderson tcg_gen_shli_i64(t, a, shift); 73809a52d85SRichard Henderson tcg_gen_andi_i64(t, t, mask); 73909a52d85SRichard Henderson tcg_gen_andi_i64(d, d, ~mask); 74009a52d85SRichard Henderson tcg_gen_or_i64(d, d, t); 74109a52d85SRichard Henderson } 74209a52d85SRichard Henderson 74309a52d85SRichard Henderson static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) 
74409a52d85SRichard Henderson { 74509a52d85SRichard Henderson tcg_gen_deposit_i32(d, d, a, shift, 32 - shift); 74609a52d85SRichard Henderson } 74709a52d85SRichard Henderson 74809a52d85SRichard Henderson static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 74909a52d85SRichard Henderson { 75009a52d85SRichard Henderson tcg_gen_deposit_i64(d, d, a, shift, 64 - shift); 75109a52d85SRichard Henderson } 75209a52d85SRichard Henderson 75309a52d85SRichard Henderson static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 75409a52d85SRichard Henderson { 75509a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 75609a52d85SRichard Henderson TCGv_vec m = tcg_temp_new_vec_matching(d); 75709a52d85SRichard Henderson 75809a52d85SRichard Henderson tcg_gen_shli_vec(vece, t, a, sh); 75909a52d85SRichard Henderson tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh)); 76009a52d85SRichard Henderson tcg_gen_and_vec(vece, d, d, m); 76109a52d85SRichard Henderson tcg_gen_or_vec(vece, d, d, t); 76209a52d85SRichard Henderson } 76309a52d85SRichard Henderson 76409a52d85SRichard Henderson void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 76509a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 76609a52d85SRichard Henderson { 76709a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 }; 76809a52d85SRichard Henderson const GVecGen2i ops[4] = { 76909a52d85SRichard Henderson { .fni8 = gen_shl8_ins_i64, 77009a52d85SRichard Henderson .fniv = gen_shl_ins_vec, 77109a52d85SRichard Henderson .fno = gen_helper_gvec_sli_b, 77209a52d85SRichard Henderson .load_dest = true, 77309a52d85SRichard Henderson .opt_opc = vecop_list, 77409a52d85SRichard Henderson .vece = MO_8 }, 77509a52d85SRichard Henderson { .fni8 = gen_shl16_ins_i64, 77609a52d85SRichard Henderson .fniv = gen_shl_ins_vec, 77709a52d85SRichard Henderson .fno = gen_helper_gvec_sli_h, 77809a52d85SRichard Henderson .load_dest = true, 
77909a52d85SRichard Henderson .opt_opc = vecop_list, 78009a52d85SRichard Henderson .vece = MO_16 }, 78109a52d85SRichard Henderson { .fni4 = gen_shl32_ins_i32, 78209a52d85SRichard Henderson .fniv = gen_shl_ins_vec, 78309a52d85SRichard Henderson .fno = gen_helper_gvec_sli_s, 78409a52d85SRichard Henderson .load_dest = true, 78509a52d85SRichard Henderson .opt_opc = vecop_list, 78609a52d85SRichard Henderson .vece = MO_32 }, 78709a52d85SRichard Henderson { .fni8 = gen_shl64_ins_i64, 78809a52d85SRichard Henderson .fniv = gen_shl_ins_vec, 78909a52d85SRichard Henderson .fno = gen_helper_gvec_sli_d, 79009a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 79109a52d85SRichard Henderson .load_dest = true, 79209a52d85SRichard Henderson .opt_opc = vecop_list, 79309a52d85SRichard Henderson .vece = MO_64 }, 79409a52d85SRichard Henderson }; 79509a52d85SRichard Henderson 79609a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [0..esize-1]. */ 79709a52d85SRichard Henderson tcg_debug_assert(shift >= 0); 79809a52d85SRichard Henderson tcg_debug_assert(shift < (8 << vece)); 79909a52d85SRichard Henderson 80009a52d85SRichard Henderson if (shift == 0) { 80109a52d85SRichard Henderson tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz); 80209a52d85SRichard Henderson } else { 80309a52d85SRichard Henderson tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 80409a52d85SRichard Henderson } 80509a52d85SRichard Henderson } 80609a52d85SRichard Henderson 80709a52d85SRichard Henderson static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 80809a52d85SRichard Henderson { 80909a52d85SRichard Henderson gen_helper_neon_mul_u8(a, a, b); 81009a52d85SRichard Henderson gen_helper_neon_add_u8(d, d, a); 81109a52d85SRichard Henderson } 81209a52d85SRichard Henderson 81309a52d85SRichard Henderson static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 81409a52d85SRichard Henderson { 81509a52d85SRichard Henderson gen_helper_neon_mul_u8(a, a, b); 
81609a52d85SRichard Henderson gen_helper_neon_sub_u8(d, d, a); 81709a52d85SRichard Henderson } 81809a52d85SRichard Henderson 81909a52d85SRichard Henderson static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 82009a52d85SRichard Henderson { 82109a52d85SRichard Henderson gen_helper_neon_mul_u16(a, a, b); 82209a52d85SRichard Henderson gen_helper_neon_add_u16(d, d, a); 82309a52d85SRichard Henderson } 82409a52d85SRichard Henderson 82509a52d85SRichard Henderson static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 82609a52d85SRichard Henderson { 82709a52d85SRichard Henderson gen_helper_neon_mul_u16(a, a, b); 82809a52d85SRichard Henderson gen_helper_neon_sub_u16(d, d, a); 82909a52d85SRichard Henderson } 83009a52d85SRichard Henderson 83109a52d85SRichard Henderson static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 83209a52d85SRichard Henderson { 83309a52d85SRichard Henderson tcg_gen_mul_i32(a, a, b); 83409a52d85SRichard Henderson tcg_gen_add_i32(d, d, a); 83509a52d85SRichard Henderson } 83609a52d85SRichard Henderson 83709a52d85SRichard Henderson static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 83809a52d85SRichard Henderson { 83909a52d85SRichard Henderson tcg_gen_mul_i32(a, a, b); 84009a52d85SRichard Henderson tcg_gen_sub_i32(d, d, a); 84109a52d85SRichard Henderson } 84209a52d85SRichard Henderson 84309a52d85SRichard Henderson static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 84409a52d85SRichard Henderson { 84509a52d85SRichard Henderson tcg_gen_mul_i64(a, a, b); 84609a52d85SRichard Henderson tcg_gen_add_i64(d, d, a); 84709a52d85SRichard Henderson } 84809a52d85SRichard Henderson 84909a52d85SRichard Henderson static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 85009a52d85SRichard Henderson { 85109a52d85SRichard Henderson tcg_gen_mul_i64(a, a, b); 85209a52d85SRichard Henderson tcg_gen_sub_i64(d, d, a); 85309a52d85SRichard Henderson } 85409a52d85SRichard Henderson 85509a52d85SRichard Henderson static void 
gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 85609a52d85SRichard Henderson { 85709a52d85SRichard Henderson tcg_gen_mul_vec(vece, a, a, b); 85809a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, a); 85909a52d85SRichard Henderson } 86009a52d85SRichard Henderson 86109a52d85SRichard Henderson static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 86209a52d85SRichard Henderson { 86309a52d85SRichard Henderson tcg_gen_mul_vec(vece, a, a, b); 86409a52d85SRichard Henderson tcg_gen_sub_vec(vece, d, d, a); 86509a52d85SRichard Henderson } 86609a52d85SRichard Henderson 86709a52d85SRichard Henderson /* Note that while NEON does not support VMLA and VMLS as 64-bit ops, 86809a52d85SRichard Henderson * these tables are shared with AArch64 which does support them. 86909a52d85SRichard Henderson */ 87009a52d85SRichard Henderson void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 87109a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 87209a52d85SRichard Henderson { 87309a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 87409a52d85SRichard Henderson INDEX_op_mul_vec, INDEX_op_add_vec, 0 87509a52d85SRichard Henderson }; 87609a52d85SRichard Henderson static const GVecGen3 ops[4] = { 87709a52d85SRichard Henderson { .fni4 = gen_mla8_i32, 87809a52d85SRichard Henderson .fniv = gen_mla_vec, 87909a52d85SRichard Henderson .load_dest = true, 88009a52d85SRichard Henderson .opt_opc = vecop_list, 88109a52d85SRichard Henderson .vece = MO_8 }, 88209a52d85SRichard Henderson { .fni4 = gen_mla16_i32, 88309a52d85SRichard Henderson .fniv = gen_mla_vec, 88409a52d85SRichard Henderson .load_dest = true, 88509a52d85SRichard Henderson .opt_opc = vecop_list, 88609a52d85SRichard Henderson .vece = MO_16 }, 88709a52d85SRichard Henderson { .fni4 = gen_mla32_i32, 88809a52d85SRichard Henderson .fniv = gen_mla_vec, 88909a52d85SRichard Henderson .load_dest = true, 89009a52d85SRichard Henderson .opt_opc = vecop_list, 
89109a52d85SRichard Henderson .vece = MO_32 }, 89209a52d85SRichard Henderson { .fni8 = gen_mla64_i64, 89309a52d85SRichard Henderson .fniv = gen_mla_vec, 89409a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 89509a52d85SRichard Henderson .load_dest = true, 89609a52d85SRichard Henderson .opt_opc = vecop_list, 89709a52d85SRichard Henderson .vece = MO_64 }, 89809a52d85SRichard Henderson }; 89909a52d85SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 90009a52d85SRichard Henderson } 90109a52d85SRichard Henderson 90209a52d85SRichard Henderson void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 90309a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 90409a52d85SRichard Henderson { 90509a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 90609a52d85SRichard Henderson INDEX_op_mul_vec, INDEX_op_sub_vec, 0 90709a52d85SRichard Henderson }; 90809a52d85SRichard Henderson static const GVecGen3 ops[4] = { 90909a52d85SRichard Henderson { .fni4 = gen_mls8_i32, 91009a52d85SRichard Henderson .fniv = gen_mls_vec, 91109a52d85SRichard Henderson .load_dest = true, 91209a52d85SRichard Henderson .opt_opc = vecop_list, 91309a52d85SRichard Henderson .vece = MO_8 }, 91409a52d85SRichard Henderson { .fni4 = gen_mls16_i32, 91509a52d85SRichard Henderson .fniv = gen_mls_vec, 91609a52d85SRichard Henderson .load_dest = true, 91709a52d85SRichard Henderson .opt_opc = vecop_list, 91809a52d85SRichard Henderson .vece = MO_16 }, 91909a52d85SRichard Henderson { .fni4 = gen_mls32_i32, 92009a52d85SRichard Henderson .fniv = gen_mls_vec, 92109a52d85SRichard Henderson .load_dest = true, 92209a52d85SRichard Henderson .opt_opc = vecop_list, 92309a52d85SRichard Henderson .vece = MO_32 }, 92409a52d85SRichard Henderson { .fni8 = gen_mls64_i64, 92509a52d85SRichard Henderson .fniv = gen_mls_vec, 92609a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 92709a52d85SRichard Henderson .load_dest = true, 
92809a52d85SRichard Henderson .opt_opc = vecop_list, 92909a52d85SRichard Henderson .vece = MO_64 }, 93009a52d85SRichard Henderson }; 93109a52d85SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 93209a52d85SRichard Henderson } 93309a52d85SRichard Henderson 93409a52d85SRichard Henderson /* CMTST : test is "if (X & Y != 0)". */ 93509a52d85SRichard Henderson static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 93609a52d85SRichard Henderson { 937*013506e0SRichard Henderson tcg_gen_negsetcond_i32(TCG_COND_TSTNE, d, a, b); 93809a52d85SRichard Henderson } 93909a52d85SRichard Henderson 94009a52d85SRichard Henderson void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 94109a52d85SRichard Henderson { 942*013506e0SRichard Henderson tcg_gen_negsetcond_i64(TCG_COND_TSTNE, d, a, b); 94309a52d85SRichard Henderson } 94409a52d85SRichard Henderson 94509a52d85SRichard Henderson static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 94609a52d85SRichard Henderson { 94709a52d85SRichard Henderson tcg_gen_and_vec(vece, d, a, b); 94809a52d85SRichard Henderson tcg_gen_dupi_vec(vece, a, 0); 94909a52d85SRichard Henderson tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a); 95009a52d85SRichard Henderson } 95109a52d85SRichard Henderson 95209a52d85SRichard Henderson void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 95309a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 95409a52d85SRichard Henderson { 95509a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 }; 95609a52d85SRichard Henderson static const GVecGen3 ops[4] = { 95709a52d85SRichard Henderson { .fni4 = gen_helper_neon_tst_u8, 95809a52d85SRichard Henderson .fniv = gen_cmtst_vec, 95909a52d85SRichard Henderson .opt_opc = vecop_list, 96009a52d85SRichard Henderson .vece = MO_8 }, 96109a52d85SRichard Henderson { .fni4 = gen_helper_neon_tst_u16, 96209a52d85SRichard Henderson .fniv = gen_cmtst_vec, 
96309a52d85SRichard Henderson .opt_opc = vecop_list, 96409a52d85SRichard Henderson .vece = MO_16 }, 96509a52d85SRichard Henderson { .fni4 = gen_cmtst_i32, 96609a52d85SRichard Henderson .fniv = gen_cmtst_vec, 96709a52d85SRichard Henderson .opt_opc = vecop_list, 96809a52d85SRichard Henderson .vece = MO_32 }, 96909a52d85SRichard Henderson { .fni8 = gen_cmtst_i64, 97009a52d85SRichard Henderson .fniv = gen_cmtst_vec, 97109a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 97209a52d85SRichard Henderson .opt_opc = vecop_list, 97309a52d85SRichard Henderson .vece = MO_64 }, 97409a52d85SRichard Henderson }; 97509a52d85SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 97609a52d85SRichard Henderson } 97709a52d85SRichard Henderson 97809a52d85SRichard Henderson void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift) 97909a52d85SRichard Henderson { 98009a52d85SRichard Henderson TCGv_i32 lval = tcg_temp_new_i32(); 98109a52d85SRichard Henderson TCGv_i32 rval = tcg_temp_new_i32(); 98209a52d85SRichard Henderson TCGv_i32 lsh = tcg_temp_new_i32(); 98309a52d85SRichard Henderson TCGv_i32 rsh = tcg_temp_new_i32(); 98409a52d85SRichard Henderson TCGv_i32 zero = tcg_constant_i32(0); 98509a52d85SRichard Henderson TCGv_i32 max = tcg_constant_i32(32); 98609a52d85SRichard Henderson 98709a52d85SRichard Henderson /* 98809a52d85SRichard Henderson * Rely on the TCG guarantee that out of range shifts produce 98909a52d85SRichard Henderson * unspecified results, not undefined behaviour (i.e. no trap). 99009a52d85SRichard Henderson * Discard out-of-range results after the fact. 
99109a52d85SRichard Henderson */ 99209a52d85SRichard Henderson tcg_gen_ext8s_i32(lsh, shift); 99309a52d85SRichard Henderson tcg_gen_neg_i32(rsh, lsh); 99409a52d85SRichard Henderson tcg_gen_shl_i32(lval, src, lsh); 99509a52d85SRichard Henderson tcg_gen_shr_i32(rval, src, rsh); 99609a52d85SRichard Henderson tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero); 99709a52d85SRichard Henderson tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst); 99809a52d85SRichard Henderson } 99909a52d85SRichard Henderson 100009a52d85SRichard Henderson void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift) 100109a52d85SRichard Henderson { 100209a52d85SRichard Henderson TCGv_i64 lval = tcg_temp_new_i64(); 100309a52d85SRichard Henderson TCGv_i64 rval = tcg_temp_new_i64(); 100409a52d85SRichard Henderson TCGv_i64 lsh = tcg_temp_new_i64(); 100509a52d85SRichard Henderson TCGv_i64 rsh = tcg_temp_new_i64(); 100609a52d85SRichard Henderson TCGv_i64 zero = tcg_constant_i64(0); 100709a52d85SRichard Henderson TCGv_i64 max = tcg_constant_i64(64); 100809a52d85SRichard Henderson 100909a52d85SRichard Henderson /* 101009a52d85SRichard Henderson * Rely on the TCG guarantee that out of range shifts produce 101109a52d85SRichard Henderson * unspecified results, not undefined behaviour (i.e. no trap). 101209a52d85SRichard Henderson * Discard out-of-range results after the fact. 
101309a52d85SRichard Henderson */ 101409a52d85SRichard Henderson tcg_gen_ext8s_i64(lsh, shift); 101509a52d85SRichard Henderson tcg_gen_neg_i64(rsh, lsh); 101609a52d85SRichard Henderson tcg_gen_shl_i64(lval, src, lsh); 101709a52d85SRichard Henderson tcg_gen_shr_i64(rval, src, rsh); 101809a52d85SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero); 101909a52d85SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst); 102009a52d85SRichard Henderson } 102109a52d85SRichard Henderson 102209a52d85SRichard Henderson static void gen_ushl_vec(unsigned vece, TCGv_vec dst, 102309a52d85SRichard Henderson TCGv_vec src, TCGv_vec shift) 102409a52d85SRichard Henderson { 102509a52d85SRichard Henderson TCGv_vec lval = tcg_temp_new_vec_matching(dst); 102609a52d85SRichard Henderson TCGv_vec rval = tcg_temp_new_vec_matching(dst); 102709a52d85SRichard Henderson TCGv_vec lsh = tcg_temp_new_vec_matching(dst); 102809a52d85SRichard Henderson TCGv_vec rsh = tcg_temp_new_vec_matching(dst); 102909a52d85SRichard Henderson TCGv_vec msk, max; 103009a52d85SRichard Henderson 103109a52d85SRichard Henderson tcg_gen_neg_vec(vece, rsh, shift); 103209a52d85SRichard Henderson if (vece == MO_8) { 103309a52d85SRichard Henderson tcg_gen_mov_vec(lsh, shift); 103409a52d85SRichard Henderson } else { 103509a52d85SRichard Henderson msk = tcg_temp_new_vec_matching(dst); 103609a52d85SRichard Henderson tcg_gen_dupi_vec(vece, msk, 0xff); 103709a52d85SRichard Henderson tcg_gen_and_vec(vece, lsh, shift, msk); 103809a52d85SRichard Henderson tcg_gen_and_vec(vece, rsh, rsh, msk); 103909a52d85SRichard Henderson } 104009a52d85SRichard Henderson 104109a52d85SRichard Henderson /* 104209a52d85SRichard Henderson * Rely on the TCG guarantee that out of range shifts produce 104309a52d85SRichard Henderson * unspecified results, not undefined behaviour (i.e. no trap). 104409a52d85SRichard Henderson * Discard out-of-range results after the fact. 
104509a52d85SRichard Henderson */ 104609a52d85SRichard Henderson tcg_gen_shlv_vec(vece, lval, src, lsh); 104709a52d85SRichard Henderson tcg_gen_shrv_vec(vece, rval, src, rsh); 104809a52d85SRichard Henderson 104909a52d85SRichard Henderson max = tcg_temp_new_vec_matching(dst); 105009a52d85SRichard Henderson tcg_gen_dupi_vec(vece, max, 8 << vece); 105109a52d85SRichard Henderson 105209a52d85SRichard Henderson /* 105309a52d85SRichard Henderson * The choice of LT (signed) and GEU (unsigned) are biased toward 105409a52d85SRichard Henderson * the instructions of the x86_64 host. For MO_8, the whole byte 105509a52d85SRichard Henderson * is significant so we must use an unsigned compare; otherwise we 105609a52d85SRichard Henderson * have already masked to a byte and so a signed compare works. 105709a52d85SRichard Henderson * Other tcg hosts have a full set of comparisons and do not care. 105809a52d85SRichard Henderson */ 105909a52d85SRichard Henderson if (vece == MO_8) { 106009a52d85SRichard Henderson tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max); 106109a52d85SRichard Henderson tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max); 106209a52d85SRichard Henderson tcg_gen_andc_vec(vece, lval, lval, lsh); 106309a52d85SRichard Henderson tcg_gen_andc_vec(vece, rval, rval, rsh); 106409a52d85SRichard Henderson } else { 106509a52d85SRichard Henderson tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max); 106609a52d85SRichard Henderson tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max); 106709a52d85SRichard Henderson tcg_gen_and_vec(vece, lval, lval, lsh); 106809a52d85SRichard Henderson tcg_gen_and_vec(vece, rval, rval, rsh); 106909a52d85SRichard Henderson } 107009a52d85SRichard Henderson tcg_gen_or_vec(vece, dst, lval, rval); 107109a52d85SRichard Henderson } 107209a52d85SRichard Henderson 107309a52d85SRichard Henderson void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 107409a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 
107509a52d85SRichard Henderson { 107609a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 107709a52d85SRichard Henderson INDEX_op_neg_vec, INDEX_op_shlv_vec, 107809a52d85SRichard Henderson INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0 107909a52d85SRichard Henderson }; 108009a52d85SRichard Henderson static const GVecGen3 ops[4] = { 108109a52d85SRichard Henderson { .fniv = gen_ushl_vec, 108209a52d85SRichard Henderson .fno = gen_helper_gvec_ushl_b, 108309a52d85SRichard Henderson .opt_opc = vecop_list, 108409a52d85SRichard Henderson .vece = MO_8 }, 108509a52d85SRichard Henderson { .fniv = gen_ushl_vec, 108609a52d85SRichard Henderson .fno = gen_helper_gvec_ushl_h, 108709a52d85SRichard Henderson .opt_opc = vecop_list, 108809a52d85SRichard Henderson .vece = MO_16 }, 108909a52d85SRichard Henderson { .fni4 = gen_ushl_i32, 109009a52d85SRichard Henderson .fniv = gen_ushl_vec, 109109a52d85SRichard Henderson .opt_opc = vecop_list, 109209a52d85SRichard Henderson .vece = MO_32 }, 109309a52d85SRichard Henderson { .fni8 = gen_ushl_i64, 109409a52d85SRichard Henderson .fniv = gen_ushl_vec, 109509a52d85SRichard Henderson .opt_opc = vecop_list, 109609a52d85SRichard Henderson .vece = MO_64 }, 109709a52d85SRichard Henderson }; 109809a52d85SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 109909a52d85SRichard Henderson } 110009a52d85SRichard Henderson 110109a52d85SRichard Henderson void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift) 110209a52d85SRichard Henderson { 110309a52d85SRichard Henderson TCGv_i32 lval = tcg_temp_new_i32(); 110409a52d85SRichard Henderson TCGv_i32 rval = tcg_temp_new_i32(); 110509a52d85SRichard Henderson TCGv_i32 lsh = tcg_temp_new_i32(); 110609a52d85SRichard Henderson TCGv_i32 rsh = tcg_temp_new_i32(); 110709a52d85SRichard Henderson TCGv_i32 zero = tcg_constant_i32(0); 110809a52d85SRichard Henderson TCGv_i32 max = tcg_constant_i32(31); 110909a52d85SRichard Henderson 111009a52d85SRichard Henderson /* 
111109a52d85SRichard Henderson * Rely on the TCG guarantee that out of range shifts produce 111209a52d85SRichard Henderson * unspecified results, not undefined behaviour (i.e. no trap). 111309a52d85SRichard Henderson * Discard out-of-range results after the fact. 111409a52d85SRichard Henderson */ 111509a52d85SRichard Henderson tcg_gen_ext8s_i32(lsh, shift); 111609a52d85SRichard Henderson tcg_gen_neg_i32(rsh, lsh); 111709a52d85SRichard Henderson tcg_gen_shl_i32(lval, src, lsh); 111809a52d85SRichard Henderson tcg_gen_umin_i32(rsh, rsh, max); 111909a52d85SRichard Henderson tcg_gen_sar_i32(rval, src, rsh); 112009a52d85SRichard Henderson tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero); 112109a52d85SRichard Henderson tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval); 112209a52d85SRichard Henderson } 112309a52d85SRichard Henderson 112409a52d85SRichard Henderson void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift) 112509a52d85SRichard Henderson { 112609a52d85SRichard Henderson TCGv_i64 lval = tcg_temp_new_i64(); 112709a52d85SRichard Henderson TCGv_i64 rval = tcg_temp_new_i64(); 112809a52d85SRichard Henderson TCGv_i64 lsh = tcg_temp_new_i64(); 112909a52d85SRichard Henderson TCGv_i64 rsh = tcg_temp_new_i64(); 113009a52d85SRichard Henderson TCGv_i64 zero = tcg_constant_i64(0); 113109a52d85SRichard Henderson TCGv_i64 max = tcg_constant_i64(63); 113209a52d85SRichard Henderson 113309a52d85SRichard Henderson /* 113409a52d85SRichard Henderson * Rely on the TCG guarantee that out of range shifts produce 113509a52d85SRichard Henderson * unspecified results, not undefined behaviour (i.e. no trap). 113609a52d85SRichard Henderson * Discard out-of-range results after the fact. 
113709a52d85SRichard Henderson */ 113809a52d85SRichard Henderson tcg_gen_ext8s_i64(lsh, shift); 113909a52d85SRichard Henderson tcg_gen_neg_i64(rsh, lsh); 114009a52d85SRichard Henderson tcg_gen_shl_i64(lval, src, lsh); 114109a52d85SRichard Henderson tcg_gen_umin_i64(rsh, rsh, max); 114209a52d85SRichard Henderson tcg_gen_sar_i64(rval, src, rsh); 114309a52d85SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero); 114409a52d85SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval); 114509a52d85SRichard Henderson } 114609a52d85SRichard Henderson 114709a52d85SRichard Henderson static void gen_sshl_vec(unsigned vece, TCGv_vec dst, 114809a52d85SRichard Henderson TCGv_vec src, TCGv_vec shift) 114909a52d85SRichard Henderson { 115009a52d85SRichard Henderson TCGv_vec lval = tcg_temp_new_vec_matching(dst); 115109a52d85SRichard Henderson TCGv_vec rval = tcg_temp_new_vec_matching(dst); 115209a52d85SRichard Henderson TCGv_vec lsh = tcg_temp_new_vec_matching(dst); 115309a52d85SRichard Henderson TCGv_vec rsh = tcg_temp_new_vec_matching(dst); 115409a52d85SRichard Henderson TCGv_vec tmp = tcg_temp_new_vec_matching(dst); 115509a52d85SRichard Henderson 115609a52d85SRichard Henderson /* 115709a52d85SRichard Henderson * Rely on the TCG guarantee that out of range shifts produce 115809a52d85SRichard Henderson * unspecified results, not undefined behaviour (i.e. no trap). 115909a52d85SRichard Henderson * Discard out-of-range results after the fact. 
116009a52d85SRichard Henderson */ 116109a52d85SRichard Henderson tcg_gen_neg_vec(vece, rsh, shift); 116209a52d85SRichard Henderson if (vece == MO_8) { 116309a52d85SRichard Henderson tcg_gen_mov_vec(lsh, shift); 116409a52d85SRichard Henderson } else { 116509a52d85SRichard Henderson tcg_gen_dupi_vec(vece, tmp, 0xff); 116609a52d85SRichard Henderson tcg_gen_and_vec(vece, lsh, shift, tmp); 116709a52d85SRichard Henderson tcg_gen_and_vec(vece, rsh, rsh, tmp); 116809a52d85SRichard Henderson } 116909a52d85SRichard Henderson 117009a52d85SRichard Henderson /* Bound rsh so out of bound right shift gets -1. */ 117109a52d85SRichard Henderson tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1); 117209a52d85SRichard Henderson tcg_gen_umin_vec(vece, rsh, rsh, tmp); 117309a52d85SRichard Henderson tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp); 117409a52d85SRichard Henderson 117509a52d85SRichard Henderson tcg_gen_shlv_vec(vece, lval, src, lsh); 117609a52d85SRichard Henderson tcg_gen_sarv_vec(vece, rval, src, rsh); 117709a52d85SRichard Henderson 117809a52d85SRichard Henderson /* Select in-bound left shift. */ 117909a52d85SRichard Henderson tcg_gen_andc_vec(vece, lval, lval, tmp); 118009a52d85SRichard Henderson 118109a52d85SRichard Henderson /* Select between left and right shift. 
*/ 118209a52d85SRichard Henderson if (vece == MO_8) { 118309a52d85SRichard Henderson tcg_gen_dupi_vec(vece, tmp, 0); 118409a52d85SRichard Henderson tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval); 118509a52d85SRichard Henderson } else { 118609a52d85SRichard Henderson tcg_gen_dupi_vec(vece, tmp, 0x80); 118709a52d85SRichard Henderson tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval); 118809a52d85SRichard Henderson } 118909a52d85SRichard Henderson } 119009a52d85SRichard Henderson 119109a52d85SRichard Henderson void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 119209a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 119309a52d85SRichard Henderson { 119409a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 119509a52d85SRichard Henderson INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec, 119609a52d85SRichard Henderson INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0 119709a52d85SRichard Henderson }; 119809a52d85SRichard Henderson static const GVecGen3 ops[4] = { 119909a52d85SRichard Henderson { .fniv = gen_sshl_vec, 120009a52d85SRichard Henderson .fno = gen_helper_gvec_sshl_b, 120109a52d85SRichard Henderson .opt_opc = vecop_list, 120209a52d85SRichard Henderson .vece = MO_8 }, 120309a52d85SRichard Henderson { .fniv = gen_sshl_vec, 120409a52d85SRichard Henderson .fno = gen_helper_gvec_sshl_h, 120509a52d85SRichard Henderson .opt_opc = vecop_list, 120609a52d85SRichard Henderson .vece = MO_16 }, 120709a52d85SRichard Henderson { .fni4 = gen_sshl_i32, 120809a52d85SRichard Henderson .fniv = gen_sshl_vec, 120909a52d85SRichard Henderson .opt_opc = vecop_list, 121009a52d85SRichard Henderson .vece = MO_32 }, 121109a52d85SRichard Henderson { .fni8 = gen_sshl_i64, 121209a52d85SRichard Henderson .fniv = gen_sshl_vec, 121309a52d85SRichard Henderson .opt_opc = vecop_list, 121409a52d85SRichard Henderson .vece = MO_64 }, 121509a52d85SRichard Henderson }; 121609a52d85SRichard Henderson 
tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 121709a52d85SRichard Henderson } 121809a52d85SRichard Henderson 1219940392c8SRichard Henderson void gen_gvec_srshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1220940392c8SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1221940392c8SRichard Henderson { 1222940392c8SRichard Henderson static gen_helper_gvec_3 * const fns[] = { 1223940392c8SRichard Henderson gen_helper_gvec_srshl_b, gen_helper_gvec_srshl_h, 1224940392c8SRichard Henderson gen_helper_gvec_srshl_s, gen_helper_gvec_srshl_d, 1225940392c8SRichard Henderson }; 1226940392c8SRichard Henderson tcg_debug_assert(vece <= MO_64); 1227940392c8SRichard Henderson tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 1228940392c8SRichard Henderson } 1229940392c8SRichard Henderson 1230940392c8SRichard Henderson void gen_gvec_urshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1231940392c8SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1232940392c8SRichard Henderson { 1233940392c8SRichard Henderson static gen_helper_gvec_3 * const fns[] = { 1234940392c8SRichard Henderson gen_helper_gvec_urshl_b, gen_helper_gvec_urshl_h, 1235940392c8SRichard Henderson gen_helper_gvec_urshl_s, gen_helper_gvec_urshl_d, 1236940392c8SRichard Henderson }; 1237940392c8SRichard Henderson tcg_debug_assert(vece <= MO_64); 1238940392c8SRichard Henderson tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 1239940392c8SRichard Henderson } 1240940392c8SRichard Henderson 1241e72a6878SRichard Henderson void gen_neon_sqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1242e72a6878SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1243e72a6878SRichard Henderson { 1244e72a6878SRichard Henderson static gen_helper_gvec_3_ptr * const fns[] = { 1245e72a6878SRichard Henderson gen_helper_neon_sqshl_b, gen_helper_neon_sqshl_h, 1246e72a6878SRichard Henderson gen_helper_neon_sqshl_s, 
gen_helper_neon_sqshl_d, 1247e72a6878SRichard Henderson }; 1248e72a6878SRichard Henderson tcg_debug_assert(vece <= MO_64); 1249e72a6878SRichard Henderson tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env, 1250e72a6878SRichard Henderson opr_sz, max_sz, 0, fns[vece]); 1251e72a6878SRichard Henderson } 1252e72a6878SRichard Henderson 1253e72a6878SRichard Henderson void gen_neon_uqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1254e72a6878SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1255e72a6878SRichard Henderson { 1256e72a6878SRichard Henderson static gen_helper_gvec_3_ptr * const fns[] = { 1257e72a6878SRichard Henderson gen_helper_neon_uqshl_b, gen_helper_neon_uqshl_h, 1258e72a6878SRichard Henderson gen_helper_neon_uqshl_s, gen_helper_neon_uqshl_d, 1259e72a6878SRichard Henderson }; 1260e72a6878SRichard Henderson tcg_debug_assert(vece <= MO_64); 1261e72a6878SRichard Henderson tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env, 1262e72a6878SRichard Henderson opr_sz, max_sz, 0, fns[vece]); 1263e72a6878SRichard Henderson } 1264e72a6878SRichard Henderson 1265cef9d54fSRichard Henderson void gen_neon_sqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1266cef9d54fSRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1267cef9d54fSRichard Henderson { 1268cef9d54fSRichard Henderson static gen_helper_gvec_3_ptr * const fns[] = { 1269cef9d54fSRichard Henderson gen_helper_neon_sqrshl_b, gen_helper_neon_sqrshl_h, 1270cef9d54fSRichard Henderson gen_helper_neon_sqrshl_s, gen_helper_neon_sqrshl_d, 1271cef9d54fSRichard Henderson }; 1272cef9d54fSRichard Henderson tcg_debug_assert(vece <= MO_64); 1273cef9d54fSRichard Henderson tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env, 1274cef9d54fSRichard Henderson opr_sz, max_sz, 0, fns[vece]); 1275cef9d54fSRichard Henderson } 1276cef9d54fSRichard Henderson 1277cef9d54fSRichard Henderson void gen_neon_uqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1278cef9d54fSRichard Henderson 
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1279cef9d54fSRichard Henderson { 1280cef9d54fSRichard Henderson static gen_helper_gvec_3_ptr * const fns[] = { 1281cef9d54fSRichard Henderson gen_helper_neon_uqrshl_b, gen_helper_neon_uqrshl_h, 1282cef9d54fSRichard Henderson gen_helper_neon_uqrshl_s, gen_helper_neon_uqrshl_d, 1283cef9d54fSRichard Henderson }; 1284cef9d54fSRichard Henderson tcg_debug_assert(vece <= MO_64); 1285cef9d54fSRichard Henderson tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env, 1286cef9d54fSRichard Henderson opr_sz, max_sz, 0, fns[vece]); 1287cef9d54fSRichard Henderson } 1288cef9d54fSRichard Henderson 1289f4fa83d6SRichard Henderson void gen_uqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz) 1290f4fa83d6SRichard Henderson { 1291f4fa83d6SRichard Henderson uint64_t max = MAKE_64BIT_MASK(0, 8 << esz); 1292f4fa83d6SRichard Henderson TCGv_i64 tmp = tcg_temp_new_i64(); 1293f4fa83d6SRichard Henderson 1294f4fa83d6SRichard Henderson tcg_gen_add_i64(tmp, a, b); 1295f4fa83d6SRichard Henderson tcg_gen_umin_i64(res, tmp, tcg_constant_i64(max)); 1296f4fa83d6SRichard Henderson tcg_gen_xor_i64(tmp, tmp, res); 1297f4fa83d6SRichard Henderson tcg_gen_or_i64(qc, qc, tmp); 1298f4fa83d6SRichard Henderson } 1299f4fa83d6SRichard Henderson 1300f4fa83d6SRichard Henderson void gen_uqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b) 1301f4fa83d6SRichard Henderson { 1302f4fa83d6SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 1303f4fa83d6SRichard Henderson 1304f4fa83d6SRichard Henderson tcg_gen_add_i64(t, a, b); 1305f4fa83d6SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LTU, res, t, a, 1306f4fa83d6SRichard Henderson tcg_constant_i64(UINT64_MAX), t); 1307f4fa83d6SRichard Henderson tcg_gen_xor_i64(t, t, res); 1308f4fa83d6SRichard Henderson tcg_gen_or_i64(qc, qc, t); 1309f4fa83d6SRichard Henderson } 1310f4fa83d6SRichard Henderson 131176f4a8aeSRichard Henderson static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, 
131209a52d85SRichard Henderson TCGv_vec a, TCGv_vec b) 131309a52d85SRichard Henderson { 131409a52d85SRichard Henderson TCGv_vec x = tcg_temp_new_vec_matching(t); 131509a52d85SRichard Henderson tcg_gen_add_vec(vece, x, a, b); 131609a52d85SRichard Henderson tcg_gen_usadd_vec(vece, t, a, b); 131776f4a8aeSRichard Henderson tcg_gen_xor_vec(vece, x, x, t); 131876f4a8aeSRichard Henderson tcg_gen_or_vec(vece, qc, qc, x); 131909a52d85SRichard Henderson } 132009a52d85SRichard Henderson 132109a52d85SRichard Henderson void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 132209a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 132309a52d85SRichard Henderson { 132409a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 132576f4a8aeSRichard Henderson INDEX_op_usadd_vec, INDEX_op_add_vec, 0 132609a52d85SRichard Henderson }; 132709a52d85SRichard Henderson static const GVecGen4 ops[4] = { 132809a52d85SRichard Henderson { .fniv = gen_uqadd_vec, 132909a52d85SRichard Henderson .fno = gen_helper_gvec_uqadd_b, 133009a52d85SRichard Henderson .write_aofs = true, 133109a52d85SRichard Henderson .opt_opc = vecop_list, 133209a52d85SRichard Henderson .vece = MO_8 }, 133309a52d85SRichard Henderson { .fniv = gen_uqadd_vec, 133409a52d85SRichard Henderson .fno = gen_helper_gvec_uqadd_h, 133509a52d85SRichard Henderson .write_aofs = true, 133609a52d85SRichard Henderson .opt_opc = vecop_list, 133709a52d85SRichard Henderson .vece = MO_16 }, 133809a52d85SRichard Henderson { .fniv = gen_uqadd_vec, 133909a52d85SRichard Henderson .fno = gen_helper_gvec_uqadd_s, 134009a52d85SRichard Henderson .write_aofs = true, 134109a52d85SRichard Henderson .opt_opc = vecop_list, 134209a52d85SRichard Henderson .vece = MO_32 }, 134309a52d85SRichard Henderson { .fniv = gen_uqadd_vec, 1344f4fa83d6SRichard Henderson .fni8 = gen_uqadd_d, 134509a52d85SRichard Henderson .fno = gen_helper_gvec_uqadd_d, 134609a52d85SRichard Henderson .write_aofs = true, 134709a52d85SRichard 
Henderson .opt_opc = vecop_list, 134809a52d85SRichard Henderson .vece = MO_64 }, 134909a52d85SRichard Henderson }; 135001d5665bSRichard Henderson 135101d5665bSRichard Henderson tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); 135209a52d85SRichard Henderson tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), 135309a52d85SRichard Henderson rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 135409a52d85SRichard Henderson } 135509a52d85SRichard Henderson 1356f4fa83d6SRichard Henderson void gen_sqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz) 1357f4fa83d6SRichard Henderson { 1358f4fa83d6SRichard Henderson int64_t max = MAKE_64BIT_MASK(0, (8 << esz) - 1); 1359f4fa83d6SRichard Henderson int64_t min = -1ll - max; 1360f4fa83d6SRichard Henderson TCGv_i64 tmp = tcg_temp_new_i64(); 1361f4fa83d6SRichard Henderson 1362f4fa83d6SRichard Henderson tcg_gen_add_i64(tmp, a, b); 1363f4fa83d6SRichard Henderson tcg_gen_smin_i64(res, tmp, tcg_constant_i64(max)); 1364f4fa83d6SRichard Henderson tcg_gen_smax_i64(res, res, tcg_constant_i64(min)); 1365f4fa83d6SRichard Henderson tcg_gen_xor_i64(tmp, tmp, res); 1366f4fa83d6SRichard Henderson tcg_gen_or_i64(qc, qc, tmp); 1367f4fa83d6SRichard Henderson } 1368f4fa83d6SRichard Henderson 1369f4fa83d6SRichard Henderson void gen_sqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b) 1370f4fa83d6SRichard Henderson { 1371f4fa83d6SRichard Henderson TCGv_i64 t0 = tcg_temp_new_i64(); 1372f4fa83d6SRichard Henderson TCGv_i64 t1 = tcg_temp_new_i64(); 1373f4fa83d6SRichard Henderson TCGv_i64 t2 = tcg_temp_new_i64(); 1374f4fa83d6SRichard Henderson 1375f4fa83d6SRichard Henderson tcg_gen_add_i64(t0, a, b); 1376f4fa83d6SRichard Henderson 1377f4fa83d6SRichard Henderson /* Compute signed overflow indication into T1 */ 1378f4fa83d6SRichard Henderson tcg_gen_xor_i64(t1, a, b); 1379f4fa83d6SRichard Henderson tcg_gen_xor_i64(t2, t0, a); 1380f4fa83d6SRichard Henderson tcg_gen_andc_i64(t1, t2, t1); 1381f4fa83d6SRichard Henderson 
1382f4fa83d6SRichard Henderson /* Compute saturated value into T2 */ 1383f4fa83d6SRichard Henderson tcg_gen_sari_i64(t2, a, 63); 1384f4fa83d6SRichard Henderson tcg_gen_xori_i64(t2, t2, INT64_MAX); 1385f4fa83d6SRichard Henderson 1386f4fa83d6SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LT, res, t1, tcg_constant_i64(0), t2, t0); 1387f4fa83d6SRichard Henderson tcg_gen_xor_i64(t0, t0, res); 1388f4fa83d6SRichard Henderson tcg_gen_or_i64(qc, qc, t0); 1389f4fa83d6SRichard Henderson } 1390f4fa83d6SRichard Henderson 139176f4a8aeSRichard Henderson static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, 139209a52d85SRichard Henderson TCGv_vec a, TCGv_vec b) 139309a52d85SRichard Henderson { 139409a52d85SRichard Henderson TCGv_vec x = tcg_temp_new_vec_matching(t); 139509a52d85SRichard Henderson tcg_gen_add_vec(vece, x, a, b); 139609a52d85SRichard Henderson tcg_gen_ssadd_vec(vece, t, a, b); 139776f4a8aeSRichard Henderson tcg_gen_xor_vec(vece, x, x, t); 139876f4a8aeSRichard Henderson tcg_gen_or_vec(vece, qc, qc, x); 139909a52d85SRichard Henderson } 140009a52d85SRichard Henderson 140109a52d85SRichard Henderson void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 140209a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 140309a52d85SRichard Henderson { 140409a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 140576f4a8aeSRichard Henderson INDEX_op_ssadd_vec, INDEX_op_add_vec, 0 140609a52d85SRichard Henderson }; 140709a52d85SRichard Henderson static const GVecGen4 ops[4] = { 140809a52d85SRichard Henderson { .fniv = gen_sqadd_vec, 140909a52d85SRichard Henderson .fno = gen_helper_gvec_sqadd_b, 141009a52d85SRichard Henderson .opt_opc = vecop_list, 141109a52d85SRichard Henderson .write_aofs = true, 141209a52d85SRichard Henderson .vece = MO_8 }, 141309a52d85SRichard Henderson { .fniv = gen_sqadd_vec, 141409a52d85SRichard Henderson .fno = gen_helper_gvec_sqadd_h, 141509a52d85SRichard Henderson .opt_opc = vecop_list, 
141609a52d85SRichard Henderson .write_aofs = true, 141709a52d85SRichard Henderson .vece = MO_16 }, 141809a52d85SRichard Henderson { .fniv = gen_sqadd_vec, 141909a52d85SRichard Henderson .fno = gen_helper_gvec_sqadd_s, 142009a52d85SRichard Henderson .opt_opc = vecop_list, 142109a52d85SRichard Henderson .write_aofs = true, 142209a52d85SRichard Henderson .vece = MO_32 }, 142309a52d85SRichard Henderson { .fniv = gen_sqadd_vec, 1424f4fa83d6SRichard Henderson .fni8 = gen_sqadd_d, 142509a52d85SRichard Henderson .fno = gen_helper_gvec_sqadd_d, 142609a52d85SRichard Henderson .opt_opc = vecop_list, 142709a52d85SRichard Henderson .write_aofs = true, 142809a52d85SRichard Henderson .vece = MO_64 }, 142909a52d85SRichard Henderson }; 143001d5665bSRichard Henderson 143101d5665bSRichard Henderson tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); 143209a52d85SRichard Henderson tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), 143309a52d85SRichard Henderson rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 143409a52d85SRichard Henderson } 143509a52d85SRichard Henderson 1436f4fa83d6SRichard Henderson void gen_uqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz) 1437f4fa83d6SRichard Henderson { 1438f4fa83d6SRichard Henderson TCGv_i64 tmp = tcg_temp_new_i64(); 1439f4fa83d6SRichard Henderson 1440f4fa83d6SRichard Henderson tcg_gen_sub_i64(tmp, a, b); 1441f4fa83d6SRichard Henderson tcg_gen_smax_i64(res, tmp, tcg_constant_i64(0)); 1442f4fa83d6SRichard Henderson tcg_gen_xor_i64(tmp, tmp, res); 1443f4fa83d6SRichard Henderson tcg_gen_or_i64(qc, qc, tmp); 1444f4fa83d6SRichard Henderson } 1445f4fa83d6SRichard Henderson 1446f4fa83d6SRichard Henderson void gen_uqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b) 1447f4fa83d6SRichard Henderson { 1448f4fa83d6SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 1449f4fa83d6SRichard Henderson 1450f4fa83d6SRichard Henderson tcg_gen_sub_i64(t, a, b); 1451f4fa83d6SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LTU, 
res, a, b, tcg_constant_i64(0), t); 1452f4fa83d6SRichard Henderson tcg_gen_xor_i64(t, t, res); 1453f4fa83d6SRichard Henderson tcg_gen_or_i64(qc, qc, t); 1454f4fa83d6SRichard Henderson } 1455f4fa83d6SRichard Henderson 145676f4a8aeSRichard Henderson static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, 145709a52d85SRichard Henderson TCGv_vec a, TCGv_vec b) 145809a52d85SRichard Henderson { 145909a52d85SRichard Henderson TCGv_vec x = tcg_temp_new_vec_matching(t); 146009a52d85SRichard Henderson tcg_gen_sub_vec(vece, x, a, b); 146109a52d85SRichard Henderson tcg_gen_ussub_vec(vece, t, a, b); 146276f4a8aeSRichard Henderson tcg_gen_xor_vec(vece, x, x, t); 146376f4a8aeSRichard Henderson tcg_gen_or_vec(vece, qc, qc, x); 146409a52d85SRichard Henderson } 146509a52d85SRichard Henderson 146609a52d85SRichard Henderson void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 146709a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 146809a52d85SRichard Henderson { 146909a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 147076f4a8aeSRichard Henderson INDEX_op_ussub_vec, INDEX_op_sub_vec, 0 147109a52d85SRichard Henderson }; 147209a52d85SRichard Henderson static const GVecGen4 ops[4] = { 147309a52d85SRichard Henderson { .fniv = gen_uqsub_vec, 147409a52d85SRichard Henderson .fno = gen_helper_gvec_uqsub_b, 147509a52d85SRichard Henderson .opt_opc = vecop_list, 147609a52d85SRichard Henderson .write_aofs = true, 147709a52d85SRichard Henderson .vece = MO_8 }, 147809a52d85SRichard Henderson { .fniv = gen_uqsub_vec, 147909a52d85SRichard Henderson .fno = gen_helper_gvec_uqsub_h, 148009a52d85SRichard Henderson .opt_opc = vecop_list, 148109a52d85SRichard Henderson .write_aofs = true, 148209a52d85SRichard Henderson .vece = MO_16 }, 148309a52d85SRichard Henderson { .fniv = gen_uqsub_vec, 148409a52d85SRichard Henderson .fno = gen_helper_gvec_uqsub_s, 148509a52d85SRichard Henderson .opt_opc = vecop_list, 148609a52d85SRichard Henderson 
.write_aofs = true, 148709a52d85SRichard Henderson .vece = MO_32 }, 148809a52d85SRichard Henderson { .fniv = gen_uqsub_vec, 1489f4fa83d6SRichard Henderson .fni8 = gen_uqsub_d, 149009a52d85SRichard Henderson .fno = gen_helper_gvec_uqsub_d, 149109a52d85SRichard Henderson .opt_opc = vecop_list, 149209a52d85SRichard Henderson .write_aofs = true, 149309a52d85SRichard Henderson .vece = MO_64 }, 149409a52d85SRichard Henderson }; 149501d5665bSRichard Henderson 149601d5665bSRichard Henderson tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); 149709a52d85SRichard Henderson tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), 149809a52d85SRichard Henderson rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 149909a52d85SRichard Henderson } 150009a52d85SRichard Henderson 1501f4fa83d6SRichard Henderson void gen_sqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz) 1502f4fa83d6SRichard Henderson { 1503f4fa83d6SRichard Henderson int64_t max = MAKE_64BIT_MASK(0, (8 << esz) - 1); 1504f4fa83d6SRichard Henderson int64_t min = -1ll - max; 1505f4fa83d6SRichard Henderson TCGv_i64 tmp = tcg_temp_new_i64(); 1506f4fa83d6SRichard Henderson 1507f4fa83d6SRichard Henderson tcg_gen_sub_i64(tmp, a, b); 1508f4fa83d6SRichard Henderson tcg_gen_smin_i64(res, tmp, tcg_constant_i64(max)); 1509f4fa83d6SRichard Henderson tcg_gen_smax_i64(res, res, tcg_constant_i64(min)); 1510f4fa83d6SRichard Henderson tcg_gen_xor_i64(tmp, tmp, res); 1511f4fa83d6SRichard Henderson tcg_gen_or_i64(qc, qc, tmp); 1512f4fa83d6SRichard Henderson } 1513f4fa83d6SRichard Henderson 1514f4fa83d6SRichard Henderson void gen_sqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b) 1515f4fa83d6SRichard Henderson { 1516f4fa83d6SRichard Henderson TCGv_i64 t0 = tcg_temp_new_i64(); 1517f4fa83d6SRichard Henderson TCGv_i64 t1 = tcg_temp_new_i64(); 1518f4fa83d6SRichard Henderson TCGv_i64 t2 = tcg_temp_new_i64(); 1519f4fa83d6SRichard Henderson 1520f4fa83d6SRichard Henderson tcg_gen_sub_i64(t0, a, b); 
1521f4fa83d6SRichard Henderson 1522f4fa83d6SRichard Henderson /* Compute signed overflow indication into T1 */ 1523f4fa83d6SRichard Henderson tcg_gen_xor_i64(t1, a, b); 1524f4fa83d6SRichard Henderson tcg_gen_xor_i64(t2, t0, a); 1525f4fa83d6SRichard Henderson tcg_gen_and_i64(t1, t1, t2); 1526f4fa83d6SRichard Henderson 1527f4fa83d6SRichard Henderson /* Compute saturated value into T2 */ 1528f4fa83d6SRichard Henderson tcg_gen_sari_i64(t2, a, 63); 1529f4fa83d6SRichard Henderson tcg_gen_xori_i64(t2, t2, INT64_MAX); 1530f4fa83d6SRichard Henderson 1531f4fa83d6SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LT, res, t1, tcg_constant_i64(0), t2, t0); 1532f4fa83d6SRichard Henderson tcg_gen_xor_i64(t0, t0, res); 1533f4fa83d6SRichard Henderson tcg_gen_or_i64(qc, qc, t0); 1534f4fa83d6SRichard Henderson } 1535f4fa83d6SRichard Henderson 153676f4a8aeSRichard Henderson static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, 153709a52d85SRichard Henderson TCGv_vec a, TCGv_vec b) 153809a52d85SRichard Henderson { 153909a52d85SRichard Henderson TCGv_vec x = tcg_temp_new_vec_matching(t); 154009a52d85SRichard Henderson tcg_gen_sub_vec(vece, x, a, b); 154109a52d85SRichard Henderson tcg_gen_sssub_vec(vece, t, a, b); 154276f4a8aeSRichard Henderson tcg_gen_xor_vec(vece, x, x, t); 154376f4a8aeSRichard Henderson tcg_gen_or_vec(vece, qc, qc, x); 154409a52d85SRichard Henderson } 154509a52d85SRichard Henderson 154609a52d85SRichard Henderson void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 154709a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 154809a52d85SRichard Henderson { 154909a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 155076f4a8aeSRichard Henderson INDEX_op_sssub_vec, INDEX_op_sub_vec, 0 155109a52d85SRichard Henderson }; 155209a52d85SRichard Henderson static const GVecGen4 ops[4] = { 155309a52d85SRichard Henderson { .fniv = gen_sqsub_vec, 155409a52d85SRichard Henderson .fno = gen_helper_gvec_sqsub_b, 
155509a52d85SRichard Henderson .opt_opc = vecop_list, 155609a52d85SRichard Henderson .write_aofs = true, 155709a52d85SRichard Henderson .vece = MO_8 }, 155809a52d85SRichard Henderson { .fniv = gen_sqsub_vec, 155909a52d85SRichard Henderson .fno = gen_helper_gvec_sqsub_h, 156009a52d85SRichard Henderson .opt_opc = vecop_list, 156109a52d85SRichard Henderson .write_aofs = true, 156209a52d85SRichard Henderson .vece = MO_16 }, 156309a52d85SRichard Henderson { .fniv = gen_sqsub_vec, 156409a52d85SRichard Henderson .fno = gen_helper_gvec_sqsub_s, 156509a52d85SRichard Henderson .opt_opc = vecop_list, 156609a52d85SRichard Henderson .write_aofs = true, 156709a52d85SRichard Henderson .vece = MO_32 }, 156809a52d85SRichard Henderson { .fniv = gen_sqsub_vec, 1569f4fa83d6SRichard Henderson .fni8 = gen_sqsub_d, 157009a52d85SRichard Henderson .fno = gen_helper_gvec_sqsub_d, 157109a52d85SRichard Henderson .opt_opc = vecop_list, 157209a52d85SRichard Henderson .write_aofs = true, 157309a52d85SRichard Henderson .vece = MO_64 }, 157409a52d85SRichard Henderson }; 157501d5665bSRichard Henderson 157601d5665bSRichard Henderson tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); 157709a52d85SRichard Henderson tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), 157809a52d85SRichard Henderson rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 157909a52d85SRichard Henderson } 158009a52d85SRichard Henderson 158109a52d85SRichard Henderson static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 158209a52d85SRichard Henderson { 158309a52d85SRichard Henderson TCGv_i32 t = tcg_temp_new_i32(); 158409a52d85SRichard Henderson 158509a52d85SRichard Henderson tcg_gen_sub_i32(t, a, b); 158609a52d85SRichard Henderson tcg_gen_sub_i32(d, b, a); 158709a52d85SRichard Henderson tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t); 158809a52d85SRichard Henderson } 158909a52d85SRichard Henderson 159009a52d85SRichard Henderson static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 159109a52d85SRichard 
Henderson { 159209a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 159309a52d85SRichard Henderson 159409a52d85SRichard Henderson tcg_gen_sub_i64(t, a, b); 159509a52d85SRichard Henderson tcg_gen_sub_i64(d, b, a); 159609a52d85SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t); 159709a52d85SRichard Henderson } 159809a52d85SRichard Henderson 159909a52d85SRichard Henderson static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 160009a52d85SRichard Henderson { 160109a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 160209a52d85SRichard Henderson 160309a52d85SRichard Henderson tcg_gen_smin_vec(vece, t, a, b); 160409a52d85SRichard Henderson tcg_gen_smax_vec(vece, d, a, b); 160509a52d85SRichard Henderson tcg_gen_sub_vec(vece, d, d, t); 160609a52d85SRichard Henderson } 160709a52d85SRichard Henderson 160809a52d85SRichard Henderson void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 160909a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 161009a52d85SRichard Henderson { 161109a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 161209a52d85SRichard Henderson INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0 161309a52d85SRichard Henderson }; 161409a52d85SRichard Henderson static const GVecGen3 ops[4] = { 161509a52d85SRichard Henderson { .fniv = gen_sabd_vec, 161609a52d85SRichard Henderson .fno = gen_helper_gvec_sabd_b, 161709a52d85SRichard Henderson .opt_opc = vecop_list, 161809a52d85SRichard Henderson .vece = MO_8 }, 161909a52d85SRichard Henderson { .fniv = gen_sabd_vec, 162009a52d85SRichard Henderson .fno = gen_helper_gvec_sabd_h, 162109a52d85SRichard Henderson .opt_opc = vecop_list, 162209a52d85SRichard Henderson .vece = MO_16 }, 162309a52d85SRichard Henderson { .fni4 = gen_sabd_i32, 162409a52d85SRichard Henderson .fniv = gen_sabd_vec, 162509a52d85SRichard Henderson .fno = gen_helper_gvec_sabd_s, 162609a52d85SRichard Henderson .opt_opc = vecop_list, 
162709a52d85SRichard Henderson .vece = MO_32 }, 162809a52d85SRichard Henderson { .fni8 = gen_sabd_i64, 162909a52d85SRichard Henderson .fniv = gen_sabd_vec, 163009a52d85SRichard Henderson .fno = gen_helper_gvec_sabd_d, 163109a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 163209a52d85SRichard Henderson .opt_opc = vecop_list, 163309a52d85SRichard Henderson .vece = MO_64 }, 163409a52d85SRichard Henderson }; 163509a52d85SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 163609a52d85SRichard Henderson } 163709a52d85SRichard Henderson 163809a52d85SRichard Henderson static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 163909a52d85SRichard Henderson { 164009a52d85SRichard Henderson TCGv_i32 t = tcg_temp_new_i32(); 164109a52d85SRichard Henderson 164209a52d85SRichard Henderson tcg_gen_sub_i32(t, a, b); 164309a52d85SRichard Henderson tcg_gen_sub_i32(d, b, a); 164409a52d85SRichard Henderson tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t); 164509a52d85SRichard Henderson } 164609a52d85SRichard Henderson 164709a52d85SRichard Henderson static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 164809a52d85SRichard Henderson { 164909a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 165009a52d85SRichard Henderson 165109a52d85SRichard Henderson tcg_gen_sub_i64(t, a, b); 165209a52d85SRichard Henderson tcg_gen_sub_i64(d, b, a); 165309a52d85SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t); 165409a52d85SRichard Henderson } 165509a52d85SRichard Henderson 165609a52d85SRichard Henderson static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 165709a52d85SRichard Henderson { 165809a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 165909a52d85SRichard Henderson 166009a52d85SRichard Henderson tcg_gen_umin_vec(vece, t, a, b); 166109a52d85SRichard Henderson tcg_gen_umax_vec(vece, d, a, b); 166209a52d85SRichard Henderson tcg_gen_sub_vec(vece, d, d, t); 
}

/*
 * Expand UABD (unsigned absolute difference): per-element |Vn - Vm|,
 * treating elements as unsigned.
 */
void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    /* Host vector opcodes that gen_uabd_vec needs for inline expansion. */
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
    };
    /* Indexed by vece: one expander per element size (8/16/32/64 bits). */
    static const GVecGen3 ops[4] = {
        { .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_uabd_i32,
          .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_uabd_i64,
          .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/* d += SABD(a, b): 32-bit scalar accumulate step. */
static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();
    gen_sabd_i32(t, a, b);
    tcg_gen_add_i32(d, d, t);
}

/* d += SABD(a, b): 64-bit scalar accumulate step. */
static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();
    gen_sabd_i64(t, a, b);
    tcg_gen_add_i64(d, d, t);
}

/* d += SABD(a, b): host-vector accumulate step. */
static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    gen_sabd_vec(vece, t, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

/*
 * Expand SABA (signed absolute difference and accumulate):
 * per-element Vd += |Vn - Vm|, treating elements as signed.
 */
void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    /* Host vector opcodes that gen_saba_vec needs for inline expansion. */
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_add_vec,
        INDEX_op_smin_vec, INDEX_op_smax_vec, 0
    };
    /* load_dest: the destination is also an input (accumulator). */
    static const GVecGen3 ops[4] = {
        { .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_saba_i32,
          .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_saba_i64,
          .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/* d += UABD(a, b): 32-bit scalar accumulate step. */
static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();
    gen_uabd_i32(t, a, b);
    tcg_gen_add_i32(d, d, t);
}

/* d += UABD(a, b): 64-bit scalar accumulate step. */
static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();
    gen_uabd_i64(t, a, b);
    tcg_gen_add_i64(d, d, t);
}

/* d += UABD(a, b): host-vector accumulate step. */
static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    gen_uabd_vec(vece, t, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}
177109a52d85SRichard Henderson 177209a52d85SRichard Henderson void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 177309a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 177409a52d85SRichard Henderson { 177509a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 177609a52d85SRichard Henderson INDEX_op_sub_vec, INDEX_op_add_vec, 177709a52d85SRichard Henderson INDEX_op_umin_vec, INDEX_op_umax_vec, 0 177809a52d85SRichard Henderson }; 177909a52d85SRichard Henderson static const GVecGen3 ops[4] = { 178009a52d85SRichard Henderson { .fniv = gen_uaba_vec, 178109a52d85SRichard Henderson .fno = gen_helper_gvec_uaba_b, 178209a52d85SRichard Henderson .opt_opc = vecop_list, 178309a52d85SRichard Henderson .load_dest = true, 178409a52d85SRichard Henderson .vece = MO_8 }, 178509a52d85SRichard Henderson { .fniv = gen_uaba_vec, 178609a52d85SRichard Henderson .fno = gen_helper_gvec_uaba_h, 178709a52d85SRichard Henderson .opt_opc = vecop_list, 178809a52d85SRichard Henderson .load_dest = true, 178909a52d85SRichard Henderson .vece = MO_16 }, 179009a52d85SRichard Henderson { .fni4 = gen_uaba_i32, 179109a52d85SRichard Henderson .fniv = gen_uaba_vec, 179209a52d85SRichard Henderson .fno = gen_helper_gvec_uaba_s, 179309a52d85SRichard Henderson .opt_opc = vecop_list, 179409a52d85SRichard Henderson .load_dest = true, 179509a52d85SRichard Henderson .vece = MO_32 }, 179609a52d85SRichard Henderson { .fni8 = gen_uaba_i64, 179709a52d85SRichard Henderson .fniv = gen_uaba_vec, 179809a52d85SRichard Henderson .fno = gen_helper_gvec_uaba_d, 179909a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 180009a52d85SRichard Henderson .opt_opc = vecop_list, 180109a52d85SRichard Henderson .load_dest = true, 180209a52d85SRichard Henderson .vece = MO_64 }, 180309a52d85SRichard Henderson }; 180409a52d85SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 180509a52d85SRichard Henderson } 1806a7e4eec6SRichard 
Henderson 1807a7e4eec6SRichard Henderson void gen_gvec_addp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1808a7e4eec6SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1809a7e4eec6SRichard Henderson { 1810a7e4eec6SRichard Henderson static gen_helper_gvec_3 * const fns[4] = { 1811a7e4eec6SRichard Henderson gen_helper_gvec_addp_b, 1812a7e4eec6SRichard Henderson gen_helper_gvec_addp_h, 1813a7e4eec6SRichard Henderson gen_helper_gvec_addp_s, 1814a7e4eec6SRichard Henderson gen_helper_gvec_addp_d, 1815a7e4eec6SRichard Henderson }; 1816a7e4eec6SRichard Henderson tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 1817a7e4eec6SRichard Henderson } 181828b5451bSRichard Henderson 181928b5451bSRichard Henderson void gen_gvec_smaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 182028b5451bSRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 182128b5451bSRichard Henderson { 182228b5451bSRichard Henderson static gen_helper_gvec_3 * const fns[4] = { 182328b5451bSRichard Henderson gen_helper_gvec_smaxp_b, 182428b5451bSRichard Henderson gen_helper_gvec_smaxp_h, 182528b5451bSRichard Henderson gen_helper_gvec_smaxp_s, 182628b5451bSRichard Henderson }; 182728b5451bSRichard Henderson tcg_debug_assert(vece <= MO_32); 182828b5451bSRichard Henderson tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 182928b5451bSRichard Henderson } 183028b5451bSRichard Henderson 183128b5451bSRichard Henderson void gen_gvec_sminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 183228b5451bSRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 183328b5451bSRichard Henderson { 183428b5451bSRichard Henderson static gen_helper_gvec_3 * const fns[4] = { 183528b5451bSRichard Henderson gen_helper_gvec_sminp_b, 183628b5451bSRichard Henderson gen_helper_gvec_sminp_h, 183728b5451bSRichard Henderson gen_helper_gvec_sminp_s, 183828b5451bSRichard Henderson }; 183928b5451bSRichard Henderson tcg_debug_assert(vece <= 
MO_32); 184028b5451bSRichard Henderson tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 184128b5451bSRichard Henderson } 184228b5451bSRichard Henderson 184328b5451bSRichard Henderson void gen_gvec_umaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 184428b5451bSRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 184528b5451bSRichard Henderson { 184628b5451bSRichard Henderson static gen_helper_gvec_3 * const fns[4] = { 184728b5451bSRichard Henderson gen_helper_gvec_umaxp_b, 184828b5451bSRichard Henderson gen_helper_gvec_umaxp_h, 184928b5451bSRichard Henderson gen_helper_gvec_umaxp_s, 185028b5451bSRichard Henderson }; 185128b5451bSRichard Henderson tcg_debug_assert(vece <= MO_32); 185228b5451bSRichard Henderson tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 185328b5451bSRichard Henderson } 185428b5451bSRichard Henderson 185528b5451bSRichard Henderson void gen_gvec_uminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 185628b5451bSRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 185728b5451bSRichard Henderson { 185828b5451bSRichard Henderson static gen_helper_gvec_3 * const fns[4] = { 185928b5451bSRichard Henderson gen_helper_gvec_uminp_b, 186028b5451bSRichard Henderson gen_helper_gvec_uminp_h, 186128b5451bSRichard Henderson gen_helper_gvec_uminp_s, 186228b5451bSRichard Henderson }; 186328b5451bSRichard Henderson tcg_debug_assert(vece <= MO_32); 186428b5451bSRichard Henderson tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 186528b5451bSRichard Henderson } 1866