/*
 * ARM generic vector expansion
 *
 * Copyright (c) 2003 Fabrice Bellard
 * Copyright (c) 2005-2007 CodeSourcery
 * Copyright (c) 2007 OpenedHand, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
2009a52d85SRichard Henderson */ 2109a52d85SRichard Henderson 2209a52d85SRichard Henderson #include "qemu/osdep.h" 2309a52d85SRichard Henderson #include "translate.h" 2409a52d85SRichard Henderson 2509a52d85SRichard Henderson 2609a52d85SRichard Henderson static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, 2709a52d85SRichard Henderson uint32_t opr_sz, uint32_t max_sz, 2809a52d85SRichard Henderson gen_helper_gvec_3_ptr *fn) 2909a52d85SRichard Henderson { 3009a52d85SRichard Henderson TCGv_ptr qc_ptr = tcg_temp_new_ptr(); 3109a52d85SRichard Henderson 3201d5665bSRichard Henderson tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); 3309a52d85SRichard Henderson tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc)); 3409a52d85SRichard Henderson tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr, 3509a52d85SRichard Henderson opr_sz, max_sz, 0, fn); 3609a52d85SRichard Henderson } 3709a52d85SRichard Henderson 3809a52d85SRichard Henderson void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 3909a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 4009a52d85SRichard Henderson { 4109a52d85SRichard Henderson static gen_helper_gvec_3_ptr * const fns[2] = { 4209a52d85SRichard Henderson gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32 4309a52d85SRichard Henderson }; 4409a52d85SRichard Henderson tcg_debug_assert(vece >= 1 && vece <= 2); 4509a52d85SRichard Henderson gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); 4609a52d85SRichard Henderson } 4709a52d85SRichard Henderson 4809a52d85SRichard Henderson void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 4909a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 5009a52d85SRichard Henderson { 5109a52d85SRichard Henderson static gen_helper_gvec_3_ptr * const fns[2] = { 5209a52d85SRichard Henderson gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32 5309a52d85SRichard 
Henderson }; 5409a52d85SRichard Henderson tcg_debug_assert(vece >= 1 && vece <= 2); 5509a52d85SRichard Henderson gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); 5609a52d85SRichard Henderson } 5709a52d85SRichard Henderson 5809a52d85SRichard Henderson #define GEN_CMP0(NAME, COND) \ 5909a52d85SRichard Henderson void NAME(unsigned vece, uint32_t d, uint32_t m, \ 6009a52d85SRichard Henderson uint32_t opr_sz, uint32_t max_sz) \ 6109a52d85SRichard Henderson { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); } 6209a52d85SRichard Henderson 6309a52d85SRichard Henderson GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ) 6409a52d85SRichard Henderson GEN_CMP0(gen_gvec_cle0, TCG_COND_LE) 6509a52d85SRichard Henderson GEN_CMP0(gen_gvec_cge0, TCG_COND_GE) 6609a52d85SRichard Henderson GEN_CMP0(gen_gvec_clt0, TCG_COND_LT) 6709a52d85SRichard Henderson GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT) 6809a52d85SRichard Henderson 6909a52d85SRichard Henderson #undef GEN_CMP0 7009a52d85SRichard Henderson 7109a52d85SRichard Henderson static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 7209a52d85SRichard Henderson { 7309a52d85SRichard Henderson tcg_gen_vec_sar8i_i64(a, a, shift); 7409a52d85SRichard Henderson tcg_gen_vec_add8_i64(d, d, a); 7509a52d85SRichard Henderson } 7609a52d85SRichard Henderson 7709a52d85SRichard Henderson static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 7809a52d85SRichard Henderson { 7909a52d85SRichard Henderson tcg_gen_vec_sar16i_i64(a, a, shift); 8009a52d85SRichard Henderson tcg_gen_vec_add16_i64(d, d, a); 8109a52d85SRichard Henderson } 8209a52d85SRichard Henderson 8309a52d85SRichard Henderson static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) 8409a52d85SRichard Henderson { 8509a52d85SRichard Henderson tcg_gen_sari_i32(a, a, shift); 8609a52d85SRichard Henderson tcg_gen_add_i32(d, d, a); 8709a52d85SRichard Henderson } 8809a52d85SRichard Henderson 8909a52d85SRichard Henderson static void gen_ssra64_i64(TCGv_i64 d, 
TCGv_i64 a, int64_t shift) 9009a52d85SRichard Henderson { 9109a52d85SRichard Henderson tcg_gen_sari_i64(a, a, shift); 9209a52d85SRichard Henderson tcg_gen_add_i64(d, d, a); 9309a52d85SRichard Henderson } 9409a52d85SRichard Henderson 9509a52d85SRichard Henderson static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 9609a52d85SRichard Henderson { 9709a52d85SRichard Henderson tcg_gen_sari_vec(vece, a, a, sh); 9809a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, a); 9909a52d85SRichard Henderson } 10009a52d85SRichard Henderson 10109a52d85SRichard Henderson void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 10209a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 10309a52d85SRichard Henderson { 10409a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 10509a52d85SRichard Henderson INDEX_op_sari_vec, INDEX_op_add_vec, 0 10609a52d85SRichard Henderson }; 10709a52d85SRichard Henderson static const GVecGen2i ops[4] = { 10809a52d85SRichard Henderson { .fni8 = gen_ssra8_i64, 10909a52d85SRichard Henderson .fniv = gen_ssra_vec, 11009a52d85SRichard Henderson .fno = gen_helper_gvec_ssra_b, 11109a52d85SRichard Henderson .load_dest = true, 11209a52d85SRichard Henderson .opt_opc = vecop_list, 11309a52d85SRichard Henderson .vece = MO_8 }, 11409a52d85SRichard Henderson { .fni8 = gen_ssra16_i64, 11509a52d85SRichard Henderson .fniv = gen_ssra_vec, 11609a52d85SRichard Henderson .fno = gen_helper_gvec_ssra_h, 11709a52d85SRichard Henderson .load_dest = true, 11809a52d85SRichard Henderson .opt_opc = vecop_list, 11909a52d85SRichard Henderson .vece = MO_16 }, 12009a52d85SRichard Henderson { .fni4 = gen_ssra32_i32, 12109a52d85SRichard Henderson .fniv = gen_ssra_vec, 12209a52d85SRichard Henderson .fno = gen_helper_gvec_ssra_s, 12309a52d85SRichard Henderson .load_dest = true, 12409a52d85SRichard Henderson .opt_opc = vecop_list, 12509a52d85SRichard Henderson .vece = MO_32 }, 12609a52d85SRichard Henderson { .fni8 = 
gen_ssra64_i64, 12709a52d85SRichard Henderson .fniv = gen_ssra_vec, 12809a52d85SRichard Henderson .fno = gen_helper_gvec_ssra_d, 12909a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 13009a52d85SRichard Henderson .opt_opc = vecop_list, 13109a52d85SRichard Henderson .load_dest = true, 13209a52d85SRichard Henderson .vece = MO_64 }, 13309a52d85SRichard Henderson }; 13409a52d85SRichard Henderson 13509a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [1..esize]. */ 13609a52d85SRichard Henderson tcg_debug_assert(shift > 0); 13709a52d85SRichard Henderson tcg_debug_assert(shift <= (8 << vece)); 13809a52d85SRichard Henderson 13909a52d85SRichard Henderson /* 14009a52d85SRichard Henderson * Shifts larger than the element size are architecturally valid. 14109a52d85SRichard Henderson * Signed results in all sign bits. 14209a52d85SRichard Henderson */ 14309a52d85SRichard Henderson shift = MIN(shift, (8 << vece) - 1); 14409a52d85SRichard Henderson tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 14509a52d85SRichard Henderson } 14609a52d85SRichard Henderson 14709a52d85SRichard Henderson static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 14809a52d85SRichard Henderson { 14909a52d85SRichard Henderson tcg_gen_vec_shr8i_i64(a, a, shift); 15009a52d85SRichard Henderson tcg_gen_vec_add8_i64(d, d, a); 15109a52d85SRichard Henderson } 15209a52d85SRichard Henderson 15309a52d85SRichard Henderson static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 15409a52d85SRichard Henderson { 15509a52d85SRichard Henderson tcg_gen_vec_shr16i_i64(a, a, shift); 15609a52d85SRichard Henderson tcg_gen_vec_add16_i64(d, d, a); 15709a52d85SRichard Henderson } 15809a52d85SRichard Henderson 15909a52d85SRichard Henderson static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) 16009a52d85SRichard Henderson { 16109a52d85SRichard Henderson tcg_gen_shri_i32(a, a, shift); 16209a52d85SRichard Henderson tcg_gen_add_i32(d, d, 
a); 16309a52d85SRichard Henderson } 16409a52d85SRichard Henderson 16509a52d85SRichard Henderson static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 16609a52d85SRichard Henderson { 16709a52d85SRichard Henderson tcg_gen_shri_i64(a, a, shift); 16809a52d85SRichard Henderson tcg_gen_add_i64(d, d, a); 16909a52d85SRichard Henderson } 17009a52d85SRichard Henderson 17109a52d85SRichard Henderson static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 17209a52d85SRichard Henderson { 17309a52d85SRichard Henderson tcg_gen_shri_vec(vece, a, a, sh); 17409a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, a); 17509a52d85SRichard Henderson } 17609a52d85SRichard Henderson 17709a52d85SRichard Henderson void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 17809a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 17909a52d85SRichard Henderson { 18009a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 18109a52d85SRichard Henderson INDEX_op_shri_vec, INDEX_op_add_vec, 0 18209a52d85SRichard Henderson }; 18309a52d85SRichard Henderson static const GVecGen2i ops[4] = { 18409a52d85SRichard Henderson { .fni8 = gen_usra8_i64, 18509a52d85SRichard Henderson .fniv = gen_usra_vec, 18609a52d85SRichard Henderson .fno = gen_helper_gvec_usra_b, 18709a52d85SRichard Henderson .load_dest = true, 18809a52d85SRichard Henderson .opt_opc = vecop_list, 18909a52d85SRichard Henderson .vece = MO_8, }, 19009a52d85SRichard Henderson { .fni8 = gen_usra16_i64, 19109a52d85SRichard Henderson .fniv = gen_usra_vec, 19209a52d85SRichard Henderson .fno = gen_helper_gvec_usra_h, 19309a52d85SRichard Henderson .load_dest = true, 19409a52d85SRichard Henderson .opt_opc = vecop_list, 19509a52d85SRichard Henderson .vece = MO_16, }, 19609a52d85SRichard Henderson { .fni4 = gen_usra32_i32, 19709a52d85SRichard Henderson .fniv = gen_usra_vec, 19809a52d85SRichard Henderson .fno = gen_helper_gvec_usra_s, 19909a52d85SRichard Henderson .load_dest = true, 
20009a52d85SRichard Henderson .opt_opc = vecop_list, 20109a52d85SRichard Henderson .vece = MO_32, }, 20209a52d85SRichard Henderson { .fni8 = gen_usra64_i64, 20309a52d85SRichard Henderson .fniv = gen_usra_vec, 20409a52d85SRichard Henderson .fno = gen_helper_gvec_usra_d, 20509a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 20609a52d85SRichard Henderson .load_dest = true, 20709a52d85SRichard Henderson .opt_opc = vecop_list, 20809a52d85SRichard Henderson .vece = MO_64, }, 20909a52d85SRichard Henderson }; 21009a52d85SRichard Henderson 21109a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [1..esize]. */ 21209a52d85SRichard Henderson tcg_debug_assert(shift > 0); 21309a52d85SRichard Henderson tcg_debug_assert(shift <= (8 << vece)); 21409a52d85SRichard Henderson 21509a52d85SRichard Henderson /* 21609a52d85SRichard Henderson * Shifts larger than the element size are architecturally valid. 21709a52d85SRichard Henderson * Unsigned results in all zeros as input to accumulate: nop. 21809a52d85SRichard Henderson */ 21909a52d85SRichard Henderson if (shift < (8 << vece)) { 22009a52d85SRichard Henderson tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 22109a52d85SRichard Henderson } else { 22209a52d85SRichard Henderson /* Nop, but we do need to clear the tail. */ 22309a52d85SRichard Henderson tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz); 22409a52d85SRichard Henderson } 22509a52d85SRichard Henderson } 22609a52d85SRichard Henderson 22709a52d85SRichard Henderson /* 22809a52d85SRichard Henderson * Shift one less than the requested amount, and the low bit is 22909a52d85SRichard Henderson * the rounding bit. For the 8 and 16-bit operations, because we 23009a52d85SRichard Henderson * mask the low bit, we can perform a normal integer shift instead 23109a52d85SRichard Henderson * of a vector shift. 
23209a52d85SRichard Henderson */ 23309a52d85SRichard Henderson static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 23409a52d85SRichard Henderson { 23509a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 23609a52d85SRichard Henderson 23709a52d85SRichard Henderson tcg_gen_shri_i64(t, a, sh - 1); 23809a52d85SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); 23909a52d85SRichard Henderson tcg_gen_vec_sar8i_i64(d, a, sh); 24009a52d85SRichard Henderson tcg_gen_vec_add8_i64(d, d, t); 24109a52d85SRichard Henderson } 24209a52d85SRichard Henderson 24309a52d85SRichard Henderson static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 24409a52d85SRichard Henderson { 24509a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 24609a52d85SRichard Henderson 24709a52d85SRichard Henderson tcg_gen_shri_i64(t, a, sh - 1); 24809a52d85SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); 24909a52d85SRichard Henderson tcg_gen_vec_sar16i_i64(d, a, sh); 25009a52d85SRichard Henderson tcg_gen_vec_add16_i64(d, d, t); 25109a52d85SRichard Henderson } 25209a52d85SRichard Henderson 25309a52d85SRichard Henderson void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) 25409a52d85SRichard Henderson { 25509a52d85SRichard Henderson TCGv_i32 t; 25609a52d85SRichard Henderson 25709a52d85SRichard Henderson /* Handle shift by the input size for the benefit of trans_SRSHR_ri */ 25809a52d85SRichard Henderson if (sh == 32) { 25909a52d85SRichard Henderson tcg_gen_movi_i32(d, 0); 26009a52d85SRichard Henderson return; 26109a52d85SRichard Henderson } 26209a52d85SRichard Henderson t = tcg_temp_new_i32(); 26309a52d85SRichard Henderson tcg_gen_extract_i32(t, a, sh - 1, 1); 26409a52d85SRichard Henderson tcg_gen_sari_i32(d, a, sh); 26509a52d85SRichard Henderson tcg_gen_add_i32(d, d, t); 26609a52d85SRichard Henderson } 26709a52d85SRichard Henderson 26809a52d85SRichard Henderson void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 26909a52d85SRichard Henderson { 
27009a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 27109a52d85SRichard Henderson 27209a52d85SRichard Henderson tcg_gen_extract_i64(t, a, sh - 1, 1); 27309a52d85SRichard Henderson tcg_gen_sari_i64(d, a, sh); 27409a52d85SRichard Henderson tcg_gen_add_i64(d, d, t); 27509a52d85SRichard Henderson } 27609a52d85SRichard Henderson 27709a52d85SRichard Henderson static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 27809a52d85SRichard Henderson { 27909a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 28009a52d85SRichard Henderson TCGv_vec ones = tcg_temp_new_vec_matching(d); 28109a52d85SRichard Henderson 28209a52d85SRichard Henderson tcg_gen_shri_vec(vece, t, a, sh - 1); 28309a52d85SRichard Henderson tcg_gen_dupi_vec(vece, ones, 1); 28409a52d85SRichard Henderson tcg_gen_and_vec(vece, t, t, ones); 28509a52d85SRichard Henderson tcg_gen_sari_vec(vece, d, a, sh); 28609a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, t); 28709a52d85SRichard Henderson } 28809a52d85SRichard Henderson 28909a52d85SRichard Henderson void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 29009a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 29109a52d85SRichard Henderson { 29209a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 29309a52d85SRichard Henderson INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 29409a52d85SRichard Henderson }; 29509a52d85SRichard Henderson static const GVecGen2i ops[4] = { 29609a52d85SRichard Henderson { .fni8 = gen_srshr8_i64, 29709a52d85SRichard Henderson .fniv = gen_srshr_vec, 29809a52d85SRichard Henderson .fno = gen_helper_gvec_srshr_b, 29909a52d85SRichard Henderson .opt_opc = vecop_list, 30009a52d85SRichard Henderson .vece = MO_8 }, 30109a52d85SRichard Henderson { .fni8 = gen_srshr16_i64, 30209a52d85SRichard Henderson .fniv = gen_srshr_vec, 30309a52d85SRichard Henderson .fno = gen_helper_gvec_srshr_h, 30409a52d85SRichard Henderson .opt_opc = vecop_list, 
30509a52d85SRichard Henderson .vece = MO_16 }, 30609a52d85SRichard Henderson { .fni4 = gen_srshr32_i32, 30709a52d85SRichard Henderson .fniv = gen_srshr_vec, 30809a52d85SRichard Henderson .fno = gen_helper_gvec_srshr_s, 30909a52d85SRichard Henderson .opt_opc = vecop_list, 31009a52d85SRichard Henderson .vece = MO_32 }, 31109a52d85SRichard Henderson { .fni8 = gen_srshr64_i64, 31209a52d85SRichard Henderson .fniv = gen_srshr_vec, 31309a52d85SRichard Henderson .fno = gen_helper_gvec_srshr_d, 31409a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 31509a52d85SRichard Henderson .opt_opc = vecop_list, 31609a52d85SRichard Henderson .vece = MO_64 }, 31709a52d85SRichard Henderson }; 31809a52d85SRichard Henderson 31909a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [1..esize] */ 32009a52d85SRichard Henderson tcg_debug_assert(shift > 0); 32109a52d85SRichard Henderson tcg_debug_assert(shift <= (8 << vece)); 32209a52d85SRichard Henderson 32309a52d85SRichard Henderson if (shift == (8 << vece)) { 32409a52d85SRichard Henderson /* 32509a52d85SRichard Henderson * Shifts larger than the element size are architecturally valid. 32609a52d85SRichard Henderson * Signed results in all sign bits. With rounding, this produces 32709a52d85SRichard Henderson * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0. 32809a52d85SRichard Henderson * I.e. always zero. 
32909a52d85SRichard Henderson */ 33009a52d85SRichard Henderson tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0); 33109a52d85SRichard Henderson } else { 33209a52d85SRichard Henderson tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 33309a52d85SRichard Henderson } 33409a52d85SRichard Henderson } 33509a52d85SRichard Henderson 33609a52d85SRichard Henderson static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 33709a52d85SRichard Henderson { 33809a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 33909a52d85SRichard Henderson 34009a52d85SRichard Henderson gen_srshr8_i64(t, a, sh); 34109a52d85SRichard Henderson tcg_gen_vec_add8_i64(d, d, t); 34209a52d85SRichard Henderson } 34309a52d85SRichard Henderson 34409a52d85SRichard Henderson static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 34509a52d85SRichard Henderson { 34609a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 34709a52d85SRichard Henderson 34809a52d85SRichard Henderson gen_srshr16_i64(t, a, sh); 34909a52d85SRichard Henderson tcg_gen_vec_add16_i64(d, d, t); 35009a52d85SRichard Henderson } 35109a52d85SRichard Henderson 35209a52d85SRichard Henderson static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) 35309a52d85SRichard Henderson { 35409a52d85SRichard Henderson TCGv_i32 t = tcg_temp_new_i32(); 35509a52d85SRichard Henderson 35609a52d85SRichard Henderson gen_srshr32_i32(t, a, sh); 35709a52d85SRichard Henderson tcg_gen_add_i32(d, d, t); 35809a52d85SRichard Henderson } 35909a52d85SRichard Henderson 36009a52d85SRichard Henderson static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 36109a52d85SRichard Henderson { 36209a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 36309a52d85SRichard Henderson 36409a52d85SRichard Henderson gen_srshr64_i64(t, a, sh); 36509a52d85SRichard Henderson tcg_gen_add_i64(d, d, t); 36609a52d85SRichard Henderson } 36709a52d85SRichard Henderson 36809a52d85SRichard Henderson static void gen_srsra_vec(unsigned 
vece, TCGv_vec d, TCGv_vec a, int64_t sh) 36909a52d85SRichard Henderson { 37009a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 37109a52d85SRichard Henderson 37209a52d85SRichard Henderson gen_srshr_vec(vece, t, a, sh); 37309a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, t); 37409a52d85SRichard Henderson } 37509a52d85SRichard Henderson 37609a52d85SRichard Henderson void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 37709a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 37809a52d85SRichard Henderson { 37909a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 38009a52d85SRichard Henderson INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 38109a52d85SRichard Henderson }; 38209a52d85SRichard Henderson static const GVecGen2i ops[4] = { 38309a52d85SRichard Henderson { .fni8 = gen_srsra8_i64, 38409a52d85SRichard Henderson .fniv = gen_srsra_vec, 38509a52d85SRichard Henderson .fno = gen_helper_gvec_srsra_b, 38609a52d85SRichard Henderson .opt_opc = vecop_list, 38709a52d85SRichard Henderson .load_dest = true, 38809a52d85SRichard Henderson .vece = MO_8 }, 38909a52d85SRichard Henderson { .fni8 = gen_srsra16_i64, 39009a52d85SRichard Henderson .fniv = gen_srsra_vec, 39109a52d85SRichard Henderson .fno = gen_helper_gvec_srsra_h, 39209a52d85SRichard Henderson .opt_opc = vecop_list, 39309a52d85SRichard Henderson .load_dest = true, 39409a52d85SRichard Henderson .vece = MO_16 }, 39509a52d85SRichard Henderson { .fni4 = gen_srsra32_i32, 39609a52d85SRichard Henderson .fniv = gen_srsra_vec, 39709a52d85SRichard Henderson .fno = gen_helper_gvec_srsra_s, 39809a52d85SRichard Henderson .opt_opc = vecop_list, 39909a52d85SRichard Henderson .load_dest = true, 40009a52d85SRichard Henderson .vece = MO_32 }, 40109a52d85SRichard Henderson { .fni8 = gen_srsra64_i64, 40209a52d85SRichard Henderson .fniv = gen_srsra_vec, 40309a52d85SRichard Henderson .fno = gen_helper_gvec_srsra_d, 40409a52d85SRichard Henderson 
.prefer_i64 = TCG_TARGET_REG_BITS == 64, 40509a52d85SRichard Henderson .opt_opc = vecop_list, 40609a52d85SRichard Henderson .load_dest = true, 40709a52d85SRichard Henderson .vece = MO_64 }, 40809a52d85SRichard Henderson }; 40909a52d85SRichard Henderson 41009a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [1..esize] */ 41109a52d85SRichard Henderson tcg_debug_assert(shift > 0); 41209a52d85SRichard Henderson tcg_debug_assert(shift <= (8 << vece)); 41309a52d85SRichard Henderson 41409a52d85SRichard Henderson /* 41509a52d85SRichard Henderson * Shifts larger than the element size are architecturally valid. 41609a52d85SRichard Henderson * Signed results in all sign bits. With rounding, this produces 41709a52d85SRichard Henderson * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0. 41809a52d85SRichard Henderson * I.e. always zero. With accumulation, this leaves D unchanged. 41909a52d85SRichard Henderson */ 42009a52d85SRichard Henderson if (shift == (8 << vece)) { 42109a52d85SRichard Henderson /* Nop, but we do need to clear the tail. 
*/ 42209a52d85SRichard Henderson tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz); 42309a52d85SRichard Henderson } else { 42409a52d85SRichard Henderson tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 42509a52d85SRichard Henderson } 42609a52d85SRichard Henderson } 42709a52d85SRichard Henderson 42809a52d85SRichard Henderson static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 42909a52d85SRichard Henderson { 43009a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 43109a52d85SRichard Henderson 43209a52d85SRichard Henderson tcg_gen_shri_i64(t, a, sh - 1); 43309a52d85SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); 43409a52d85SRichard Henderson tcg_gen_vec_shr8i_i64(d, a, sh); 43509a52d85SRichard Henderson tcg_gen_vec_add8_i64(d, d, t); 43609a52d85SRichard Henderson } 43709a52d85SRichard Henderson 43809a52d85SRichard Henderson static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 43909a52d85SRichard Henderson { 44009a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 44109a52d85SRichard Henderson 44209a52d85SRichard Henderson tcg_gen_shri_i64(t, a, sh - 1); 44309a52d85SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); 44409a52d85SRichard Henderson tcg_gen_vec_shr16i_i64(d, a, sh); 44509a52d85SRichard Henderson tcg_gen_vec_add16_i64(d, d, t); 44609a52d85SRichard Henderson } 44709a52d85SRichard Henderson 44809a52d85SRichard Henderson void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) 44909a52d85SRichard Henderson { 45009a52d85SRichard Henderson TCGv_i32 t; 45109a52d85SRichard Henderson 45209a52d85SRichard Henderson /* Handle shift by the input size for the benefit of trans_URSHR_ri */ 45309a52d85SRichard Henderson if (sh == 32) { 45409a52d85SRichard Henderson tcg_gen_extract_i32(d, a, sh - 1, 1); 45509a52d85SRichard Henderson return; 45609a52d85SRichard Henderson } 45709a52d85SRichard Henderson t = tcg_temp_new_i32(); 45809a52d85SRichard Henderson tcg_gen_extract_i32(t, a, sh - 1, 
1); 45909a52d85SRichard Henderson tcg_gen_shri_i32(d, a, sh); 46009a52d85SRichard Henderson tcg_gen_add_i32(d, d, t); 46109a52d85SRichard Henderson } 46209a52d85SRichard Henderson 46309a52d85SRichard Henderson void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 46409a52d85SRichard Henderson { 46509a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 46609a52d85SRichard Henderson 46709a52d85SRichard Henderson tcg_gen_extract_i64(t, a, sh - 1, 1); 46809a52d85SRichard Henderson tcg_gen_shri_i64(d, a, sh); 46909a52d85SRichard Henderson tcg_gen_add_i64(d, d, t); 47009a52d85SRichard Henderson } 47109a52d85SRichard Henderson 47209a52d85SRichard Henderson static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift) 47309a52d85SRichard Henderson { 47409a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 47509a52d85SRichard Henderson TCGv_vec ones = tcg_temp_new_vec_matching(d); 47609a52d85SRichard Henderson 47709a52d85SRichard Henderson tcg_gen_shri_vec(vece, t, a, shift - 1); 47809a52d85SRichard Henderson tcg_gen_dupi_vec(vece, ones, 1); 47909a52d85SRichard Henderson tcg_gen_and_vec(vece, t, t, ones); 48009a52d85SRichard Henderson tcg_gen_shri_vec(vece, d, a, shift); 48109a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, t); 48209a52d85SRichard Henderson } 48309a52d85SRichard Henderson 48409a52d85SRichard Henderson void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 48509a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 48609a52d85SRichard Henderson { 48709a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 48809a52d85SRichard Henderson INDEX_op_shri_vec, INDEX_op_add_vec, 0 48909a52d85SRichard Henderson }; 49009a52d85SRichard Henderson static const GVecGen2i ops[4] = { 49109a52d85SRichard Henderson { .fni8 = gen_urshr8_i64, 49209a52d85SRichard Henderson .fniv = gen_urshr_vec, 49309a52d85SRichard Henderson .fno = gen_helper_gvec_urshr_b, 49409a52d85SRichard Henderson 
.opt_opc = vecop_list, 49509a52d85SRichard Henderson .vece = MO_8 }, 49609a52d85SRichard Henderson { .fni8 = gen_urshr16_i64, 49709a52d85SRichard Henderson .fniv = gen_urshr_vec, 49809a52d85SRichard Henderson .fno = gen_helper_gvec_urshr_h, 49909a52d85SRichard Henderson .opt_opc = vecop_list, 50009a52d85SRichard Henderson .vece = MO_16 }, 50109a52d85SRichard Henderson { .fni4 = gen_urshr32_i32, 50209a52d85SRichard Henderson .fniv = gen_urshr_vec, 50309a52d85SRichard Henderson .fno = gen_helper_gvec_urshr_s, 50409a52d85SRichard Henderson .opt_opc = vecop_list, 50509a52d85SRichard Henderson .vece = MO_32 }, 50609a52d85SRichard Henderson { .fni8 = gen_urshr64_i64, 50709a52d85SRichard Henderson .fniv = gen_urshr_vec, 50809a52d85SRichard Henderson .fno = gen_helper_gvec_urshr_d, 50909a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 51009a52d85SRichard Henderson .opt_opc = vecop_list, 51109a52d85SRichard Henderson .vece = MO_64 }, 51209a52d85SRichard Henderson }; 51309a52d85SRichard Henderson 51409a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [1..esize] */ 51509a52d85SRichard Henderson tcg_debug_assert(shift > 0); 51609a52d85SRichard Henderson tcg_debug_assert(shift <= (8 << vece)); 51709a52d85SRichard Henderson 51809a52d85SRichard Henderson if (shift == (8 << vece)) { 51909a52d85SRichard Henderson /* 52009a52d85SRichard Henderson * Shifts larger than the element size are architecturally valid. 52109a52d85SRichard Henderson * Unsigned results in zero. With rounding, this produces a 52209a52d85SRichard Henderson * copy of the most significant bit. 
         */
        tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

/*
 * URSRA: d += URSHR(a, sh) -- unsigned rounding shift right and
 * accumulate.  Scalar expanders for 8/16-bit lanes packed in an i64,
 * a 32-bit lane in an i32, a 64-bit lane in an i64, plus the generic
 * vector form; selected per element size by gen_gvec_ursra below.
 */
static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 8) {
        /* Shift by esize: only the rounding bit (the old MSB) survives. */
        tcg_gen_vec_shr8i_i64(t, a, 7);
    } else {
        gen_urshr8_i64(t, a, sh);
    }
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 16) {
        /* Shift by esize: only the rounding bit (the old MSB) survives. */
        tcg_gen_vec_shr16i_i64(t, a, 15);
    } else {
        gen_urshr16_i64(t, a, sh);
    }
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t = tcg_temp_new_i32();

    if (sh == 32) {
        /* Shift by esize: only the rounding bit (the old MSB) survives. */
        tcg_gen_shri_i32(t, a, 31);
    } else {
        gen_urshr32_i32(t, a, sh);
    }
    tcg_gen_add_i32(d, d, t);
}

static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 64) {
        /* Shift by esize: only the rounding bit (the old MSB) survives. */
        tcg_gen_shri_i64(t, a, 63);
    } else {
        gen_urshr64_i64(t, a, sh);
    }
    tcg_gen_add_i64(d, d, t);
}

static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    if (sh == (8 << vece)) {
        /* Shift by esize: only the rounding bit (the old MSB) survives. */
        tcg_gen_shri_vec(vece, t, a, sh - 1);
    } else {
        gen_urshr_vec(vece, t, a, sh);
    }
    tcg_gen_add_vec(vece, d, d, t);
}

/*
 * Expand URSRA (vector, immediate) via the gvec machinery.
 * load_dest is set because the destination accumulates.
 */
void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_ursra8_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fni8 = gen_ursra16_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_ursra32_i32,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_ursra64_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
}

/*
 * SRI helper: shift right and insert, 8-bit lanes packed in an i64.
 * Keeps the top 'shift' bits of each destination lane.
 * (Definition continues in the next chunk of this listing.)
 */
static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_8, 0xff >> shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

/* SRI helper: shift right and insert, 16-bit lanes packed in an i64. */
static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_16, 0xffff >> shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

/*
 * SRI helper, 32-bit lane.  Clobbers 'a', which the gvec expansion
 * permits.  Caller guarantees 1 <= shift <= 31, so the deposit length
 * 32 - shift is valid.
 */
static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
}

/* SRI helper, 64-bit lane.  Clobbers 'a'; 1 <= shift <= 63. */
static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
}

/* SRI helper, generic vector: mask keeps the top 'sh' bits of each lane. */
static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec m = tcg_temp_new_vec_matching(d);

    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
    tcg_gen_shri_vec(vece, t, a, sh);
    tcg_gen_and_vec(vece, d, d, m);
    tcg_gen_or_vec(vece, d, d, t);
}

/*
 * Expand SRI (shift right and insert, vector immediate) via gvec.
 * A shift of exactly esize inserts nothing and leaves the destination
 * unchanged, so it is handled as a move-to-self (tail clearing only).
 */
void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
    const GVecGen2i ops[4] = {
        { .fni8 = gen_shr8_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_shr16_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_shr32_ins_i32,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_shr64_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /* Shift of esize leaves destination unchanged. */
    if (shift < (8 << vece)) {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    } else {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    }
}

/* SLI helper: shift left and insert, 8-bit lanes packed in an i64. */
static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_8, 0xff << shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shli_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

/* SLI helper: shift left and insert, 16-bit lanes packed in an i64. */
static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_16, 0xffff << shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shli_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

/*
 * SLI helper, 32-bit lane.  Caller guarantees 1 <= shift <= 31.
 * (Body continues in the next chunk of this listing.)
 */
static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
}

/* SLI helper, 64-bit lane.  Caller guarantees 1 <= shift <= 63. */
static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
}

/* SLI helper, generic vector: keep the low 'sh' bits of each dest lane. */
static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec m = tcg_temp_new_vec_matching(d);

    tcg_gen_shli_vec(vece, t, a, sh);
    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
    tcg_gen_and_vec(vece, d, d, m);
    tcg_gen_or_vec(vece, d, d, t);
}

/*
 * Expand SLI (shift left and insert, vector immediate) via gvec.
 * A zero shift inserts the whole element, i.e. a plain move.
 */
void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
    const GVecGen2i ops[4] = {
        { .fni8 = gen_shl8_ins_i64,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_shl16_ins_i64,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_shl32_ins_i32,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_shl64_ins_i64,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [0..esize-1]. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift < (8 << vece));

    if (shift == 0) {
        tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

/*
 * MLA/MLS scalar expanders: d +/-= a * b.
 * Note these deliberately clobber 'a' as scratch, which the gvec
 * expansion permits.  The u8/u16 forms operate on lanes packed in
 * an i32 via the Neon helpers.
 */
static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u8(a, a, b);
    gen_helper_neon_add_u8(d, d, a);
}

static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u8(a, a, b);
    gen_helper_neon_sub_u8(d, d, a);
}

static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u16(a, a, b);
    gen_helper_neon_add_u16(d, d, a);
}

static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u16(a, a, b);
    gen_helper_neon_sub_u16(d, d, a);
}

static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mul_i32(a, a, b);
    tcg_gen_add_i32(d, d, a);
}

static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mul_i32(a, a, b);
    tcg_gen_sub_i32(d, d, a);
}

static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mul_i64(a, a, b);
    tcg_gen_add_i64(d, d, a);
}

static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mul_i64(a, a, b);
    tcg_gen_sub_i64(d, d, a);
}

/* MLA vector expander (signature continues in the next chunk). */
static void
gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    /* d += a * b; 'a' is clobbered as scratch. */
    tcg_gen_mul_vec(vece, a, a, b);
    tcg_gen_add_vec(vece, d, d, a);
}

static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    /* d -= a * b; 'a' is clobbered as scratch. */
    tcg_gen_mul_vec(vece, a, a, b);
    tcg_gen_sub_vec(vece, d, d, a);
}

/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
 * these tables are shared with AArch64 which does support them.
 */
void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_mul_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_mla8_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_mla16_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_mla32_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_mla64_i64,
          .fniv = gen_mla_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_mul_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_mls8_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_mls16_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_mls32_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_mls64_i64,
          .fniv = gen_mls_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/* CMTST : test is "if (X & Y != 0)". */
static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    /* All-ones when (a & b) != 0, else zero. */
    tcg_gen_and_i32(d, a, b);
    tcg_gen_negsetcond_i32(TCG_COND_NE, d, d, tcg_constant_i32(0));
}

/* Non-static: also used by the AArch64 translator. */
void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_and_i64(d, a, b);
    tcg_gen_negsetcond_i64(TCG_COND_NE, d, d, tcg_constant_i64(0));
}

static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    /* 'a' is reused as the zero comparand. */
    tcg_gen_and_vec(vece, d, a, b);
    tcg_gen_dupi_vec(vece, a, 0);
    tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
}

/*
 * Expand CMTST via gvec.  (Op table continues in the next chunk.)
 * The u8/u16 fni4 forms reuse the Neon tst helpers on packed lanes.
 */
void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_helper_neon_tst_u8,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_helper_neon_tst_u16,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_cmtst_i32,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_cmtst_i64,
          .fniv = gen_cmtst_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/*
 * USHL: unsigned shift left by a signed, per-element shift count
 * taken from the low byte of 'shift'; negative counts shift right.
 * Non-static: also used by the AArch64 translator.
 */
void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
{
    TCGv_i32 lval = tcg_temp_new_i32();
    TCGv_i32 rval = tcg_temp_new_i32();
    TCGv_i32 lsh = tcg_temp_new_i32();
    TCGv_i32 rsh = tcg_temp_new_i32();
    TCGv_i32 zero = tcg_constant_i32(0);
    TCGv_i32 max = tcg_constant_i32(32);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i32(lsh, shift);
    tcg_gen_neg_i32(rsh, lsh);
    tcg_gen_shl_i32(lval, src, lsh);
    tcg_gen_shr_i32(rval, src, rsh);
    /* Keep the shifted-left value only when 0 <= lsh < 32 ... */
    tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
    /* ... and override with the shifted-right value when 0 <= rsh < 32. */
    tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
}

void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
{
    TCGv_i64 lval = tcg_temp_new_i64();
    TCGv_i64 rval = tcg_temp_new_i64();
    TCGv_i64 lsh = tcg_temp_new_i64();
    TCGv_i64 rsh = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_constant_i64(0);
    TCGv_i64 max = tcg_constant_i64(64);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i64(lsh, shift);
    tcg_gen_neg_i64(rsh, lsh);
    tcg_gen_shl_i64(lval, src, lsh);
    tcg_gen_shr_i64(rval, src, rsh);
    tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
    tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
}

static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
                         TCGv_vec src, TCGv_vec shift)
{
    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec msk, max;

    tcg_gen_neg_vec(vece, rsh, shift);
    if (vece == MO_8) {
        /* 8-bit lanes: the whole lane already is the shift byte. */
        tcg_gen_mov_vec(lsh, shift);
    } else {
        /* Wider lanes: isolate the low byte of each shift element. */
        msk = tcg_temp_new_vec_matching(dst);
        tcg_gen_dupi_vec(vece, msk, 0xff);
        tcg_gen_and_vec(vece, lsh, shift, msk);
        tcg_gen_and_vec(vece, rsh, rsh, msk);
    }

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_shlv_vec(vece, lval, src, lsh);
    tcg_gen_shrv_vec(vece, rval, src, rsh);

    max = tcg_temp_new_vec_matching(dst);
    tcg_gen_dupi_vec(vece, max, 8 << vece);

    /*
     * The choice of LT (signed) and GEU (unsigned) are biased toward
     * the instructions of the x86_64 host.  For MO_8, the whole byte
     * is significant so we must use an unsigned compare; otherwise we
     * have already masked to a byte and so a signed compare works.
     * Other tcg hosts have a full set of comparisons and do not care.
     */
    if (vece == MO_8) {
        tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
        tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
        tcg_gen_andc_vec(vece, lval, lval, lsh);
        tcg_gen_andc_vec(vece, rval, rval, rsh);
    } else {
        tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
        tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
        tcg_gen_and_vec(vece, lval, lval, lsh);
        tcg_gen_and_vec(vece, rval, rval, rsh);
    }
    tcg_gen_or_vec(vece, dst, lval, rval);
}

/* Expand USHL via gvec.  (Body continues in the next chunk.) */
void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_neg_vec, INDEX_op_shlv_vec,
        INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_ushl_vec,
          .fno = gen_helper_gvec_ushl_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_ushl_vec,
          .fno = gen_helper_gvec_ushl_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_ushl_i32,
          .fniv = gen_ushl_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_ushl_i64,
          .fniv = gen_ushl_vec,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/*
 * SSHL: signed shift left by a signed, per-element shift count taken
 * from the low byte of 'shift'; negative counts are arithmetic right
 * shifts.  Non-static: also used by the AArch64 translator.
 */
void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
{
    TCGv_i32 lval = tcg_temp_new_i32();
    TCGv_i32 rval = tcg_temp_new_i32();
    TCGv_i32 lsh = tcg_temp_new_i32();
    TCGv_i32 rsh = tcg_temp_new_i32();
    TCGv_i32 zero = tcg_constant_i32(0);
    TCGv_i32 max = tcg_constant_i32(31);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i32(lsh, shift);
    tcg_gen_neg_i32(rsh, lsh);
    tcg_gen_shl_i32(lval, src, lsh);
    /* Clamp rsh to 31 so an out-of-range right shift yields all sign bits. */
    tcg_gen_umin_i32(rsh, rsh, max);
    tcg_gen_sar_i32(rval, src, rsh);
    /* Left shift of 32 or more produces zero. */
    tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
    /* Negative count selects the right-shift result. */
    tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
}

void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
{
    TCGv_i64 lval = tcg_temp_new_i64();
    TCGv_i64 rval = tcg_temp_new_i64();
    TCGv_i64 lsh = tcg_temp_new_i64();
    TCGv_i64 rsh = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_constant_i64(0);
    TCGv_i64 max = tcg_constant_i64(63);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i64(lsh, shift);
    tcg_gen_neg_i64(rsh, lsh);
    tcg_gen_shl_i64(lval, src, lsh);
    tcg_gen_umin_i64(rsh, rsh, max);
    tcg_gen_sar_i64(rval, src, rsh);
    tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
    tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
}

static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
                         TCGv_vec src, TCGv_vec shift)
{
    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec tmp = tcg_temp_new_vec_matching(dst);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_neg_vec(vece, rsh, shift);
    if (vece == MO_8) {
        tcg_gen_mov_vec(lsh, shift);
    } else {
        /* Isolate the low byte of each shift element. */
        tcg_gen_dupi_vec(vece, tmp, 0xff);
        tcg_gen_and_vec(vece, lsh, shift, tmp);
        tcg_gen_and_vec(vece, rsh, rsh, tmp);
    }

    /* Bound rsh so out of bound right shift gets -1. */
    tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
    tcg_gen_umin_vec(vece, rsh, rsh, tmp);
    /* tmp := all-ones where lsh is out of range (> esize - 1). */
    tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);

    tcg_gen_shlv_vec(vece, lval, src, lsh);
    tcg_gen_sarv_vec(vece, rval, src, rsh);

    /* Select in-bound left shift. */
    tcg_gen_andc_vec(vece, lval, lval, tmp);

    /* Select between left and right shift. */
    if (vece == MO_8) {
        /* Byte lanes: the lane is the signed count; negative => right. */
        tcg_gen_dupi_vec(vece, tmp, 0);
        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
    } else {
        /* Wider lanes: test bit 7 of the masked count instead. */
        tcg_gen_dupi_vec(vece, tmp, 0x80);
        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
    }
}

/* Expand SSHL via gvec. */
void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
        INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_sshl_vec,
          .fno = gen_helper_gvec_sshl_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_sshl_vec,
          .fno = gen_helper_gvec_sshl_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_sshl_i32,
          .fniv = gen_sshl_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_sshl_i64,
          .fniv = gen_sshl_vec,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    /*
     * NOTE(review): the chunk ends here; gen_gvec_sshl's final
     * tcg_gen_gvec_3() call and closing brace lie beyond this view.
     */
tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 121909a52d85SRichard Henderson } 122009a52d85SRichard Henderson 1221*f4fa83d6SRichard Henderson void gen_uqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz) 1222*f4fa83d6SRichard Henderson { 1223*f4fa83d6SRichard Henderson uint64_t max = MAKE_64BIT_MASK(0, 8 << esz); 1224*f4fa83d6SRichard Henderson TCGv_i64 tmp = tcg_temp_new_i64(); 1225*f4fa83d6SRichard Henderson 1226*f4fa83d6SRichard Henderson tcg_gen_add_i64(tmp, a, b); 1227*f4fa83d6SRichard Henderson tcg_gen_umin_i64(res, tmp, tcg_constant_i64(max)); 1228*f4fa83d6SRichard Henderson tcg_gen_xor_i64(tmp, tmp, res); 1229*f4fa83d6SRichard Henderson tcg_gen_or_i64(qc, qc, tmp); 1230*f4fa83d6SRichard Henderson } 1231*f4fa83d6SRichard Henderson 1232*f4fa83d6SRichard Henderson void gen_uqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b) 1233*f4fa83d6SRichard Henderson { 1234*f4fa83d6SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 1235*f4fa83d6SRichard Henderson 1236*f4fa83d6SRichard Henderson tcg_gen_add_i64(t, a, b); 1237*f4fa83d6SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LTU, res, t, a, 1238*f4fa83d6SRichard Henderson tcg_constant_i64(UINT64_MAX), t); 1239*f4fa83d6SRichard Henderson tcg_gen_xor_i64(t, t, res); 1240*f4fa83d6SRichard Henderson tcg_gen_or_i64(qc, qc, t); 1241*f4fa83d6SRichard Henderson } 1242*f4fa83d6SRichard Henderson 124376f4a8aeSRichard Henderson static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, 124409a52d85SRichard Henderson TCGv_vec a, TCGv_vec b) 124509a52d85SRichard Henderson { 124609a52d85SRichard Henderson TCGv_vec x = tcg_temp_new_vec_matching(t); 124709a52d85SRichard Henderson tcg_gen_add_vec(vece, x, a, b); 124809a52d85SRichard Henderson tcg_gen_usadd_vec(vece, t, a, b); 124976f4a8aeSRichard Henderson tcg_gen_xor_vec(vece, x, x, t); 125076f4a8aeSRichard Henderson tcg_gen_or_vec(vece, qc, qc, x); 125109a52d85SRichard Henderson } 125209a52d85SRichard Henderson 
125309a52d85SRichard Henderson void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 125409a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 125509a52d85SRichard Henderson { 125609a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 125776f4a8aeSRichard Henderson INDEX_op_usadd_vec, INDEX_op_add_vec, 0 125809a52d85SRichard Henderson }; 125909a52d85SRichard Henderson static const GVecGen4 ops[4] = { 126009a52d85SRichard Henderson { .fniv = gen_uqadd_vec, 126109a52d85SRichard Henderson .fno = gen_helper_gvec_uqadd_b, 126209a52d85SRichard Henderson .write_aofs = true, 126309a52d85SRichard Henderson .opt_opc = vecop_list, 126409a52d85SRichard Henderson .vece = MO_8 }, 126509a52d85SRichard Henderson { .fniv = gen_uqadd_vec, 126609a52d85SRichard Henderson .fno = gen_helper_gvec_uqadd_h, 126709a52d85SRichard Henderson .write_aofs = true, 126809a52d85SRichard Henderson .opt_opc = vecop_list, 126909a52d85SRichard Henderson .vece = MO_16 }, 127009a52d85SRichard Henderson { .fniv = gen_uqadd_vec, 127109a52d85SRichard Henderson .fno = gen_helper_gvec_uqadd_s, 127209a52d85SRichard Henderson .write_aofs = true, 127309a52d85SRichard Henderson .opt_opc = vecop_list, 127409a52d85SRichard Henderson .vece = MO_32 }, 127509a52d85SRichard Henderson { .fniv = gen_uqadd_vec, 1276*f4fa83d6SRichard Henderson .fni8 = gen_uqadd_d, 127709a52d85SRichard Henderson .fno = gen_helper_gvec_uqadd_d, 127809a52d85SRichard Henderson .write_aofs = true, 127909a52d85SRichard Henderson .opt_opc = vecop_list, 128009a52d85SRichard Henderson .vece = MO_64 }, 128109a52d85SRichard Henderson }; 128201d5665bSRichard Henderson 128301d5665bSRichard Henderson tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); 128409a52d85SRichard Henderson tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), 128509a52d85SRichard Henderson rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 128609a52d85SRichard Henderson } 128709a52d85SRichard Henderson 
/*
 * Signed saturating add for 8/16/32-bit elements widened into i64:
 * clamp the true sum to the element's signed [min, max] range and
 * accumulate an indication of saturation into @qc.
 */
void gen_sqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    int64_t max = MAKE_64BIT_MASK(0, (8 << esz) - 1);
    int64_t min = -1ll - max;
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_add_i64(tmp, a, b);
    tcg_gen_smin_i64(res, tmp, tcg_constant_i64(max));
    tcg_gen_smax_i64(res, res, tcg_constant_i64(min));
    /* qc |= (unsaturated sum) ^ (saturated sum): nonzero iff saturated. */
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}

/* 64-bit signed saturating add, accumulating saturation into @qc. */
void gen_sqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2 = tcg_temp_new_i64();

    tcg_gen_add_i64(t0, a, b);

    /* Compute signed overflow indication into T1 */
    /*
     * Addition overflows iff the operands have the same sign and the
     * result differs: (t0 ^ a) & ~(a ^ b) has its sign bit set then.
     */
    tcg_gen_xor_i64(t1, a, b);
    tcg_gen_xor_i64(t2, t0, a);
    tcg_gen_andc_i64(t1, t2, t1);

    /* Compute saturated value into T2 */
    /* INT64_MAX if a >= 0, else INT64_MIN. */
    tcg_gen_sari_i64(t2, a, 63);
    tcg_gen_xori_i64(t2, t2, INT64_MAX);

    /* Select the saturated value when T1's sign bit says "overflowed". */
    tcg_gen_movcond_i64(TCG_COND_LT, res, t1, tcg_constant_i64(0), t2, t0);
    /* qc |= (wrapped sum) ^ (selected result). */
    tcg_gen_xor_i64(t0, t0, res);
    tcg_gen_or_i64(qc, qc, t0);
}

/* Vector signed saturating add, accumulating saturation into @qc. */
static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_add_vec(vece, x, a, b);
    tcg_gen_ssadd_vec(vece, t, a, b);
    /* qc |= (wrapping sum) ^ (saturating sum): nonzero iff saturated. */
    tcg_gen_xor_vec(vece, x, x, t);
    tcg_gen_or_vec(vece, qc, qc, x);
}

/* Expand a gvec SQADD that also updates the QC saturation flag. */
void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_ssadd_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_sqadd_vec,
          .fni8 = gen_sqadd_d,
          .fno = gen_helper_gvec_sqadd_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };

    /* vfp.qc must be wide enough to be used as an opr_sz-byte vector. */
    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/*
 * Unsigned saturating subtract for 8/16/32-bit elements widened into
 * i64: clamp the true difference at zero and accumulate an indication
 * of saturation into @qc.
 */
void gen_uqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_sub_i64(tmp, a, b);
    /* A borrow makes the widened difference negative; clamp at zero. */
    tcg_gen_smax_i64(res, tmp, tcg_constant_i64(0));
    /* qc |= (unsaturated diff) ^ (saturated diff): nonzero iff saturated. */
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}

/* 64-bit unsigned saturating subtract, accumulating saturation into @qc. */
void gen_uqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_sub_i64(t, a, b);
    /* Underflow iff a < b; saturate to zero. */
    tcg_gen_movcond_i64(TCG_COND_LTU, res, a, b, tcg_constant_i64(0), t);
    /* qc |= (wrapped diff) ^ (selected result). */
    tcg_gen_xor_i64(t, t, res);
    tcg_gen_or_i64(qc, qc, t);
}

/* Vector unsigned saturating subtract, accumulating saturation into @qc. */
static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_sub_vec(vece, x, a, b);
    tcg_gen_ussub_vec(vece, t, a, b);
    /* qc |= (wrapping diff) ^ (saturating diff): nonzero iff saturated. */
    tcg_gen_xor_vec(vece, x, x, t);
    tcg_gen_or_vec(vece, qc, qc, x);
}

/* Expand a gvec UQSUB that also updates the QC saturation flag. */
void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_ussub_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_uqsub_vec,
          .fni8 = gen_uqsub_d,
          .fno = gen_helper_gvec_uqsub_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };

    /* vfp.qc must be wide enough to be used as an opr_sz-byte vector. */
    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/*
 * Signed saturating subtract for 8/16/32-bit elements widened into i64:
 * clamp the true difference to the element's signed [min, max] range and
 * accumulate an indication of saturation into @qc.
 */
void gen_sqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    int64_t max = MAKE_64BIT_MASK(0, (8 << esz) - 1);
    int64_t min = -1ll - max;
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_sub_i64(tmp, a, b);
    tcg_gen_smin_i64(res, tmp, tcg_constant_i64(max));
    tcg_gen_smax_i64(res, res, tcg_constant_i64(min));
    /* qc |= (unsaturated diff) ^ (saturated diff): nonzero iff saturated. */
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}

/* 64-bit signed saturating subtract, accumulating saturation into @qc. */
void gen_sqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2 = tcg_temp_new_i64();

    tcg_gen_sub_i64(t0, a, b);

    /* Compute signed overflow indication into T1 */
    /*
     * Subtraction overflows iff the operands have different signs and
     * the result's sign differs from the minuend's:
     * (a ^ b) & (t0 ^ a) has its sign bit set then.
     */
    tcg_gen_xor_i64(t1, a, b);
    tcg_gen_xor_i64(t2, t0, a);
    tcg_gen_and_i64(t1, t1, t2);

    /* Compute saturated value into T2 */
    /* INT64_MAX if a >= 0, else INT64_MIN. */
    tcg_gen_sari_i64(t2, a, 63);
    tcg_gen_xori_i64(t2, t2, INT64_MAX);

    /* Select the saturated value when T1's sign bit says "overflowed". */
    tcg_gen_movcond_i64(TCG_COND_LT, res, t1, tcg_constant_i64(0), t2, t0);
    /* qc |= (wrapped diff) ^ (selected result). */
    tcg_gen_xor_i64(t0, t0, res);
    tcg_gen_or_i64(qc, qc, t0);
}

/* Vector signed saturating subtract, accumulating saturation into @qc. */
static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_sub_vec(vece, x, a, b);
    tcg_gen_sssub_vec(vece, t, a, b);
    /* qc |= (wrapping diff) ^ (saturating diff): nonzero iff saturated. */
    tcg_gen_xor_vec(vece, x, x, t);
    tcg_gen_or_vec(vece, qc, qc, x);
}

/* Expand a gvec SQSUB that also updates the QC saturation flag. */
void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sssub_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_sqsub_vec,
          .fni8 = gen_sqsub_d,
          .fno = gen_helper_gvec_sqsub_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };

    /* vfp.qc must be wide enough to be used as an opr_sz-byte vector. */
    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/* 32-bit signed absolute difference: d = |a - b|. */
static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_sub_i32(t, a, b);
    tcg_gen_sub_i32(d, b, a);
    /* Pick whichever difference is non-negative. */
    tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
}

/* 64-bit signed absolute difference: d = |a - b|. */
static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_sub_i64(t, a, b);
    tcg_gen_sub_i64(d, b, a);
    /* Pick whichever difference is non-negative. */
    tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
}

/* Vector signed absolute difference: d = smax(a,b) - smin(a,b). */
static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_smin_vec(vece, t, a, b);
    tcg_gen_smax_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, t);
}

/* Expand a gvec SABD (signed absolute difference) operation. */
void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_sabd_i32,
          .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_sabd_i64,
          .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/* 32-bit unsigned absolute difference: d = |a - b|. */
static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_sub_i32(t, a, b);
    tcg_gen_sub_i32(d, b, a);
    /* Pick whichever difference did not borrow. */
    tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
}

/* 64-bit unsigned absolute difference: d = |a - b|. */
static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_sub_i64(t, a, b);
    tcg_gen_sub_i64(d, b, a);
    /* Pick whichever difference did not borrow. */
    tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
}

/* Vector unsigned absolute difference: d = umax(a,b) - umin(a,b). */
static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_umin_vec(vece, t, a, b);
    tcg_gen_umax_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, t);
}

/* Expand a gvec UABD (unsigned absolute difference) operation. */
void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_uabd_i32,
          .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_uabd_i64,
          .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/* 32-bit signed absolute-difference-and-accumulate: d += |a - b|. */
static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();
    gen_sabd_i32(t, a, b);
    tcg_gen_add_i32(d, d, t);
}

/* 64-bit signed absolute-difference-and-accumulate: d += |a - b|. */
static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();
    gen_sabd_i64(t, a, b);
    tcg_gen_add_i64(d, d, t);
}

/* Vector signed absolute-difference-and-accumulate: d += sabd(a, b). */
static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    gen_sabd_vec(vece, t, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

/*
 * Expand a gvec SABA operation.  load_dest is set because the
 * destination is both read (accumulated into) and written.
 */
void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_add_vec,
        INDEX_op_smin_vec, INDEX_op_smax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_saba_i32,
          .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_saba_i64,
          .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/* 32-bit unsigned absolute-difference-and-accumulate: d += |a - b|. */
static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();
    gen_uabd_i32(t, a, b);
    tcg_gen_add_i32(d, d, t);
}

/* 64-bit unsigned absolute-difference-and-accumulate: d += |a - b|. */
static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();
    gen_uabd_i64(t, a, b);
    tcg_gen_add_i64(d, d, t);
}

/* Vector unsigned absolute-difference-and-accumulate: d += uabd(a, b). */
static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    gen_uabd_vec(vece, t, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

/*
 * Expand a gvec UABA operation.  load_dest is set because the
 * destination is both read (accumulated into) and written.
 */
void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_add_vec,
        INDEX_op_umin_vec, INDEX_op_umax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_uaba_i32,
          .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_uaba_i64,
          .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/* Expand a pairwise add via out-of-line helpers (all element sizes). */
void gen_gvec_addp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_addp_b,
        gen_helper_gvec_addp_h,
        gen_helper_gvec_addp_s,
        gen_helper_gvec_addp_d,
    };
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

/*
 * Expand a pairwise signed max via out-of-line helpers.
 * Only byte/half/word helpers exist, hence the vece assert.
 */
void gen_gvec_smaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_smaxp_b,
        gen_helper_gvec_smaxp_h,
        gen_helper_gvec_smaxp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

/*
 * Expand a pairwise signed min via out-of-line helpers.
 * Only byte/half/word helpers exist, hence the vece assert.
 */
void gen_gvec_sminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_sminp_b,
        gen_helper_gvec_sminp_h,
        gen_helper_gvec_sminp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_umaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
177728b5451bSRichard Henderson { 177828b5451bSRichard Henderson static gen_helper_gvec_3 * const fns[4] = { 177928b5451bSRichard Henderson gen_helper_gvec_umaxp_b, 178028b5451bSRichard Henderson gen_helper_gvec_umaxp_h, 178128b5451bSRichard Henderson gen_helper_gvec_umaxp_s, 178228b5451bSRichard Henderson }; 178328b5451bSRichard Henderson tcg_debug_assert(vece <= MO_32); 178428b5451bSRichard Henderson tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 178528b5451bSRichard Henderson } 178628b5451bSRichard Henderson 178728b5451bSRichard Henderson void gen_gvec_uminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 178828b5451bSRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 178928b5451bSRichard Henderson { 179028b5451bSRichard Henderson static gen_helper_gvec_3 * const fns[4] = { 179128b5451bSRichard Henderson gen_helper_gvec_uminp_b, 179228b5451bSRichard Henderson gen_helper_gvec_uminp_h, 179328b5451bSRichard Henderson gen_helper_gvec_uminp_s, 179428b5451bSRichard Henderson }; 179528b5451bSRichard Henderson tcg_debug_assert(vece <= MO_32); 179628b5451bSRichard Henderson tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 179728b5451bSRichard Henderson } 1798