109a52d85SRichard Henderson /* 209a52d85SRichard Henderson * ARM generic vector expansion 309a52d85SRichard Henderson * 409a52d85SRichard Henderson * Copyright (c) 2003 Fabrice Bellard 509a52d85SRichard Henderson * Copyright (c) 2005-2007 CodeSourcery 609a52d85SRichard Henderson * Copyright (c) 2007 OpenedHand, Ltd. 709a52d85SRichard Henderson * 809a52d85SRichard Henderson * This library is free software; you can redistribute it and/or 909a52d85SRichard Henderson * modify it under the terms of the GNU Lesser General Public 1009a52d85SRichard Henderson * License as published by the Free Software Foundation; either 1109a52d85SRichard Henderson * version 2.1 of the License, or (at your option) any later version. 1209a52d85SRichard Henderson * 1309a52d85SRichard Henderson * This library is distributed in the hope that it will be useful, 1409a52d85SRichard Henderson * but WITHOUT ANY WARRANTY; without even the implied warranty of 1509a52d85SRichard Henderson * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1609a52d85SRichard Henderson * Lesser General Public License for more details. 1709a52d85SRichard Henderson * 1809a52d85SRichard Henderson * You should have received a copy of the GNU Lesser General Public 1909a52d85SRichard Henderson * License along with this library; if not, see <http://www.gnu.org/licenses/>. 2009a52d85SRichard Henderson */ 2109a52d85SRichard Henderson 2209a52d85SRichard Henderson #include "qemu/osdep.h" 2309a52d85SRichard Henderson #include "translate.h" 2409a52d85SRichard Henderson 2509a52d85SRichard Henderson 2609a52d85SRichard Henderson static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, 2709a52d85SRichard Henderson uint32_t opr_sz, uint32_t max_sz, 2809a52d85SRichard Henderson gen_helper_gvec_3_ptr *fn) 2909a52d85SRichard Henderson { 3009a52d85SRichard Henderson TCGv_ptr qc_ptr = tcg_temp_new_ptr(); 3109a52d85SRichard Henderson 3209a52d85SRichard Henderson tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc)); 3309a52d85SRichard Henderson tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr, 3409a52d85SRichard Henderson opr_sz, max_sz, 0, fn); 3509a52d85SRichard Henderson } 3609a52d85SRichard Henderson 3709a52d85SRichard Henderson void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 3809a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 3909a52d85SRichard Henderson { 4009a52d85SRichard Henderson static gen_helper_gvec_3_ptr * const fns[2] = { 4109a52d85SRichard Henderson gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32 4209a52d85SRichard Henderson }; 4309a52d85SRichard Henderson tcg_debug_assert(vece >= 1 && vece <= 2); 4409a52d85SRichard Henderson gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); 4509a52d85SRichard Henderson } 4609a52d85SRichard Henderson 4709a52d85SRichard Henderson void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 4809a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 4909a52d85SRichard Henderson { 5009a52d85SRichard Henderson static gen_helper_gvec_3_ptr * const fns[2] = { 5109a52d85SRichard Henderson gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32 5209a52d85SRichard Henderson }; 5309a52d85SRichard Henderson tcg_debug_assert(vece >= 1 && vece <= 2); 5409a52d85SRichard Henderson gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); 5509a52d85SRichard Henderson } 5609a52d85SRichard Henderson 5709a52d85SRichard Henderson #define GEN_CMP0(NAME, COND) \ 5809a52d85SRichard Henderson void NAME(unsigned vece, uint32_t d, uint32_t m, \ 5909a52d85SRichard Henderson uint32_t opr_sz, uint32_t max_sz) \ 6009a52d85SRichard Henderson { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); } 6109a52d85SRichard Henderson 6209a52d85SRichard Henderson GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ) 6309a52d85SRichard Henderson GEN_CMP0(gen_gvec_cle0, TCG_COND_LE) 6409a52d85SRichard Henderson GEN_CMP0(gen_gvec_cge0, TCG_COND_GE) 6509a52d85SRichard Henderson GEN_CMP0(gen_gvec_clt0, TCG_COND_LT) 6609a52d85SRichard Henderson GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT) 6709a52d85SRichard Henderson 6809a52d85SRichard Henderson #undef GEN_CMP0 6909a52d85SRichard Henderson 7009a52d85SRichard Henderson static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 7109a52d85SRichard Henderson { 7209a52d85SRichard Henderson tcg_gen_vec_sar8i_i64(a, a, shift); 7309a52d85SRichard Henderson tcg_gen_vec_add8_i64(d, d, a); 7409a52d85SRichard Henderson } 7509a52d85SRichard Henderson 7609a52d85SRichard Henderson static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 7709a52d85SRichard Henderson { 7809a52d85SRichard Henderson tcg_gen_vec_sar16i_i64(a, a, shift); 7909a52d85SRichard Henderson tcg_gen_vec_add16_i64(d, d, a); 8009a52d85SRichard Henderson } 8109a52d85SRichard Henderson 8209a52d85SRichard Henderson static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) 8309a52d85SRichard Henderson { 8409a52d85SRichard Henderson tcg_gen_sari_i32(a, a, shift); 8509a52d85SRichard Henderson tcg_gen_add_i32(d, d, a); 8609a52d85SRichard Henderson } 8709a52d85SRichard Henderson 8809a52d85SRichard Henderson static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 8909a52d85SRichard Henderson { 9009a52d85SRichard Henderson tcg_gen_sari_i64(a, a, shift); 9109a52d85SRichard Henderson tcg_gen_add_i64(d, d, a); 9209a52d85SRichard Henderson } 9309a52d85SRichard Henderson 9409a52d85SRichard Henderson static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 9509a52d85SRichard Henderson { 9609a52d85SRichard Henderson tcg_gen_sari_vec(vece, a, a, sh); 9709a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, a); 9809a52d85SRichard Henderson } 9909a52d85SRichard Henderson 10009a52d85SRichard Henderson void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 10109a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 10209a52d85SRichard Henderson { 10309a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 10409a52d85SRichard Henderson INDEX_op_sari_vec, INDEX_op_add_vec, 0 10509a52d85SRichard Henderson }; 10609a52d85SRichard Henderson static const GVecGen2i ops[4] = { 10709a52d85SRichard Henderson { .fni8 = gen_ssra8_i64, 10809a52d85SRichard Henderson .fniv = gen_ssra_vec, 10909a52d85SRichard Henderson .fno = gen_helper_gvec_ssra_b, 11009a52d85SRichard Henderson .load_dest = true, 11109a52d85SRichard Henderson .opt_opc = vecop_list, 11209a52d85SRichard Henderson .vece = MO_8 }, 11309a52d85SRichard Henderson { .fni8 = gen_ssra16_i64, 11409a52d85SRichard Henderson .fniv = gen_ssra_vec, 11509a52d85SRichard Henderson .fno = gen_helper_gvec_ssra_h, 11609a52d85SRichard Henderson .load_dest = true, 11709a52d85SRichard Henderson .opt_opc = vecop_list, 11809a52d85SRichard Henderson .vece = MO_16 }, 11909a52d85SRichard Henderson { .fni4 = gen_ssra32_i32, 12009a52d85SRichard Henderson .fniv = gen_ssra_vec, 12109a52d85SRichard Henderson .fno = gen_helper_gvec_ssra_s, 12209a52d85SRichard Henderson .load_dest = true, 12309a52d85SRichard Henderson .opt_opc = vecop_list, 12409a52d85SRichard Henderson .vece = MO_32 }, 12509a52d85SRichard Henderson { .fni8 = gen_ssra64_i64, 12609a52d85SRichard Henderson .fniv = gen_ssra_vec, 12709a52d85SRichard Henderson .fno = gen_helper_gvec_ssra_d, 12809a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 12909a52d85SRichard Henderson .opt_opc = vecop_list, 13009a52d85SRichard Henderson .load_dest = true, 13109a52d85SRichard Henderson .vece = MO_64 }, 13209a52d85SRichard Henderson }; 13309a52d85SRichard Henderson 13409a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [1..esize]. */ 13509a52d85SRichard Henderson tcg_debug_assert(shift > 0); 13609a52d85SRichard Henderson tcg_debug_assert(shift <= (8 << vece)); 13709a52d85SRichard Henderson 13809a52d85SRichard Henderson /* 13909a52d85SRichard Henderson * Shifts larger than the element size are architecturally valid. 14009a52d85SRichard Henderson * Signed results in all sign bits. 14109a52d85SRichard Henderson */ 14209a52d85SRichard Henderson shift = MIN(shift, (8 << vece) - 1); 14309a52d85SRichard Henderson tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 14409a52d85SRichard Henderson } 14509a52d85SRichard Henderson 14609a52d85SRichard Henderson static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 14709a52d85SRichard Henderson { 14809a52d85SRichard Henderson tcg_gen_vec_shr8i_i64(a, a, shift); 14909a52d85SRichard Henderson tcg_gen_vec_add8_i64(d, d, a); 15009a52d85SRichard Henderson } 15109a52d85SRichard Henderson 15209a52d85SRichard Henderson static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 15309a52d85SRichard Henderson { 15409a52d85SRichard Henderson tcg_gen_vec_shr16i_i64(a, a, shift); 15509a52d85SRichard Henderson tcg_gen_vec_add16_i64(d, d, a); 15609a52d85SRichard Henderson } 15709a52d85SRichard Henderson 15809a52d85SRichard Henderson static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) 15909a52d85SRichard Henderson { 16009a52d85SRichard Henderson tcg_gen_shri_i32(a, a, shift); 16109a52d85SRichard Henderson tcg_gen_add_i32(d, d, a); 16209a52d85SRichard Henderson } 16309a52d85SRichard Henderson 16409a52d85SRichard Henderson static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 16509a52d85SRichard Henderson { 16609a52d85SRichard Henderson tcg_gen_shri_i64(a, a, shift); 16709a52d85SRichard Henderson tcg_gen_add_i64(d, d, a); 16809a52d85SRichard Henderson } 16909a52d85SRichard Henderson 17009a52d85SRichard Henderson static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 17109a52d85SRichard Henderson { 17209a52d85SRichard Henderson tcg_gen_shri_vec(vece, a, a, sh); 17309a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, a); 17409a52d85SRichard Henderson } 17509a52d85SRichard Henderson 17609a52d85SRichard Henderson void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 17709a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 17809a52d85SRichard Henderson { 17909a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 18009a52d85SRichard Henderson INDEX_op_shri_vec, INDEX_op_add_vec, 0 18109a52d85SRichard Henderson }; 18209a52d85SRichard Henderson static const GVecGen2i ops[4] = { 18309a52d85SRichard Henderson { .fni8 = gen_usra8_i64, 18409a52d85SRichard Henderson .fniv = gen_usra_vec, 18509a52d85SRichard Henderson .fno = gen_helper_gvec_usra_b, 18609a52d85SRichard Henderson .load_dest = true, 18709a52d85SRichard Henderson .opt_opc = vecop_list, 18809a52d85SRichard Henderson .vece = MO_8, }, 18909a52d85SRichard Henderson { .fni8 = gen_usra16_i64, 19009a52d85SRichard Henderson .fniv = gen_usra_vec, 19109a52d85SRichard Henderson .fno = gen_helper_gvec_usra_h, 19209a52d85SRichard Henderson .load_dest = true, 19309a52d85SRichard Henderson .opt_opc = vecop_list, 19409a52d85SRichard Henderson .vece = MO_16, }, 19509a52d85SRichard Henderson { .fni4 = gen_usra32_i32, 19609a52d85SRichard Henderson .fniv = gen_usra_vec, 19709a52d85SRichard Henderson .fno = gen_helper_gvec_usra_s, 19809a52d85SRichard Henderson .load_dest = true, 19909a52d85SRichard Henderson .opt_opc = vecop_list, 20009a52d85SRichard Henderson .vece = MO_32, }, 20109a52d85SRichard Henderson { .fni8 = gen_usra64_i64, 20209a52d85SRichard Henderson .fniv = gen_usra_vec, 20309a52d85SRichard Henderson .fno = gen_helper_gvec_usra_d, 20409a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 20509a52d85SRichard Henderson .load_dest = true, 20609a52d85SRichard Henderson .opt_opc = vecop_list, 20709a52d85SRichard Henderson .vece = MO_64, }, 20809a52d85SRichard Henderson }; 20909a52d85SRichard Henderson 21009a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [1..esize]. */ 21109a52d85SRichard Henderson tcg_debug_assert(shift > 0); 21209a52d85SRichard Henderson tcg_debug_assert(shift <= (8 << vece)); 21309a52d85SRichard Henderson 21409a52d85SRichard Henderson /* 21509a52d85SRichard Henderson * Shifts larger than the element size are architecturally valid. 21609a52d85SRichard Henderson * Unsigned results in all zeros as input to accumulate: nop. 21709a52d85SRichard Henderson */ 21809a52d85SRichard Henderson if (shift < (8 << vece)) { 21909a52d85SRichard Henderson tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 22009a52d85SRichard Henderson } else { 22109a52d85SRichard Henderson /* Nop, but we do need to clear the tail. */ 22209a52d85SRichard Henderson tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz); 22309a52d85SRichard Henderson } 22409a52d85SRichard Henderson } 22509a52d85SRichard Henderson 22609a52d85SRichard Henderson /* 22709a52d85SRichard Henderson * Shift one less than the requested amount, and the low bit is 22809a52d85SRichard Henderson * the rounding bit. For the 8 and 16-bit operations, because we 22909a52d85SRichard Henderson * mask the low bit, we can perform a normal integer shift instead 23009a52d85SRichard Henderson * of a vector shift. 23109a52d85SRichard Henderson */ 23209a52d85SRichard Henderson static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 23309a52d85SRichard Henderson { 23409a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 23509a52d85SRichard Henderson 23609a52d85SRichard Henderson tcg_gen_shri_i64(t, a, sh - 1); 23709a52d85SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); 23809a52d85SRichard Henderson tcg_gen_vec_sar8i_i64(d, a, sh); 23909a52d85SRichard Henderson tcg_gen_vec_add8_i64(d, d, t); 24009a52d85SRichard Henderson } 24109a52d85SRichard Henderson 24209a52d85SRichard Henderson static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 24309a52d85SRichard Henderson { 24409a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 24509a52d85SRichard Henderson 24609a52d85SRichard Henderson tcg_gen_shri_i64(t, a, sh - 1); 24709a52d85SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); 24809a52d85SRichard Henderson tcg_gen_vec_sar16i_i64(d, a, sh); 24909a52d85SRichard Henderson tcg_gen_vec_add16_i64(d, d, t); 25009a52d85SRichard Henderson } 25109a52d85SRichard Henderson 25209a52d85SRichard Henderson void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) 25309a52d85SRichard Henderson { 25409a52d85SRichard Henderson TCGv_i32 t; 25509a52d85SRichard Henderson 25609a52d85SRichard Henderson /* Handle shift by the input size for the benefit of trans_SRSHR_ri */ 25709a52d85SRichard Henderson if (sh == 32) { 25809a52d85SRichard Henderson tcg_gen_movi_i32(d, 0); 25909a52d85SRichard Henderson return; 26009a52d85SRichard Henderson } 26109a52d85SRichard Henderson t = tcg_temp_new_i32(); 26209a52d85SRichard Henderson tcg_gen_extract_i32(t, a, sh - 1, 1); 26309a52d85SRichard Henderson tcg_gen_sari_i32(d, a, sh); 26409a52d85SRichard Henderson tcg_gen_add_i32(d, d, t); 26509a52d85SRichard Henderson } 26609a52d85SRichard Henderson 26709a52d85SRichard Henderson void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 26809a52d85SRichard Henderson { 26909a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 27009a52d85SRichard Henderson 27109a52d85SRichard Henderson tcg_gen_extract_i64(t, a, sh - 1, 1); 27209a52d85SRichard Henderson tcg_gen_sari_i64(d, a, sh); 27309a52d85SRichard Henderson tcg_gen_add_i64(d, d, t); 27409a52d85SRichard Henderson } 27509a52d85SRichard Henderson 27609a52d85SRichard Henderson static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 27709a52d85SRichard Henderson { 27809a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 27909a52d85SRichard Henderson TCGv_vec ones = tcg_temp_new_vec_matching(d); 28009a52d85SRichard Henderson 28109a52d85SRichard Henderson tcg_gen_shri_vec(vece, t, a, sh - 1); 28209a52d85SRichard Henderson tcg_gen_dupi_vec(vece, ones, 1); 28309a52d85SRichard Henderson tcg_gen_and_vec(vece, t, t, ones); 28409a52d85SRichard Henderson tcg_gen_sari_vec(vece, d, a, sh); 28509a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, t); 28609a52d85SRichard Henderson } 28709a52d85SRichard Henderson 28809a52d85SRichard Henderson void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 28909a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 29009a52d85SRichard Henderson { 29109a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 29209a52d85SRichard Henderson INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 29309a52d85SRichard Henderson }; 29409a52d85SRichard Henderson static const GVecGen2i ops[4] = { 29509a52d85SRichard Henderson { .fni8 = gen_srshr8_i64, 29609a52d85SRichard Henderson .fniv = gen_srshr_vec, 29709a52d85SRichard Henderson .fno = gen_helper_gvec_srshr_b, 29809a52d85SRichard Henderson .opt_opc = vecop_list, 29909a52d85SRichard Henderson .vece = MO_8 }, 30009a52d85SRichard Henderson { .fni8 = gen_srshr16_i64, 30109a52d85SRichard Henderson .fniv = gen_srshr_vec, 30209a52d85SRichard Henderson .fno = gen_helper_gvec_srshr_h, 30309a52d85SRichard Henderson .opt_opc = vecop_list, 30409a52d85SRichard Henderson .vece = MO_16 }, 30509a52d85SRichard Henderson { .fni4 = gen_srshr32_i32, 30609a52d85SRichard Henderson .fniv = gen_srshr_vec, 30709a52d85SRichard Henderson .fno = gen_helper_gvec_srshr_s, 30809a52d85SRichard Henderson .opt_opc = vecop_list, 30909a52d85SRichard Henderson .vece = MO_32 }, 31009a52d85SRichard Henderson { .fni8 = gen_srshr64_i64, 31109a52d85SRichard Henderson .fniv = gen_srshr_vec, 31209a52d85SRichard Henderson .fno = gen_helper_gvec_srshr_d, 31309a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 31409a52d85SRichard Henderson .opt_opc = vecop_list, 31509a52d85SRichard Henderson .vece = MO_64 }, 31609a52d85SRichard Henderson }; 31709a52d85SRichard Henderson 31809a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [1..esize] */ 31909a52d85SRichard Henderson tcg_debug_assert(shift > 0); 32009a52d85SRichard Henderson tcg_debug_assert(shift <= (8 << vece)); 32109a52d85SRichard Henderson 32209a52d85SRichard Henderson if (shift == (8 << vece)) { 32309a52d85SRichard Henderson /* 32409a52d85SRichard Henderson * Shifts larger than the element size are architecturally valid. 32509a52d85SRichard Henderson * Signed results in all sign bits. With rounding, this produces 32609a52d85SRichard Henderson * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0. 32709a52d85SRichard Henderson * I.e. always zero. 32809a52d85SRichard Henderson */ 32909a52d85SRichard Henderson tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0); 33009a52d85SRichard Henderson } else { 33109a52d85SRichard Henderson tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 33209a52d85SRichard Henderson } 33309a52d85SRichard Henderson } 33409a52d85SRichard Henderson 33509a52d85SRichard Henderson static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 33609a52d85SRichard Henderson { 33709a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 33809a52d85SRichard Henderson 33909a52d85SRichard Henderson gen_srshr8_i64(t, a, sh); 34009a52d85SRichard Henderson tcg_gen_vec_add8_i64(d, d, t); 34109a52d85SRichard Henderson } 34209a52d85SRichard Henderson 34309a52d85SRichard Henderson static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 34409a52d85SRichard Henderson { 34509a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 34609a52d85SRichard Henderson 34709a52d85SRichard Henderson gen_srshr16_i64(t, a, sh); 34809a52d85SRichard Henderson tcg_gen_vec_add16_i64(d, d, t); 34909a52d85SRichard Henderson } 35009a52d85SRichard Henderson 35109a52d85SRichard Henderson static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) 35209a52d85SRichard Henderson { 35309a52d85SRichard Henderson TCGv_i32 t = tcg_temp_new_i32(); 35409a52d85SRichard Henderson 35509a52d85SRichard Henderson gen_srshr32_i32(t, a, sh); 35609a52d85SRichard Henderson tcg_gen_add_i32(d, d, t); 35709a52d85SRichard Henderson } 35809a52d85SRichard Henderson 35909a52d85SRichard Henderson static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 36009a52d85SRichard Henderson { 36109a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 36209a52d85SRichard Henderson 36309a52d85SRichard Henderson gen_srshr64_i64(t, a, sh); 36409a52d85SRichard Henderson tcg_gen_add_i64(d, d, t); 36509a52d85SRichard Henderson } 36609a52d85SRichard Henderson 36709a52d85SRichard Henderson static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 36809a52d85SRichard Henderson { 36909a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 37009a52d85SRichard Henderson 37109a52d85SRichard Henderson gen_srshr_vec(vece, t, a, sh); 37209a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, t); 37309a52d85SRichard Henderson } 37409a52d85SRichard Henderson 37509a52d85SRichard Henderson void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 37609a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 37709a52d85SRichard Henderson { 37809a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 37909a52d85SRichard Henderson INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 38009a52d85SRichard Henderson }; 38109a52d85SRichard Henderson static const GVecGen2i ops[4] = { 38209a52d85SRichard Henderson { .fni8 = gen_srsra8_i64, 38309a52d85SRichard Henderson .fniv = gen_srsra_vec, 38409a52d85SRichard Henderson .fno = gen_helper_gvec_srsra_b, 38509a52d85SRichard Henderson .opt_opc = vecop_list, 38609a52d85SRichard Henderson .load_dest = true, 38709a52d85SRichard Henderson .vece = MO_8 }, 38809a52d85SRichard Henderson { .fni8 = gen_srsra16_i64, 38909a52d85SRichard Henderson .fniv = gen_srsra_vec, 39009a52d85SRichard Henderson .fno = gen_helper_gvec_srsra_h, 39109a52d85SRichard Henderson .opt_opc = vecop_list, 39209a52d85SRichard Henderson .load_dest = true, 39309a52d85SRichard Henderson .vece = MO_16 }, 39409a52d85SRichard Henderson { .fni4 = gen_srsra32_i32, 39509a52d85SRichard Henderson .fniv = gen_srsra_vec, 39609a52d85SRichard Henderson .fno = gen_helper_gvec_srsra_s, 39709a52d85SRichard Henderson .opt_opc = vecop_list, 39809a52d85SRichard Henderson .load_dest = true, 39909a52d85SRichard Henderson .vece = MO_32 }, 40009a52d85SRichard Henderson { .fni8 = gen_srsra64_i64, 40109a52d85SRichard Henderson .fniv = gen_srsra_vec, 40209a52d85SRichard Henderson .fno = gen_helper_gvec_srsra_d, 40309a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 40409a52d85SRichard Henderson .opt_opc = vecop_list, 40509a52d85SRichard Henderson .load_dest = true, 40609a52d85SRichard Henderson .vece = MO_64 }, 40709a52d85SRichard Henderson }; 40809a52d85SRichard Henderson 40909a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [1..esize] */ 41009a52d85SRichard Henderson tcg_debug_assert(shift > 0); 41109a52d85SRichard Henderson tcg_debug_assert(shift <= (8 << vece)); 41209a52d85SRichard Henderson 41309a52d85SRichard Henderson /* 41409a52d85SRichard Henderson * Shifts larger than the element size are architecturally valid. 41509a52d85SRichard Henderson * Signed results in all sign bits. With rounding, this produces 41609a52d85SRichard Henderson * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0. 41709a52d85SRichard Henderson * I.e. always zero. With accumulation, this leaves D unchanged. 41809a52d85SRichard Henderson */ 41909a52d85SRichard Henderson if (shift == (8 << vece)) { 42009a52d85SRichard Henderson /* Nop, but we do need to clear the tail. */ 42109a52d85SRichard Henderson tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz); 42209a52d85SRichard Henderson } else { 42309a52d85SRichard Henderson tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 42409a52d85SRichard Henderson } 42509a52d85SRichard Henderson } 42609a52d85SRichard Henderson 42709a52d85SRichard Henderson static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 42809a52d85SRichard Henderson { 42909a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 43009a52d85SRichard Henderson 43109a52d85SRichard Henderson tcg_gen_shri_i64(t, a, sh - 1); 43209a52d85SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); 43309a52d85SRichard Henderson tcg_gen_vec_shr8i_i64(d, a, sh); 43409a52d85SRichard Henderson tcg_gen_vec_add8_i64(d, d, t); 43509a52d85SRichard Henderson } 43609a52d85SRichard Henderson 43709a52d85SRichard Henderson static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 43809a52d85SRichard Henderson { 43909a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 44009a52d85SRichard Henderson 44109a52d85SRichard Henderson tcg_gen_shri_i64(t, a, sh - 1); 44209a52d85SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); 44309a52d85SRichard Henderson tcg_gen_vec_shr16i_i64(d, a, sh); 44409a52d85SRichard Henderson tcg_gen_vec_add16_i64(d, d, t); 44509a52d85SRichard Henderson } 44609a52d85SRichard Henderson 44709a52d85SRichard Henderson void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) 44809a52d85SRichard Henderson { 44909a52d85SRichard Henderson TCGv_i32 t; 45009a52d85SRichard Henderson 45109a52d85SRichard Henderson /* Handle shift by the input size for the benefit of trans_URSHR_ri */ 45209a52d85SRichard Henderson if (sh == 32) { 45309a52d85SRichard Henderson tcg_gen_extract_i32(d, a, sh - 1, 1); 45409a52d85SRichard Henderson return; 45509a52d85SRichard Henderson } 45609a52d85SRichard Henderson t = tcg_temp_new_i32(); 45709a52d85SRichard Henderson tcg_gen_extract_i32(t, a, sh - 1, 1); 45809a52d85SRichard Henderson tcg_gen_shri_i32(d, a, sh); 45909a52d85SRichard Henderson tcg_gen_add_i32(d, d, t); 46009a52d85SRichard Henderson } 46109a52d85SRichard Henderson 46209a52d85SRichard Henderson void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 46309a52d85SRichard Henderson { 46409a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 46509a52d85SRichard Henderson 46609a52d85SRichard Henderson tcg_gen_extract_i64(t, a, sh - 1, 1); 46709a52d85SRichard Henderson tcg_gen_shri_i64(d, a, sh); 46809a52d85SRichard Henderson tcg_gen_add_i64(d, d, t); 46909a52d85SRichard Henderson } 47009a52d85SRichard Henderson 47109a52d85SRichard Henderson static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift) 47209a52d85SRichard Henderson { 47309a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 47409a52d85SRichard Henderson TCGv_vec ones = tcg_temp_new_vec_matching(d); 47509a52d85SRichard Henderson 47609a52d85SRichard Henderson tcg_gen_shri_vec(vece, t, a, shift - 1); 47709a52d85SRichard Henderson tcg_gen_dupi_vec(vece, ones, 1); 47809a52d85SRichard Henderson tcg_gen_and_vec(vece, t, t, ones); 47909a52d85SRichard Henderson tcg_gen_shri_vec(vece, d, a, shift); 48009a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, t); 48109a52d85SRichard Henderson } 48209a52d85SRichard Henderson 48309a52d85SRichard Henderson void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 48409a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 48509a52d85SRichard Henderson { 48609a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 48709a52d85SRichard Henderson INDEX_op_shri_vec, INDEX_op_add_vec, 0 48809a52d85SRichard Henderson }; 48909a52d85SRichard Henderson static const GVecGen2i ops[4] = { 49009a52d85SRichard Henderson { .fni8 = gen_urshr8_i64, 49109a52d85SRichard Henderson .fniv = gen_urshr_vec, 49209a52d85SRichard Henderson .fno = gen_helper_gvec_urshr_b, 49309a52d85SRichard Henderson .opt_opc = vecop_list, 49409a52d85SRichard Henderson .vece = MO_8 }, 49509a52d85SRichard Henderson { .fni8 = gen_urshr16_i64, 49609a52d85SRichard Henderson .fniv = gen_urshr_vec, 49709a52d85SRichard Henderson .fno = gen_helper_gvec_urshr_h, 49809a52d85SRichard Henderson .opt_opc = vecop_list, 49909a52d85SRichard Henderson .vece = MO_16 }, 50009a52d85SRichard Henderson { .fni4 = gen_urshr32_i32, 50109a52d85SRichard Henderson .fniv = gen_urshr_vec, 50209a52d85SRichard Henderson .fno = gen_helper_gvec_urshr_s, 50309a52d85SRichard Henderson .opt_opc = vecop_list, 50409a52d85SRichard Henderson .vece = MO_32 }, 50509a52d85SRichard Henderson { .fni8 = gen_urshr64_i64, 50609a52d85SRichard Henderson .fniv = gen_urshr_vec, 50709a52d85SRichard Henderson .fno = gen_helper_gvec_urshr_d, 50809a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 50909a52d85SRichard Henderson .opt_opc = vecop_list, 51009a52d85SRichard Henderson .vece = MO_64 }, 51109a52d85SRichard Henderson }; 51209a52d85SRichard Henderson 51309a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [1..esize] */ 51409a52d85SRichard Henderson tcg_debug_assert(shift > 0); 51509a52d85SRichard Henderson tcg_debug_assert(shift <= (8 << vece)); 51609a52d85SRichard Henderson 51709a52d85SRichard Henderson if (shift == (8 << vece)) { 51809a52d85SRichard Henderson /* 51909a52d85SRichard Henderson * Shifts larger than the element size are architecturally valid. 52009a52d85SRichard Henderson * Unsigned results in zero. With rounding, this produces a 52109a52d85SRichard Henderson * copy of the most significant bit. 52209a52d85SRichard Henderson */ 52309a52d85SRichard Henderson tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz); 52409a52d85SRichard Henderson } else { 52509a52d85SRichard Henderson tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 52609a52d85SRichard Henderson } 52709a52d85SRichard Henderson } 52809a52d85SRichard Henderson 52909a52d85SRichard Henderson static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 53009a52d85SRichard Henderson { 53109a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 53209a52d85SRichard Henderson 53309a52d85SRichard Henderson if (sh == 8) { 53409a52d85SRichard Henderson tcg_gen_vec_shr8i_i64(t, a, 7); 53509a52d85SRichard Henderson } else { 53609a52d85SRichard Henderson gen_urshr8_i64(t, a, sh); 53709a52d85SRichard Henderson } 53809a52d85SRichard Henderson tcg_gen_vec_add8_i64(d, d, t); 53909a52d85SRichard Henderson } 54009a52d85SRichard Henderson 54109a52d85SRichard Henderson static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 54209a52d85SRichard Henderson { 54309a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 54409a52d85SRichard Henderson 54509a52d85SRichard Henderson if (sh == 16) { 54609a52d85SRichard Henderson tcg_gen_vec_shr16i_i64(t, a, 15); 54709a52d85SRichard Henderson } else { 54809a52d85SRichard Henderson gen_urshr16_i64(t, a, sh); 54909a52d85SRichard Henderson } 55009a52d85SRichard Henderson tcg_gen_vec_add16_i64(d, d, t); 55109a52d85SRichard Henderson } 55209a52d85SRichard Henderson 55309a52d85SRichard Henderson static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) 55409a52d85SRichard Henderson { 55509a52d85SRichard Henderson TCGv_i32 t = tcg_temp_new_i32(); 55609a52d85SRichard Henderson 55709a52d85SRichard Henderson if (sh == 32) { 55809a52d85SRichard Henderson tcg_gen_shri_i32(t, a, 31); 55909a52d85SRichard Henderson } else { 56009a52d85SRichard Henderson gen_urshr32_i32(t, a, sh); 56109a52d85SRichard Henderson } 56209a52d85SRichard Henderson tcg_gen_add_i32(d, d, t); 56309a52d85SRichard Henderson } 56409a52d85SRichard Henderson 56509a52d85SRichard Henderson static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 56609a52d85SRichard Henderson { 56709a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 56809a52d85SRichard Henderson 56909a52d85SRichard Henderson if (sh == 64) { 57009a52d85SRichard Henderson tcg_gen_shri_i64(t, a, 63); 57109a52d85SRichard Henderson } else { 57209a52d85SRichard Henderson gen_urshr64_i64(t, a, sh); 57309a52d85SRichard Henderson } 57409a52d85SRichard Henderson tcg_gen_add_i64(d, d, t); 57509a52d85SRichard Henderson } 57609a52d85SRichard Henderson 57709a52d85SRichard Henderson static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 57809a52d85SRichard Henderson { 57909a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 58009a52d85SRichard Henderson 58109a52d85SRichard Henderson if (sh == (8 << vece)) { 58209a52d85SRichard Henderson tcg_gen_shri_vec(vece, t, a, sh - 1); 58309a52d85SRichard Henderson } else { 58409a52d85SRichard Henderson gen_urshr_vec(vece, t, a, sh); 58509a52d85SRichard Henderson } 58609a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, t); 58709a52d85SRichard Henderson } 58809a52d85SRichard Henderson 58909a52d85SRichard Henderson void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 59009a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 59109a52d85SRichard Henderson { 59209a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 59309a52d85SRichard Henderson INDEX_op_shri_vec, INDEX_op_add_vec, 0 59409a52d85SRichard Henderson }; 59509a52d85SRichard Henderson static const GVecGen2i ops[4] = { 59609a52d85SRichard Henderson { .fni8 = gen_ursra8_i64, 59709a52d85SRichard Henderson .fniv = gen_ursra_vec, 59809a52d85SRichard Henderson .fno = gen_helper_gvec_ursra_b, 59909a52d85SRichard Henderson .opt_opc = vecop_list, 60009a52d85SRichard Henderson .load_dest = true, 60109a52d85SRichard Henderson .vece = MO_8 }, 60209a52d85SRichard Henderson { .fni8 = gen_ursra16_i64, 60309a52d85SRichard Henderson .fniv = gen_ursra_vec, 60409a52d85SRichard Henderson .fno = gen_helper_gvec_ursra_h, 60509a52d85SRichard Henderson .opt_opc = vecop_list, 60609a52d85SRichard Henderson .load_dest = true, 60709a52d85SRichard Henderson .vece = MO_16 }, 60809a52d85SRichard Henderson { .fni4 = gen_ursra32_i32, 60909a52d85SRichard Henderson .fniv = gen_ursra_vec, 61009a52d85SRichard Henderson .fno = gen_helper_gvec_ursra_s, 61109a52d85SRichard Henderson .opt_opc = vecop_list, 61209a52d85SRichard Henderson .load_dest = true, 61309a52d85SRichard Henderson .vece = MO_32 }, 61409a52d85SRichard Henderson { .fni8 = gen_ursra64_i64, 61509a52d85SRichard Henderson .fniv = gen_ursra_vec, 61609a52d85SRichard Henderson .fno = gen_helper_gvec_ursra_d, 61709a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 61809a52d85SRichard Henderson .opt_opc = vecop_list, 61909a52d85SRichard Henderson .load_dest = true, 62009a52d85SRichard Henderson .vece = MO_64 }, 62109a52d85SRichard Henderson }; 62209a52d85SRichard Henderson 62309a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [1..esize] */ 62409a52d85SRichard Henderson tcg_debug_assert(shift > 0); 62509a52d85SRichard Henderson tcg_debug_assert(shift <= (8 << vece)); 62609a52d85SRichard Henderson 62709a52d85SRichard Henderson tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 62809a52d85SRichard Henderson } 62909a52d85SRichard Henderson 63009a52d85SRichard Henderson static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 63109a52d85SRichard Henderson { 63209a52d85SRichard Henderson uint64_t mask = dup_const(MO_8, 0xff >> shift); 63309a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 63409a52d85SRichard Henderson 63509a52d85SRichard Henderson tcg_gen_shri_i64(t, a, shift); 63609a52d85SRichard Henderson tcg_gen_andi_i64(t, t, mask); 63709a52d85SRichard Henderson tcg_gen_andi_i64(d, d, ~mask); 63809a52d85SRichard Henderson tcg_gen_or_i64(d, d, t); 63909a52d85SRichard Henderson } 64009a52d85SRichard Henderson 64109a52d85SRichard Henderson static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 64209a52d85SRichard Henderson { 64309a52d85SRichard Henderson uint64_t mask = dup_const(MO_16, 0xffff >> shift); 64409a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 64509a52d85SRichard Henderson 64609a52d85SRichard Henderson tcg_gen_shri_i64(t, a, shift); 64709a52d85SRichard Henderson tcg_gen_andi_i64(t, t, mask); 64809a52d85SRichard Henderson tcg_gen_andi_i64(d, d, ~mask); 64909a52d85SRichard Henderson tcg_gen_or_i64(d, d, t); 65009a52d85SRichard Henderson } 65109a52d85SRichard Henderson 65209a52d85SRichard Henderson static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) 65309a52d85SRichard Henderson { 65409a52d85SRichard Henderson tcg_gen_shri_i32(a, a, shift); 65509a52d85SRichard Henderson tcg_gen_deposit_i32(d, d, a, 0, 32 - shift); 65609a52d85SRichard Henderson } 65709a52d85SRichard Henderson 65809a52d85SRichard Henderson static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 65909a52d85SRichard Henderson { 66009a52d85SRichard Henderson tcg_gen_shri_i64(a, a, shift); 66109a52d85SRichard Henderson tcg_gen_deposit_i64(d, d, a, 0, 64 - shift); 66209a52d85SRichard Henderson } 66309a52d85SRichard Henderson 66409a52d85SRichard Henderson static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 66509a52d85SRichard Henderson { 66609a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 66709a52d85SRichard Henderson TCGv_vec m = tcg_temp_new_vec_matching(d); 66809a52d85SRichard Henderson 66909a52d85SRichard Henderson tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh)); 67009a52d85SRichard Henderson tcg_gen_shri_vec(vece, t, a, sh); 67109a52d85SRichard Henderson tcg_gen_and_vec(vece, d, d, m); 67209a52d85SRichard Henderson tcg_gen_or_vec(vece, d, d, t); 67309a52d85SRichard Henderson } 67409a52d85SRichard Henderson 67509a52d85SRichard Henderson void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 67609a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 67709a52d85SRichard Henderson { 67809a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 }; 67909a52d85SRichard Henderson const GVecGen2i ops[4] = { 68009a52d85SRichard Henderson { .fni8 = gen_shr8_ins_i64, 68109a52d85SRichard Henderson .fniv = gen_shr_ins_vec, 68209a52d85SRichard Henderson .fno = gen_helper_gvec_sri_b, 68309a52d85SRichard Henderson .load_dest = true, 68409a52d85SRichard Henderson .opt_opc = vecop_list, 68509a52d85SRichard Henderson .vece = MO_8 }, 68609a52d85SRichard Henderson { .fni8 = gen_shr16_ins_i64, 68709a52d85SRichard Henderson .fniv = gen_shr_ins_vec, 68809a52d85SRichard Henderson .fno = gen_helper_gvec_sri_h, 68909a52d85SRichard Henderson .load_dest = true, 69009a52d85SRichard Henderson .opt_opc = vecop_list, 69109a52d85SRichard Henderson .vece = MO_16 }, 69209a52d85SRichard Henderson { .fni4 = gen_shr32_ins_i32, 69309a52d85SRichard Henderson .fniv = gen_shr_ins_vec, 69409a52d85SRichard Henderson .fno = gen_helper_gvec_sri_s, 69509a52d85SRichard Henderson .load_dest = true, 69609a52d85SRichard Henderson .opt_opc = vecop_list, 69709a52d85SRichard Henderson .vece = MO_32 }, 69809a52d85SRichard Henderson { .fni8 = gen_shr64_ins_i64, 69909a52d85SRichard Henderson .fniv = gen_shr_ins_vec, 70009a52d85SRichard Henderson .fno = gen_helper_gvec_sri_d, 70109a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 70209a52d85SRichard Henderson .load_dest = true, 70309a52d85SRichard Henderson .opt_opc = vecop_list, 70409a52d85SRichard Henderson .vece = MO_64 }, 70509a52d85SRichard Henderson }; 70609a52d85SRichard Henderson 70709a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [1..esize]. */ 70809a52d85SRichard Henderson tcg_debug_assert(shift > 0); 70909a52d85SRichard Henderson tcg_debug_assert(shift <= (8 << vece)); 71009a52d85SRichard Henderson 71109a52d85SRichard Henderson /* Shift of esize leaves destination unchanged. */ 71209a52d85SRichard Henderson if (shift < (8 << vece)) { 71309a52d85SRichard Henderson tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 71409a52d85SRichard Henderson } else { 71509a52d85SRichard Henderson /* Nop, but we do need to clear the tail. */ 71609a52d85SRichard Henderson tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz); 71709a52d85SRichard Henderson } 71809a52d85SRichard Henderson } 71909a52d85SRichard Henderson 72009a52d85SRichard Henderson static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 72109a52d85SRichard Henderson { 72209a52d85SRichard Henderson uint64_t mask = dup_const(MO_8, 0xff << shift); 72309a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 72409a52d85SRichard Henderson 72509a52d85SRichard Henderson tcg_gen_shli_i64(t, a, shift); 72609a52d85SRichard Henderson tcg_gen_andi_i64(t, t, mask); 72709a52d85SRichard Henderson tcg_gen_andi_i64(d, d, ~mask); 72809a52d85SRichard Henderson tcg_gen_or_i64(d, d, t); 72909a52d85SRichard Henderson } 73009a52d85SRichard Henderson 73109a52d85SRichard Henderson static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 73209a52d85SRichard Henderson { 73309a52d85SRichard Henderson uint64_t mask = dup_const(MO_16, 0xffff << shift); 73409a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 73509a52d85SRichard Henderson 73609a52d85SRichard Henderson tcg_gen_shli_i64(t, a, shift); 73709a52d85SRichard Henderson tcg_gen_andi_i64(t, t, mask); 73809a52d85SRichard Henderson tcg_gen_andi_i64(d, d, ~mask); 73909a52d85SRichard Henderson tcg_gen_or_i64(d, d, t); 74009a52d85SRichard Henderson } 74109a52d85SRichard Henderson 74209a52d85SRichard Henderson static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) 74309a52d85SRichard Henderson { 74409a52d85SRichard Henderson tcg_gen_deposit_i32(d, d, a, shift, 32 - shift); 74509a52d85SRichard Henderson } 74609a52d85SRichard Henderson 74709a52d85SRichard Henderson static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 74809a52d85SRichard Henderson { 74909a52d85SRichard Henderson tcg_gen_deposit_i64(d, d, a, shift, 64 - shift); 75009a52d85SRichard Henderson } 75109a52d85SRichard Henderson 75209a52d85SRichard Henderson static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 75309a52d85SRichard Henderson { 75409a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 75509a52d85SRichard Henderson TCGv_vec m = tcg_temp_new_vec_matching(d); 75609a52d85SRichard Henderson 75709a52d85SRichard Henderson tcg_gen_shli_vec(vece, t, a, sh); 75809a52d85SRichard Henderson tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh)); 75909a52d85SRichard Henderson tcg_gen_and_vec(vece, d, d, m); 76009a52d85SRichard Henderson tcg_gen_or_vec(vece, d, d, t); 76109a52d85SRichard Henderson } 76209a52d85SRichard Henderson 76309a52d85SRichard Henderson void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 76409a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 76509a52d85SRichard Henderson { 76609a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 }; 76709a52d85SRichard Henderson const GVecGen2i ops[4] = { 76809a52d85SRichard Henderson { .fni8 = gen_shl8_ins_i64, 76909a52d85SRichard Henderson .fniv = gen_shl_ins_vec, 77009a52d85SRichard Henderson .fno = gen_helper_gvec_sli_b, 77109a52d85SRichard Henderson .load_dest = true, 77209a52d85SRichard Henderson .opt_opc = vecop_list, 77309a52d85SRichard Henderson .vece = MO_8 }, 77409a52d85SRichard Henderson { .fni8 = gen_shl16_ins_i64, 77509a52d85SRichard Henderson .fniv = gen_shl_ins_vec, 77609a52d85SRichard Henderson .fno = gen_helper_gvec_sli_h, 77709a52d85SRichard Henderson .load_dest = true, 77809a52d85SRichard Henderson .opt_opc = vecop_list, 77909a52d85SRichard Henderson .vece = MO_16 }, 78009a52d85SRichard Henderson { .fni4 = gen_shl32_ins_i32, 78109a52d85SRichard Henderson .fniv = gen_shl_ins_vec, 78209a52d85SRichard Henderson .fno = gen_helper_gvec_sli_s, 78309a52d85SRichard Henderson .load_dest = true, 78409a52d85SRichard Henderson .opt_opc = vecop_list, 78509a52d85SRichard Henderson .vece = MO_32 }, 78609a52d85SRichard Henderson { .fni8 = gen_shl64_ins_i64, 78709a52d85SRichard Henderson .fniv = gen_shl_ins_vec, 78809a52d85SRichard Henderson .fno = gen_helper_gvec_sli_d, 78909a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 79009a52d85SRichard Henderson .load_dest = true, 79109a52d85SRichard Henderson .opt_opc = vecop_list, 79209a52d85SRichard Henderson .vece = MO_64 }, 79309a52d85SRichard Henderson }; 79409a52d85SRichard Henderson 79509a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [0..esize-1]. */ 79609a52d85SRichard Henderson tcg_debug_assert(shift >= 0); 79709a52d85SRichard Henderson tcg_debug_assert(shift < (8 << vece)); 79809a52d85SRichard Henderson 79909a52d85SRichard Henderson if (shift == 0) { 80009a52d85SRichard Henderson tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz); 80109a52d85SRichard Henderson } else { 80209a52d85SRichard Henderson tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 80309a52d85SRichard Henderson } 80409a52d85SRichard Henderson } 80509a52d85SRichard Henderson 80609a52d85SRichard Henderson static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 80709a52d85SRichard Henderson { 80809a52d85SRichard Henderson gen_helper_neon_mul_u8(a, a, b); 80909a52d85SRichard Henderson gen_helper_neon_add_u8(d, d, a); 81009a52d85SRichard Henderson } 81109a52d85SRichard Henderson 81209a52d85SRichard Henderson static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 81309a52d85SRichard Henderson { 81409a52d85SRichard Henderson gen_helper_neon_mul_u8(a, a, b); 81509a52d85SRichard Henderson gen_helper_neon_sub_u8(d, d, a); 81609a52d85SRichard Henderson } 81709a52d85SRichard Henderson 81809a52d85SRichard Henderson static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 81909a52d85SRichard Henderson { 82009a52d85SRichard Henderson gen_helper_neon_mul_u16(a, a, b); 82109a52d85SRichard Henderson gen_helper_neon_add_u16(d, d, a); 82209a52d85SRichard Henderson } 82309a52d85SRichard Henderson 82409a52d85SRichard Henderson static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 82509a52d85SRichard Henderson { 82609a52d85SRichard Henderson gen_helper_neon_mul_u16(a, a, b); 82709a52d85SRichard Henderson gen_helper_neon_sub_u16(d, d, a); 82809a52d85SRichard Henderson } 82909a52d85SRichard Henderson 83009a52d85SRichard Henderson static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 83109a52d85SRichard Henderson { 83209a52d85SRichard Henderson tcg_gen_mul_i32(a, a, b); 83309a52d85SRichard Henderson tcg_gen_add_i32(d, d, a); 83409a52d85SRichard Henderson } 83509a52d85SRichard Henderson 83609a52d85SRichard Henderson static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 83709a52d85SRichard Henderson { 83809a52d85SRichard Henderson tcg_gen_mul_i32(a, a, b); 83909a52d85SRichard Henderson tcg_gen_sub_i32(d, d, a); 84009a52d85SRichard Henderson } 84109a52d85SRichard Henderson 84209a52d85SRichard Henderson static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 84309a52d85SRichard Henderson { 84409a52d85SRichard Henderson tcg_gen_mul_i64(a, a, b); 84509a52d85SRichard Henderson tcg_gen_add_i64(d, d, a); 84609a52d85SRichard Henderson } 84709a52d85SRichard Henderson 84809a52d85SRichard Henderson static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 84909a52d85SRichard Henderson { 85009a52d85SRichard Henderson tcg_gen_mul_i64(a, a, b); 85109a52d85SRichard Henderson tcg_gen_sub_i64(d, d, a); 85209a52d85SRichard Henderson } 85309a52d85SRichard Henderson 85409a52d85SRichard Henderson static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 85509a52d85SRichard Henderson { 85609a52d85SRichard Henderson tcg_gen_mul_vec(vece, a, a, b); 85709a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, a); 85809a52d85SRichard Henderson } 85909a52d85SRichard Henderson 86009a52d85SRichard Henderson static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 86109a52d85SRichard Henderson { 86209a52d85SRichard Henderson tcg_gen_mul_vec(vece, a, a, b); 86309a52d85SRichard Henderson tcg_gen_sub_vec(vece, d, d, a); 86409a52d85SRichard Henderson } 86509a52d85SRichard Henderson 86609a52d85SRichard Henderson /* Note that while NEON does not support VMLA and VMLS as 64-bit ops, 86709a52d85SRichard Henderson * these tables are shared with AArch64 which does support them. 86809a52d85SRichard Henderson */ 86909a52d85SRichard Henderson void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 87009a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 87109a52d85SRichard Henderson { 87209a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 87309a52d85SRichard Henderson INDEX_op_mul_vec, INDEX_op_add_vec, 0 87409a52d85SRichard Henderson }; 87509a52d85SRichard Henderson static const GVecGen3 ops[4] = { 87609a52d85SRichard Henderson { .fni4 = gen_mla8_i32, 87709a52d85SRichard Henderson .fniv = gen_mla_vec, 87809a52d85SRichard Henderson .load_dest = true, 87909a52d85SRichard Henderson .opt_opc = vecop_list, 88009a52d85SRichard Henderson .vece = MO_8 }, 88109a52d85SRichard Henderson { .fni4 = gen_mla16_i32, 88209a52d85SRichard Henderson .fniv = gen_mla_vec, 88309a52d85SRichard Henderson .load_dest = true, 88409a52d85SRichard Henderson .opt_opc = vecop_list, 88509a52d85SRichard Henderson .vece = MO_16 }, 88609a52d85SRichard Henderson { .fni4 = gen_mla32_i32, 88709a52d85SRichard Henderson .fniv = gen_mla_vec, 88809a52d85SRichard Henderson .load_dest = true, 88909a52d85SRichard Henderson .opt_opc = vecop_list, 89009a52d85SRichard Henderson .vece = MO_32 }, 89109a52d85SRichard Henderson { .fni8 = gen_mla64_i64, 89209a52d85SRichard Henderson .fniv = gen_mla_vec, 89309a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 89409a52d85SRichard Henderson .load_dest = true, 89509a52d85SRichard Henderson .opt_opc = vecop_list, 89609a52d85SRichard Henderson .vece = MO_64 }, 89709a52d85SRichard Henderson }; 89809a52d85SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 89909a52d85SRichard Henderson } 90009a52d85SRichard Henderson 90109a52d85SRichard Henderson void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 90209a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 90309a52d85SRichard Henderson { 90409a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 90509a52d85SRichard Henderson INDEX_op_mul_vec, INDEX_op_sub_vec, 0 90609a52d85SRichard Henderson }; 90709a52d85SRichard Henderson static const GVecGen3 ops[4] = { 90809a52d85SRichard Henderson { .fni4 = gen_mls8_i32, 90909a52d85SRichard Henderson .fniv = gen_mls_vec, 91009a52d85SRichard Henderson .load_dest = true, 91109a52d85SRichard Henderson .opt_opc = vecop_list, 91209a52d85SRichard Henderson .vece = MO_8 }, 91309a52d85SRichard Henderson { .fni4 = gen_mls16_i32, 91409a52d85SRichard Henderson .fniv = gen_mls_vec, 91509a52d85SRichard Henderson .load_dest = true, 91609a52d85SRichard Henderson .opt_opc = vecop_list, 91709a52d85SRichard Henderson .vece = MO_16 }, 91809a52d85SRichard Henderson { .fni4 = gen_mls32_i32, 91909a52d85SRichard Henderson .fniv = gen_mls_vec, 92009a52d85SRichard Henderson .load_dest = true, 92109a52d85SRichard Henderson .opt_opc = vecop_list, 92209a52d85SRichard Henderson .vece = MO_32 }, 92309a52d85SRichard Henderson { .fni8 = gen_mls64_i64, 92409a52d85SRichard Henderson .fniv = gen_mls_vec, 92509a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 92609a52d85SRichard Henderson .load_dest = true, 92709a52d85SRichard Henderson .opt_opc = vecop_list, 92809a52d85SRichard Henderson .vece = MO_64 }, 92909a52d85SRichard Henderson }; 93009a52d85SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 93109a52d85SRichard Henderson } 93209a52d85SRichard Henderson 93309a52d85SRichard Henderson /* CMTST : test is "if (X & Y != 0)". */ 93409a52d85SRichard Henderson static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 93509a52d85SRichard Henderson { 93609a52d85SRichard Henderson tcg_gen_and_i32(d, a, b); 93709a52d85SRichard Henderson tcg_gen_negsetcond_i32(TCG_COND_NE, d, d, tcg_constant_i32(0)); 93809a52d85SRichard Henderson } 93909a52d85SRichard Henderson 94009a52d85SRichard Henderson void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 94109a52d85SRichard Henderson { 94209a52d85SRichard Henderson tcg_gen_and_i64(d, a, b); 94309a52d85SRichard Henderson tcg_gen_negsetcond_i64(TCG_COND_NE, d, d, tcg_constant_i64(0)); 94409a52d85SRichard Henderson } 94509a52d85SRichard Henderson 94609a52d85SRichard Henderson static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 94709a52d85SRichard Henderson { 94809a52d85SRichard Henderson tcg_gen_and_vec(vece, d, a, b); 94909a52d85SRichard Henderson tcg_gen_dupi_vec(vece, a, 0); 95009a52d85SRichard Henderson tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a); 95109a52d85SRichard Henderson } 95209a52d85SRichard Henderson 95309a52d85SRichard Henderson void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 95409a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 95509a52d85SRichard Henderson { 95609a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 }; 95709a52d85SRichard Henderson static const GVecGen3 ops[4] = { 95809a52d85SRichard Henderson { .fni4 = gen_helper_neon_tst_u8, 95909a52d85SRichard Henderson .fniv = gen_cmtst_vec, 96009a52d85SRichard Henderson .opt_opc = vecop_list, 96109a52d85SRichard Henderson .vece = MO_8 }, 96209a52d85SRichard Henderson { .fni4 = gen_helper_neon_tst_u16, 96309a52d85SRichard Henderson .fniv = gen_cmtst_vec, 96409a52d85SRichard Henderson .opt_opc = vecop_list, 96509a52d85SRichard Henderson .vece = MO_16 }, 96609a52d85SRichard Henderson { .fni4 = gen_cmtst_i32, 96709a52d85SRichard Henderson .fniv = gen_cmtst_vec, 96809a52d85SRichard Henderson .opt_opc = vecop_list, 96909a52d85SRichard Henderson .vece = MO_32 }, 97009a52d85SRichard Henderson { .fni8 = gen_cmtst_i64, 97109a52d85SRichard Henderson .fniv = gen_cmtst_vec, 97209a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 97309a52d85SRichard Henderson .opt_opc = vecop_list, 97409a52d85SRichard Henderson .vece = MO_64 }, 97509a52d85SRichard Henderson }; 97609a52d85SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 97709a52d85SRichard Henderson } 97809a52d85SRichard Henderson 97909a52d85SRichard Henderson void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift) 98009a52d85SRichard Henderson { 98109a52d85SRichard Henderson TCGv_i32 lval = tcg_temp_new_i32(); 98209a52d85SRichard Henderson TCGv_i32 rval = tcg_temp_new_i32(); 98309a52d85SRichard Henderson TCGv_i32 lsh = tcg_temp_new_i32(); 98409a52d85SRichard Henderson TCGv_i32 rsh = tcg_temp_new_i32(); 98509a52d85SRichard Henderson TCGv_i32 zero = tcg_constant_i32(0); 98609a52d85SRichard Henderson TCGv_i32 max = tcg_constant_i32(32); 98709a52d85SRichard Henderson 98809a52d85SRichard Henderson /* 98909a52d85SRichard Henderson * Rely on the TCG guarantee that out of range shifts produce 99009a52d85SRichard Henderson * unspecified results, not undefined behaviour (i.e. no trap). 99109a52d85SRichard Henderson * Discard out-of-range results after the fact. 99209a52d85SRichard Henderson */ 99309a52d85SRichard Henderson tcg_gen_ext8s_i32(lsh, shift); 99409a52d85SRichard Henderson tcg_gen_neg_i32(rsh, lsh); 99509a52d85SRichard Henderson tcg_gen_shl_i32(lval, src, lsh); 99609a52d85SRichard Henderson tcg_gen_shr_i32(rval, src, rsh); 99709a52d85SRichard Henderson tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero); 99809a52d85SRichard Henderson tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst); 99909a52d85SRichard Henderson } 100009a52d85SRichard Henderson 100109a52d85SRichard Henderson void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift) 100209a52d85SRichard Henderson { 100309a52d85SRichard Henderson TCGv_i64 lval = tcg_temp_new_i64(); 100409a52d85SRichard Henderson TCGv_i64 rval = tcg_temp_new_i64(); 100509a52d85SRichard Henderson TCGv_i64 lsh = tcg_temp_new_i64(); 100609a52d85SRichard Henderson TCGv_i64 rsh = tcg_temp_new_i64(); 100709a52d85SRichard Henderson TCGv_i64 zero = tcg_constant_i64(0); 100809a52d85SRichard Henderson TCGv_i64 max = tcg_constant_i64(64); 100909a52d85SRichard Henderson 101009a52d85SRichard Henderson /* 101109a52d85SRichard Henderson * Rely on the TCG guarantee that out of range shifts produce 101209a52d85SRichard Henderson * unspecified results, not undefined behaviour (i.e. no trap). 101309a52d85SRichard Henderson * Discard out-of-range results after the fact. 101409a52d85SRichard Henderson */ 101509a52d85SRichard Henderson tcg_gen_ext8s_i64(lsh, shift); 101609a52d85SRichard Henderson tcg_gen_neg_i64(rsh, lsh); 101709a52d85SRichard Henderson tcg_gen_shl_i64(lval, src, lsh); 101809a52d85SRichard Henderson tcg_gen_shr_i64(rval, src, rsh); 101909a52d85SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero); 102009a52d85SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst); 102109a52d85SRichard Henderson } 102209a52d85SRichard Henderson 102309a52d85SRichard Henderson static void gen_ushl_vec(unsigned vece, TCGv_vec dst, 102409a52d85SRichard Henderson TCGv_vec src, TCGv_vec shift) 102509a52d85SRichard Henderson { 102609a52d85SRichard Henderson TCGv_vec lval = tcg_temp_new_vec_matching(dst); 102709a52d85SRichard Henderson TCGv_vec rval = tcg_temp_new_vec_matching(dst); 102809a52d85SRichard Henderson TCGv_vec lsh = tcg_temp_new_vec_matching(dst); 102909a52d85SRichard Henderson TCGv_vec rsh = tcg_temp_new_vec_matching(dst); 103009a52d85SRichard Henderson TCGv_vec msk, max; 103109a52d85SRichard Henderson 103209a52d85SRichard Henderson tcg_gen_neg_vec(vece, rsh, shift); 103309a52d85SRichard Henderson if (vece == MO_8) { 103409a52d85SRichard Henderson tcg_gen_mov_vec(lsh, shift); 103509a52d85SRichard Henderson } else { 103609a52d85SRichard Henderson msk = tcg_temp_new_vec_matching(dst); 103709a52d85SRichard Henderson tcg_gen_dupi_vec(vece, msk, 0xff); 103809a52d85SRichard Henderson tcg_gen_and_vec(vece, lsh, shift, msk); 103909a52d85SRichard Henderson tcg_gen_and_vec(vece, rsh, rsh, msk); 104009a52d85SRichard Henderson } 104109a52d85SRichard Henderson 104209a52d85SRichard Henderson /* 104309a52d85SRichard Henderson * Rely on the TCG guarantee that out of range shifts produce 104409a52d85SRichard Henderson * unspecified results, not undefined behaviour (i.e. no trap). 104509a52d85SRichard Henderson * Discard out-of-range results after the fact. 104609a52d85SRichard Henderson */ 104709a52d85SRichard Henderson tcg_gen_shlv_vec(vece, lval, src, lsh); 104809a52d85SRichard Henderson tcg_gen_shrv_vec(vece, rval, src, rsh); 104909a52d85SRichard Henderson 105009a52d85SRichard Henderson max = tcg_temp_new_vec_matching(dst); 105109a52d85SRichard Henderson tcg_gen_dupi_vec(vece, max, 8 << vece); 105209a52d85SRichard Henderson 105309a52d85SRichard Henderson /* 105409a52d85SRichard Henderson * The choice of LT (signed) and GEU (unsigned) are biased toward 105509a52d85SRichard Henderson * the instructions of the x86_64 host. For MO_8, the whole byte 105609a52d85SRichard Henderson * is significant so we must use an unsigned compare; otherwise we 105709a52d85SRichard Henderson * have already masked to a byte and so a signed compare works. 105809a52d85SRichard Henderson * Other tcg hosts have a full set of comparisons and do not care. 105909a52d85SRichard Henderson */ 106009a52d85SRichard Henderson if (vece == MO_8) { 106109a52d85SRichard Henderson tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max); 106209a52d85SRichard Henderson tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max); 106309a52d85SRichard Henderson tcg_gen_andc_vec(vece, lval, lval, lsh); 106409a52d85SRichard Henderson tcg_gen_andc_vec(vece, rval, rval, rsh); 106509a52d85SRichard Henderson } else { 106609a52d85SRichard Henderson tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max); 106709a52d85SRichard Henderson tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max); 106809a52d85SRichard Henderson tcg_gen_and_vec(vece, lval, lval, lsh); 106909a52d85SRichard Henderson tcg_gen_and_vec(vece, rval, rval, rsh); 107009a52d85SRichard Henderson } 107109a52d85SRichard Henderson tcg_gen_or_vec(vece, dst, lval, rval); 107209a52d85SRichard Henderson } 107309a52d85SRichard Henderson 107409a52d85SRichard Henderson void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 107509a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 107609a52d85SRichard Henderson { 107709a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 107809a52d85SRichard Henderson INDEX_op_neg_vec, INDEX_op_shlv_vec, 107909a52d85SRichard Henderson INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0 108009a52d85SRichard Henderson }; 108109a52d85SRichard Henderson static const GVecGen3 ops[4] = { 108209a52d85SRichard Henderson { .fniv = gen_ushl_vec, 108309a52d85SRichard Henderson .fno = gen_helper_gvec_ushl_b, 108409a52d85SRichard Henderson .opt_opc = vecop_list, 108509a52d85SRichard Henderson .vece = MO_8 }, 108609a52d85SRichard Henderson { .fniv = gen_ushl_vec, 108709a52d85SRichard Henderson .fno = gen_helper_gvec_ushl_h, 108809a52d85SRichard Henderson .opt_opc = vecop_list, 108909a52d85SRichard Henderson .vece = MO_16 }, 109009a52d85SRichard Henderson { .fni4 = gen_ushl_i32, 109109a52d85SRichard Henderson .fniv = gen_ushl_vec, 109209a52d85SRichard Henderson .opt_opc = vecop_list, 109309a52d85SRichard Henderson .vece = MO_32 }, 109409a52d85SRichard Henderson { .fni8 = gen_ushl_i64, 109509a52d85SRichard Henderson .fniv = gen_ushl_vec, 109609a52d85SRichard Henderson .opt_opc = vecop_list, 109709a52d85SRichard Henderson .vece = MO_64 }, 109809a52d85SRichard Henderson }; 109909a52d85SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 110009a52d85SRichard Henderson } 110109a52d85SRichard Henderson 110209a52d85SRichard Henderson void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift) 110309a52d85SRichard Henderson { 110409a52d85SRichard Henderson TCGv_i32 lval = tcg_temp_new_i32(); 110509a52d85SRichard Henderson TCGv_i32 rval = tcg_temp_new_i32(); 110609a52d85SRichard Henderson TCGv_i32 lsh = tcg_temp_new_i32(); 110709a52d85SRichard Henderson TCGv_i32 rsh = tcg_temp_new_i32(); 110809a52d85SRichard Henderson TCGv_i32 zero = tcg_constant_i32(0); 110909a52d85SRichard Henderson TCGv_i32 max = tcg_constant_i32(31); 111009a52d85SRichard Henderson 111109a52d85SRichard Henderson /* 111209a52d85SRichard Henderson * Rely on the TCG guarantee that out of range shifts produce 111309a52d85SRichard Henderson * unspecified results, not undefined behaviour (i.e. no trap). 111409a52d85SRichard Henderson * Discard out-of-range results after the fact. 111509a52d85SRichard Henderson */ 111609a52d85SRichard Henderson tcg_gen_ext8s_i32(lsh, shift); 111709a52d85SRichard Henderson tcg_gen_neg_i32(rsh, lsh); 111809a52d85SRichard Henderson tcg_gen_shl_i32(lval, src, lsh); 111909a52d85SRichard Henderson tcg_gen_umin_i32(rsh, rsh, max); 112009a52d85SRichard Henderson tcg_gen_sar_i32(rval, src, rsh); 112109a52d85SRichard Henderson tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero); 112209a52d85SRichard Henderson tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval); 112309a52d85SRichard Henderson } 112409a52d85SRichard Henderson 112509a52d85SRichard Henderson void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift) 112609a52d85SRichard Henderson { 112709a52d85SRichard Henderson TCGv_i64 lval = tcg_temp_new_i64(); 112809a52d85SRichard Henderson TCGv_i64 rval = tcg_temp_new_i64(); 112909a52d85SRichard Henderson TCGv_i64 lsh = tcg_temp_new_i64(); 113009a52d85SRichard Henderson TCGv_i64 rsh = tcg_temp_new_i64(); 113109a52d85SRichard Henderson TCGv_i64 zero = tcg_constant_i64(0); 113209a52d85SRichard Henderson TCGv_i64 max = tcg_constant_i64(63); 113309a52d85SRichard Henderson 113409a52d85SRichard Henderson /* 113509a52d85SRichard Henderson * Rely on the TCG guarantee that out of range shifts produce 113609a52d85SRichard Henderson * unspecified results, not undefined behaviour (i.e. no trap). 113709a52d85SRichard Henderson * Discard out-of-range results after the fact. 113809a52d85SRichard Henderson */ 113909a52d85SRichard Henderson tcg_gen_ext8s_i64(lsh, shift); 114009a52d85SRichard Henderson tcg_gen_neg_i64(rsh, lsh); 114109a52d85SRichard Henderson tcg_gen_shl_i64(lval, src, lsh); 114209a52d85SRichard Henderson tcg_gen_umin_i64(rsh, rsh, max); 114309a52d85SRichard Henderson tcg_gen_sar_i64(rval, src, rsh); 114409a52d85SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero); 114509a52d85SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval); 114609a52d85SRichard Henderson } 114709a52d85SRichard Henderson 114809a52d85SRichard Henderson static void gen_sshl_vec(unsigned vece, TCGv_vec dst, 114909a52d85SRichard Henderson TCGv_vec src, TCGv_vec shift) 115009a52d85SRichard Henderson { 115109a52d85SRichard Henderson TCGv_vec lval = tcg_temp_new_vec_matching(dst); 115209a52d85SRichard Henderson TCGv_vec rval = tcg_temp_new_vec_matching(dst); 115309a52d85SRichard Henderson TCGv_vec lsh = tcg_temp_new_vec_matching(dst); 115409a52d85SRichard Henderson TCGv_vec rsh = tcg_temp_new_vec_matching(dst); 115509a52d85SRichard Henderson TCGv_vec tmp = tcg_temp_new_vec_matching(dst); 115609a52d85SRichard Henderson 115709a52d85SRichard Henderson /* 115809a52d85SRichard Henderson * Rely on the TCG guarantee that out of range shifts produce 115909a52d85SRichard Henderson * unspecified results, not undefined behaviour (i.e. no trap). 116009a52d85SRichard Henderson * Discard out-of-range results after the fact. 116109a52d85SRichard Henderson */ 116209a52d85SRichard Henderson tcg_gen_neg_vec(vece, rsh, shift); 116309a52d85SRichard Henderson if (vece == MO_8) { 116409a52d85SRichard Henderson tcg_gen_mov_vec(lsh, shift); 116509a52d85SRichard Henderson } else { 116609a52d85SRichard Henderson tcg_gen_dupi_vec(vece, tmp, 0xff); 116709a52d85SRichard Henderson tcg_gen_and_vec(vece, lsh, shift, tmp); 116809a52d85SRichard Henderson tcg_gen_and_vec(vece, rsh, rsh, tmp); 116909a52d85SRichard Henderson } 117009a52d85SRichard Henderson 117109a52d85SRichard Henderson /* Bound rsh so out of bound right shift gets -1. */ 117209a52d85SRichard Henderson tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1); 117309a52d85SRichard Henderson tcg_gen_umin_vec(vece, rsh, rsh, tmp); 117409a52d85SRichard Henderson tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp); 117509a52d85SRichard Henderson 117609a52d85SRichard Henderson tcg_gen_shlv_vec(vece, lval, src, lsh); 117709a52d85SRichard Henderson tcg_gen_sarv_vec(vece, rval, src, rsh); 117809a52d85SRichard Henderson 117909a52d85SRichard Henderson /* Select in-bound left shift. */ 118009a52d85SRichard Henderson tcg_gen_andc_vec(vece, lval, lval, tmp); 118109a52d85SRichard Henderson 118209a52d85SRichard Henderson /* Select between left and right shift. */ 118309a52d85SRichard Henderson if (vece == MO_8) { 118409a52d85SRichard Henderson tcg_gen_dupi_vec(vece, tmp, 0); 118509a52d85SRichard Henderson tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval); 118609a52d85SRichard Henderson } else { 118709a52d85SRichard Henderson tcg_gen_dupi_vec(vece, tmp, 0x80); 118809a52d85SRichard Henderson tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval); 118909a52d85SRichard Henderson } 119009a52d85SRichard Henderson } 119109a52d85SRichard Henderson 119209a52d85SRichard Henderson void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 119309a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 119409a52d85SRichard Henderson { 119509a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 119609a52d85SRichard Henderson INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec, 119709a52d85SRichard Henderson INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0 119809a52d85SRichard Henderson }; 119909a52d85SRichard Henderson static const GVecGen3 ops[4] = { 120009a52d85SRichard Henderson { .fniv = gen_sshl_vec, 120109a52d85SRichard Henderson .fno = gen_helper_gvec_sshl_b, 120209a52d85SRichard Henderson .opt_opc = vecop_list, 120309a52d85SRichard Henderson .vece = MO_8 }, 120409a52d85SRichard Henderson { .fniv = gen_sshl_vec, 120509a52d85SRichard Henderson .fno = gen_helper_gvec_sshl_h, 120609a52d85SRichard Henderson .opt_opc = vecop_list, 120709a52d85SRichard Henderson .vece = MO_16 }, 120809a52d85SRichard Henderson { .fni4 = gen_sshl_i32, 120909a52d85SRichard Henderson .fniv = gen_sshl_vec, 121009a52d85SRichard Henderson .opt_opc = vecop_list, 121109a52d85SRichard Henderson .vece = MO_32 }, 121209a52d85SRichard Henderson { .fni8 = gen_sshl_i64, 121309a52d85SRichard Henderson .fniv = gen_sshl_vec, 121409a52d85SRichard Henderson .opt_opc = vecop_list, 121509a52d85SRichard Henderson .vece = MO_64 }, 121609a52d85SRichard Henderson }; 121709a52d85SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 121809a52d85SRichard Henderson } 121909a52d85SRichard Henderson 1220*76f4a8aeSRichard Henderson static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, 122109a52d85SRichard Henderson TCGv_vec a, TCGv_vec b) 122209a52d85SRichard Henderson { 122309a52d85SRichard Henderson TCGv_vec x = tcg_temp_new_vec_matching(t); 122409a52d85SRichard Henderson tcg_gen_add_vec(vece, x, a, b); 122509a52d85SRichard Henderson tcg_gen_usadd_vec(vece, t, a, b); 1226*76f4a8aeSRichard Henderson tcg_gen_xor_vec(vece, x, x, t); 1227*76f4a8aeSRichard Henderson tcg_gen_or_vec(vece, qc, qc, x); 122809a52d85SRichard Henderson } 122909a52d85SRichard Henderson 123009a52d85SRichard Henderson void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 123109a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 123209a52d85SRichard Henderson { 123309a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 1234*76f4a8aeSRichard Henderson INDEX_op_usadd_vec, INDEX_op_add_vec, 0 123509a52d85SRichard Henderson }; 123609a52d85SRichard Henderson static const GVecGen4 ops[4] = { 123709a52d85SRichard Henderson { .fniv = gen_uqadd_vec, 123809a52d85SRichard Henderson .fno = gen_helper_gvec_uqadd_b, 123909a52d85SRichard Henderson .write_aofs = true, 124009a52d85SRichard Henderson .opt_opc = vecop_list, 124109a52d85SRichard Henderson .vece = MO_8 }, 124209a52d85SRichard Henderson { .fniv = gen_uqadd_vec, 124309a52d85SRichard Henderson .fno = gen_helper_gvec_uqadd_h, 124409a52d85SRichard Henderson .write_aofs = true, 124509a52d85SRichard Henderson .opt_opc = vecop_list, 124609a52d85SRichard Henderson .vece = MO_16 }, 124709a52d85SRichard Henderson { .fniv = gen_uqadd_vec, 124809a52d85SRichard Henderson .fno = gen_helper_gvec_uqadd_s, 124909a52d85SRichard Henderson .write_aofs = true, 125009a52d85SRichard Henderson .opt_opc = vecop_list, 125109a52d85SRichard Henderson .vece = MO_32 }, 125209a52d85SRichard Henderson { .fniv = gen_uqadd_vec, 125309a52d85SRichard Henderson .fno = gen_helper_gvec_uqadd_d, 125409a52d85SRichard Henderson .write_aofs = true, 125509a52d85SRichard Henderson .opt_opc = vecop_list, 125609a52d85SRichard Henderson .vece = MO_64 }, 125709a52d85SRichard Henderson }; 125809a52d85SRichard Henderson tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), 125909a52d85SRichard Henderson rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 126009a52d85SRichard Henderson } 126109a52d85SRichard Henderson 1262*76f4a8aeSRichard Henderson static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, 126309a52d85SRichard Henderson TCGv_vec a, TCGv_vec b) 126409a52d85SRichard Henderson { 126509a52d85SRichard Henderson TCGv_vec x = tcg_temp_new_vec_matching(t); 126609a52d85SRichard Henderson tcg_gen_add_vec(vece, x, a, b); 126709a52d85SRichard Henderson tcg_gen_ssadd_vec(vece, t, a, b); 1268*76f4a8aeSRichard Henderson tcg_gen_xor_vec(vece, x, x, t); 1269*76f4a8aeSRichard Henderson tcg_gen_or_vec(vece, qc, qc, x); 127009a52d85SRichard Henderson } 127109a52d85SRichard Henderson 127209a52d85SRichard Henderson void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 127309a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 127409a52d85SRichard Henderson { 127509a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 1276*76f4a8aeSRichard Henderson INDEX_op_ssadd_vec, INDEX_op_add_vec, 0 127709a52d85SRichard Henderson }; 127809a52d85SRichard Henderson static const GVecGen4 ops[4] = { 127909a52d85SRichard Henderson { .fniv = gen_sqadd_vec, 128009a52d85SRichard Henderson .fno = gen_helper_gvec_sqadd_b, 128109a52d85SRichard Henderson .opt_opc = vecop_list, 128209a52d85SRichard Henderson .write_aofs = true, 128309a52d85SRichard Henderson .vece = MO_8 }, 128409a52d85SRichard Henderson { .fniv = gen_sqadd_vec, 128509a52d85SRichard Henderson .fno = gen_helper_gvec_sqadd_h, 128609a52d85SRichard Henderson .opt_opc = vecop_list, 128709a52d85SRichard Henderson .write_aofs = true, 128809a52d85SRichard Henderson .vece = MO_16 }, 128909a52d85SRichard Henderson { .fniv = gen_sqadd_vec, 129009a52d85SRichard Henderson .fno = gen_helper_gvec_sqadd_s, 129109a52d85SRichard Henderson .opt_opc = vecop_list, 129209a52d85SRichard Henderson .write_aofs = true, 129309a52d85SRichard Henderson .vece = MO_32 }, 129409a52d85SRichard Henderson { .fniv = gen_sqadd_vec, 129509a52d85SRichard Henderson .fno = gen_helper_gvec_sqadd_d, 129609a52d85SRichard Henderson .opt_opc = vecop_list, 129709a52d85SRichard Henderson .write_aofs = true, 129809a52d85SRichard Henderson .vece = MO_64 }, 129909a52d85SRichard Henderson }; 130009a52d85SRichard Henderson tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), 130109a52d85SRichard Henderson rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 130209a52d85SRichard Henderson } 130309a52d85SRichard Henderson 1304*76f4a8aeSRichard Henderson static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, 130509a52d85SRichard Henderson TCGv_vec a, TCGv_vec b) 130609a52d85SRichard Henderson { 130709a52d85SRichard Henderson TCGv_vec x = tcg_temp_new_vec_matching(t); 130809a52d85SRichard Henderson tcg_gen_sub_vec(vece, x, a, b); 130909a52d85SRichard Henderson tcg_gen_ussub_vec(vece, t, a, b); 1310*76f4a8aeSRichard Henderson tcg_gen_xor_vec(vece, x, x, t); 1311*76f4a8aeSRichard Henderson tcg_gen_or_vec(vece, qc, qc, x); 131209a52d85SRichard Henderson } 131309a52d85SRichard Henderson 131409a52d85SRichard Henderson void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 131509a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 131609a52d85SRichard Henderson { 131709a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 1318*76f4a8aeSRichard Henderson INDEX_op_ussub_vec, INDEX_op_sub_vec, 0 131909a52d85SRichard Henderson }; 132009a52d85SRichard Henderson static const GVecGen4 ops[4] = { 132109a52d85SRichard Henderson { .fniv = gen_uqsub_vec, 132209a52d85SRichard Henderson .fno = gen_helper_gvec_uqsub_b, 132309a52d85SRichard Henderson .opt_opc = vecop_list, 132409a52d85SRichard Henderson .write_aofs = true, 132509a52d85SRichard Henderson .vece = MO_8 }, 132609a52d85SRichard Henderson { .fniv = gen_uqsub_vec, 132709a52d85SRichard Henderson .fno = gen_helper_gvec_uqsub_h, 132809a52d85SRichard Henderson .opt_opc = vecop_list, 132909a52d85SRichard Henderson .write_aofs = true, 133009a52d85SRichard Henderson .vece = MO_16 }, 133109a52d85SRichard Henderson { .fniv = gen_uqsub_vec, 133209a52d85SRichard Henderson .fno = gen_helper_gvec_uqsub_s, 133309a52d85SRichard Henderson .opt_opc = vecop_list, 133409a52d85SRichard Henderson .write_aofs = true, 133509a52d85SRichard Henderson .vece = MO_32 }, 133609a52d85SRichard Henderson { .fniv = gen_uqsub_vec, 133709a52d85SRichard Henderson .fno = gen_helper_gvec_uqsub_d, 133809a52d85SRichard Henderson .opt_opc = vecop_list, 133909a52d85SRichard Henderson .write_aofs = true, 134009a52d85SRichard Henderson .vece = MO_64 }, 134109a52d85SRichard Henderson }; 134209a52d85SRichard Henderson tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), 134309a52d85SRichard Henderson rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 134409a52d85SRichard Henderson } 134509a52d85SRichard Henderson 1346*76f4a8aeSRichard Henderson static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, 134709a52d85SRichard Henderson TCGv_vec a, TCGv_vec b) 134809a52d85SRichard Henderson { 134909a52d85SRichard Henderson TCGv_vec x = tcg_temp_new_vec_matching(t); 135009a52d85SRichard Henderson tcg_gen_sub_vec(vece, x, a, b); 135109a52d85SRichard Henderson tcg_gen_sssub_vec(vece, t, a, b); 1352*76f4a8aeSRichard Henderson tcg_gen_xor_vec(vece, x, x, t); 1353*76f4a8aeSRichard Henderson tcg_gen_or_vec(vece, qc, qc, x); 135409a52d85SRichard Henderson } 135509a52d85SRichard Henderson 135609a52d85SRichard Henderson void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 135709a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 135809a52d85SRichard Henderson { 135909a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 1360*76f4a8aeSRichard Henderson INDEX_op_sssub_vec, INDEX_op_sub_vec, 0 136109a52d85SRichard Henderson }; 136209a52d85SRichard Henderson static const GVecGen4 ops[4] = { 136309a52d85SRichard Henderson { .fniv = gen_sqsub_vec, 136409a52d85SRichard Henderson .fno = gen_helper_gvec_sqsub_b, 136509a52d85SRichard Henderson .opt_opc = vecop_list, 136609a52d85SRichard Henderson .write_aofs = true, 136709a52d85SRichard Henderson .vece = MO_8 }, 136809a52d85SRichard Henderson { .fniv = gen_sqsub_vec, 136909a52d85SRichard Henderson .fno = gen_helper_gvec_sqsub_h, 137009a52d85SRichard Henderson .opt_opc = vecop_list, 137109a52d85SRichard Henderson .write_aofs = true, 137209a52d85SRichard Henderson .vece = MO_16 }, 137309a52d85SRichard Henderson { .fniv = gen_sqsub_vec, 137409a52d85SRichard Henderson .fno = gen_helper_gvec_sqsub_s, 137509a52d85SRichard Henderson .opt_opc = vecop_list, 137609a52d85SRichard Henderson .write_aofs = true, 137709a52d85SRichard Henderson .vece = MO_32 }, 137809a52d85SRichard Henderson { .fniv = gen_sqsub_vec, 137909a52d85SRichard Henderson .fno = gen_helper_gvec_sqsub_d, 138009a52d85SRichard Henderson .opt_opc = vecop_list, 138109a52d85SRichard Henderson .write_aofs = true, 138209a52d85SRichard Henderson .vece = MO_64 }, 138309a52d85SRichard Henderson }; 138409a52d85SRichard Henderson tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), 138509a52d85SRichard Henderson rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 138609a52d85SRichard Henderson } 138709a52d85SRichard Henderson 138809a52d85SRichard Henderson static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 138909a52d85SRichard Henderson { 139009a52d85SRichard Henderson TCGv_i32 t = tcg_temp_new_i32(); 139109a52d85SRichard Henderson 139209a52d85SRichard Henderson tcg_gen_sub_i32(t, a, b); 139309a52d85SRichard Henderson tcg_gen_sub_i32(d, b, a); 139409a52d85SRichard Henderson tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t); 139509a52d85SRichard Henderson } 139609a52d85SRichard Henderson 139709a52d85SRichard Henderson static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 139809a52d85SRichard Henderson { 139909a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 140009a52d85SRichard Henderson 140109a52d85SRichard Henderson tcg_gen_sub_i64(t, a, b); 140209a52d85SRichard Henderson tcg_gen_sub_i64(d, b, a); 140309a52d85SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t); 140409a52d85SRichard Henderson } 140509a52d85SRichard Henderson 140609a52d85SRichard Henderson static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 140709a52d85SRichard Henderson { 140809a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 140909a52d85SRichard Henderson 141009a52d85SRichard Henderson tcg_gen_smin_vec(vece, t, a, b); 141109a52d85SRichard Henderson tcg_gen_smax_vec(vece, d, a, b); 141209a52d85SRichard Henderson tcg_gen_sub_vec(vece, d, d, t); 141309a52d85SRichard Henderson } 141409a52d85SRichard Henderson 141509a52d85SRichard Henderson void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 141609a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 141709a52d85SRichard Henderson { 141809a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 141909a52d85SRichard Henderson INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0 142009a52d85SRichard Henderson }; 142109a52d85SRichard Henderson static const GVecGen3 ops[4] = { 142209a52d85SRichard Henderson { .fniv = gen_sabd_vec, 142309a52d85SRichard Henderson .fno = gen_helper_gvec_sabd_b, 142409a52d85SRichard Henderson .opt_opc = vecop_list, 142509a52d85SRichard Henderson .vece = MO_8 }, 142609a52d85SRichard Henderson { .fniv = gen_sabd_vec, 142709a52d85SRichard Henderson .fno = gen_helper_gvec_sabd_h, 142809a52d85SRichard Henderson .opt_opc = vecop_list, 142909a52d85SRichard Henderson .vece = MO_16 }, 143009a52d85SRichard Henderson { .fni4 = gen_sabd_i32, 143109a52d85SRichard Henderson .fniv = gen_sabd_vec, 143209a52d85SRichard Henderson .fno = gen_helper_gvec_sabd_s, 143309a52d85SRichard Henderson .opt_opc = vecop_list, 143409a52d85SRichard Henderson .vece = MO_32 }, 143509a52d85SRichard Henderson { .fni8 = gen_sabd_i64, 143609a52d85SRichard Henderson .fniv = gen_sabd_vec, 143709a52d85SRichard Henderson .fno = gen_helper_gvec_sabd_d, 143809a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 143909a52d85SRichard Henderson .opt_opc = vecop_list, 144009a52d85SRichard Henderson .vece = MO_64 }, 144109a52d85SRichard Henderson }; 144209a52d85SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 144309a52d85SRichard Henderson } 144409a52d85SRichard Henderson 144509a52d85SRichard Henderson static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 144609a52d85SRichard Henderson { 144709a52d85SRichard Henderson TCGv_i32 t = tcg_temp_new_i32(); 144809a52d85SRichard Henderson 144909a52d85SRichard Henderson tcg_gen_sub_i32(t, a, b); 145009a52d85SRichard Henderson tcg_gen_sub_i32(d, b, a); 145109a52d85SRichard Henderson tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t); 145209a52d85SRichard Henderson } 145309a52d85SRichard Henderson 145409a52d85SRichard Henderson static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 145509a52d85SRichard Henderson { 145609a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 145709a52d85SRichard Henderson 145809a52d85SRichard Henderson tcg_gen_sub_i64(t, a, b); 145909a52d85SRichard Henderson tcg_gen_sub_i64(d, b, a); 146009a52d85SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t); 146109a52d85SRichard Henderson } 146209a52d85SRichard Henderson 146309a52d85SRichard Henderson static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 146409a52d85SRichard Henderson { 146509a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 146609a52d85SRichard Henderson 146709a52d85SRichard Henderson tcg_gen_umin_vec(vece, t, a, b); 146809a52d85SRichard Henderson tcg_gen_umax_vec(vece, d, a, b); 146909a52d85SRichard Henderson tcg_gen_sub_vec(vece, d, d, t); 147009a52d85SRichard Henderson } 147109a52d85SRichard Henderson 147209a52d85SRichard Henderson void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 147309a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 147409a52d85SRichard Henderson { 147509a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 147609a52d85SRichard Henderson INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0 147709a52d85SRichard Henderson }; 147809a52d85SRichard Henderson static const GVecGen3 ops[4] = { 147909a52d85SRichard Henderson { .fniv = gen_uabd_vec, 148009a52d85SRichard Henderson .fno = gen_helper_gvec_uabd_b, 148109a52d85SRichard Henderson .opt_opc = vecop_list, 148209a52d85SRichard Henderson .vece = MO_8 }, 148309a52d85SRichard Henderson { .fniv = gen_uabd_vec, 148409a52d85SRichard Henderson .fno = gen_helper_gvec_uabd_h, 148509a52d85SRichard Henderson .opt_opc = vecop_list, 148609a52d85SRichard Henderson .vece = MO_16 }, 148709a52d85SRichard Henderson { .fni4 = gen_uabd_i32, 148809a52d85SRichard Henderson .fniv = gen_uabd_vec, 148909a52d85SRichard Henderson .fno = gen_helper_gvec_uabd_s, 149009a52d85SRichard Henderson .opt_opc = vecop_list, 149109a52d85SRichard Henderson .vece = MO_32 }, 149209a52d85SRichard Henderson { .fni8 = gen_uabd_i64, 149309a52d85SRichard Henderson .fniv = gen_uabd_vec, 149409a52d85SRichard Henderson .fno = gen_helper_gvec_uabd_d, 149509a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 149609a52d85SRichard Henderson .opt_opc = vecop_list, 149709a52d85SRichard Henderson .vece = MO_64 }, 149809a52d85SRichard Henderson }; 149909a52d85SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 150009a52d85SRichard Henderson } 150109a52d85SRichard Henderson 150209a52d85SRichard Henderson static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 150309a52d85SRichard Henderson { 150409a52d85SRichard Henderson TCGv_i32 t = tcg_temp_new_i32(); 150509a52d85SRichard Henderson gen_sabd_i32(t, a, b); 150609a52d85SRichard Henderson tcg_gen_add_i32(d, d, t); 150709a52d85SRichard Henderson } 150809a52d85SRichard Henderson 150909a52d85SRichard Henderson static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 151009a52d85SRichard Henderson { 151109a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 151209a52d85SRichard Henderson gen_sabd_i64(t, a, b); 151309a52d85SRichard Henderson tcg_gen_add_i64(d, d, t); 151409a52d85SRichard Henderson } 151509a52d85SRichard Henderson 151609a52d85SRichard Henderson static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 151709a52d85SRichard Henderson { 151809a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 151909a52d85SRichard Henderson gen_sabd_vec(vece, t, a, b); 152009a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, t); 152109a52d85SRichard Henderson } 152209a52d85SRichard Henderson 152309a52d85SRichard Henderson void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 152409a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 152509a52d85SRichard Henderson { 152609a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 152709a52d85SRichard Henderson INDEX_op_sub_vec, INDEX_op_add_vec, 152809a52d85SRichard Henderson INDEX_op_smin_vec, INDEX_op_smax_vec, 0 152909a52d85SRichard Henderson }; 153009a52d85SRichard Henderson static const GVecGen3 ops[4] = { 153109a52d85SRichard Henderson { .fniv = gen_saba_vec, 153209a52d85SRichard Henderson .fno = gen_helper_gvec_saba_b, 153309a52d85SRichard Henderson .opt_opc = vecop_list, 153409a52d85SRichard Henderson .load_dest = true, 153509a52d85SRichard Henderson .vece = MO_8 }, 153609a52d85SRichard Henderson { .fniv = gen_saba_vec, 153709a52d85SRichard Henderson .fno = gen_helper_gvec_saba_h, 153809a52d85SRichard Henderson .opt_opc = vecop_list, 153909a52d85SRichard Henderson .load_dest = true, 154009a52d85SRichard Henderson .vece = MO_16 }, 154109a52d85SRichard Henderson { .fni4 = gen_saba_i32, 154209a52d85SRichard Henderson .fniv = gen_saba_vec, 154309a52d85SRichard Henderson .fno = gen_helper_gvec_saba_s, 154409a52d85SRichard Henderson .opt_opc = vecop_list, 154509a52d85SRichard Henderson .load_dest = true, 154609a52d85SRichard Henderson .vece = MO_32 }, 154709a52d85SRichard Henderson { .fni8 = gen_saba_i64, 154809a52d85SRichard Henderson .fniv = gen_saba_vec, 154909a52d85SRichard Henderson .fno = gen_helper_gvec_saba_d, 155009a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 155109a52d85SRichard Henderson .opt_opc = vecop_list, 155209a52d85SRichard Henderson .load_dest = true, 155309a52d85SRichard Henderson .vece = MO_64 }, 155409a52d85SRichard Henderson }; 155509a52d85SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 155609a52d85SRichard Henderson } 155709a52d85SRichard Henderson 155809a52d85SRichard Henderson static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 155909a52d85SRichard Henderson { 156009a52d85SRichard Henderson TCGv_i32 t = tcg_temp_new_i32(); 156109a52d85SRichard Henderson gen_uabd_i32(t, a, b); 156209a52d85SRichard Henderson tcg_gen_add_i32(d, d, t); 156309a52d85SRichard Henderson } 156409a52d85SRichard Henderson 156509a52d85SRichard Henderson static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 156609a52d85SRichard Henderson { 156709a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 156809a52d85SRichard Henderson gen_uabd_i64(t, a, b); 156909a52d85SRichard Henderson tcg_gen_add_i64(d, d, t); 157009a52d85SRichard Henderson } 157109a52d85SRichard Henderson 157209a52d85SRichard Henderson static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 157309a52d85SRichard Henderson { 157409a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 157509a52d85SRichard Henderson gen_uabd_vec(vece, t, a, b); 157609a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, t); 157709a52d85SRichard Henderson } 157809a52d85SRichard Henderson 157909a52d85SRichard Henderson void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 158009a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 158109a52d85SRichard Henderson { 158209a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 158309a52d85SRichard Henderson INDEX_op_sub_vec, INDEX_op_add_vec, 158409a52d85SRichard Henderson INDEX_op_umin_vec, INDEX_op_umax_vec, 0 158509a52d85SRichard Henderson }; 158609a52d85SRichard Henderson static const GVecGen3 ops[4] = { 158709a52d85SRichard Henderson { .fniv = gen_uaba_vec, 158809a52d85SRichard Henderson .fno = gen_helper_gvec_uaba_b, 158909a52d85SRichard Henderson .opt_opc = vecop_list, 159009a52d85SRichard Henderson .load_dest = true, 159109a52d85SRichard Henderson .vece = MO_8 }, 159209a52d85SRichard Henderson { .fniv = gen_uaba_vec, 159309a52d85SRichard Henderson .fno = gen_helper_gvec_uaba_h, 159409a52d85SRichard Henderson .opt_opc = vecop_list, 159509a52d85SRichard Henderson .load_dest = true, 159609a52d85SRichard Henderson .vece = MO_16 }, 159709a52d85SRichard Henderson { .fni4 = gen_uaba_i32, 159809a52d85SRichard Henderson .fniv = gen_uaba_vec, 159909a52d85SRichard Henderson .fno = gen_helper_gvec_uaba_s, 160009a52d85SRichard Henderson .opt_opc = vecop_list, 160109a52d85SRichard Henderson .load_dest = true, 160209a52d85SRichard Henderson .vece = MO_32 }, 160309a52d85SRichard Henderson { .fni8 = gen_uaba_i64, 160409a52d85SRichard Henderson .fniv = gen_uaba_vec, 160509a52d85SRichard Henderson .fno = gen_helper_gvec_uaba_d, 160609a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 160709a52d85SRichard Henderson .opt_opc = vecop_list, 160809a52d85SRichard Henderson .load_dest = true, 160909a52d85SRichard Henderson .vece = MO_64 }, 161009a52d85SRichard Henderson }; 161109a52d85SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 161209a52d85SRichard Henderson } 1613a7e4eec6SRichard Henderson 1614a7e4eec6SRichard Henderson void gen_gvec_addp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1615a7e4eec6SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1616a7e4eec6SRichard Henderson { 1617a7e4eec6SRichard Henderson static gen_helper_gvec_3 * const fns[4] = { 1618a7e4eec6SRichard Henderson gen_helper_gvec_addp_b, 1619a7e4eec6SRichard Henderson gen_helper_gvec_addp_h, 1620a7e4eec6SRichard Henderson gen_helper_gvec_addp_s, 1621a7e4eec6SRichard Henderson gen_helper_gvec_addp_d, 1622a7e4eec6SRichard Henderson }; 1623a7e4eec6SRichard Henderson tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 1624a7e4eec6SRichard Henderson } 162528b5451bSRichard Henderson 162628b5451bSRichard Henderson void gen_gvec_smaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 162728b5451bSRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 162828b5451bSRichard Henderson { 162928b5451bSRichard Henderson static gen_helper_gvec_3 * const fns[4] = { 163028b5451bSRichard Henderson gen_helper_gvec_smaxp_b, 163128b5451bSRichard Henderson gen_helper_gvec_smaxp_h, 163228b5451bSRichard Henderson gen_helper_gvec_smaxp_s, 163328b5451bSRichard Henderson }; 163428b5451bSRichard Henderson tcg_debug_assert(vece <= MO_32); 163528b5451bSRichard Henderson tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 163628b5451bSRichard Henderson } 163728b5451bSRichard Henderson 163828b5451bSRichard Henderson void gen_gvec_sminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 163928b5451bSRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 164028b5451bSRichard Henderson { 164128b5451bSRichard Henderson static gen_helper_gvec_3 * const fns[4] = { 164228b5451bSRichard Henderson gen_helper_gvec_sminp_b, 164328b5451bSRichard Henderson gen_helper_gvec_sminp_h, 164428b5451bSRichard Henderson gen_helper_gvec_sminp_s, 164528b5451bSRichard Henderson }; 164628b5451bSRichard Henderson tcg_debug_assert(vece <= MO_32); 164728b5451bSRichard Henderson tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 164828b5451bSRichard Henderson } 164928b5451bSRichard Henderson 165028b5451bSRichard Henderson void gen_gvec_umaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 165128b5451bSRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 165228b5451bSRichard Henderson { 165328b5451bSRichard Henderson static gen_helper_gvec_3 * const fns[4] = { 165428b5451bSRichard Henderson gen_helper_gvec_umaxp_b, 165528b5451bSRichard Henderson gen_helper_gvec_umaxp_h, 165628b5451bSRichard Henderson gen_helper_gvec_umaxp_s, 165728b5451bSRichard Henderson }; 165828b5451bSRichard Henderson tcg_debug_assert(vece <= MO_32); 165928b5451bSRichard Henderson tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 166028b5451bSRichard Henderson } 166128b5451bSRichard Henderson 166228b5451bSRichard Henderson void gen_gvec_uminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 166328b5451bSRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 166428b5451bSRichard Henderson { 166528b5451bSRichard Henderson static gen_helper_gvec_3 * const fns[4] = { 166628b5451bSRichard Henderson gen_helper_gvec_uminp_b, 166728b5451bSRichard Henderson gen_helper_gvec_uminp_h, 166828b5451bSRichard Henderson gen_helper_gvec_uminp_s, 166928b5451bSRichard Henderson }; 167028b5451bSRichard Henderson tcg_debug_assert(vece <= MO_32); 167128b5451bSRichard Henderson tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 167228b5451bSRichard Henderson } 1673