/*
 * ARM generic vector expansion
 *
 * Copyright (c) 2003 Fabrice Bellard
 * Copyright (c) 2005-2007 CodeSourcery
 * Copyright (c) 2007 OpenedHand, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "translate.h"


static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
                            uint32_t opr_sz, uint32_t max_sz,
                            gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr qc_ptr = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc));
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
                       opr_sz, max_sz, 0, fn);
}

void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[2] = {
        gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
    };
    tcg_debug_assert(vece >= 1 && vece <= 2);
    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
}

void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[2] = {
        gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
    };
    tcg_debug_assert(vece >= 1 && vece <= 2);
    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
}

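/*
 * Vector comparison of each element against zero (the CMEQ/CMGE/CMGT/
 * CMLE/CMLT #0 forms), expanded via the generic gvec compare-with-
 * immediate helper.
 */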
#define GEN_CMP0(NAME, COND)                                               \
    void NAME(unsigned vece, uint32_t d, uint32_t m,                       \
              uint32_t opr_sz, uint32_t max_sz)                            \
    { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); }

GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ)
GEN_CMP0(gen_gvec_cle0, TCG_COND_LE)
GEN_CMP0(gen_gvec_cge0, TCG_COND_GE)
GEN_CMP0(gen_gvec_clt0, TCG_COND_LT)
GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT)

#undef GEN_CMP0

static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}

static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}

static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_sari_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}

static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_sari_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}

static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_sari_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}

void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_ssra8_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_ssra16_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_ssra32_i32,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_ssra64_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Signed results in all sign bits.
     */
    shift = MIN(shift, (8 << vece) - 1);
    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
}

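/* USRA: unsigned shift right and accumulate; the unsigned counterpart of SSRA above. */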
static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}

static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}

static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}

static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}

static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_shri_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}

void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_usra8_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8, },
        { .fni8 = gen_usra16_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16, },
        { .fni4 = gen_usra32_i32,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32, },
        { .fni8 = gen_usra64_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64, },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Unsigned results in all zeros as input to accumulate: nop.
     */
    if (shift < (8 << vece)) {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    } else {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    }
}

/*
 * Shift one less than the requested amount, and the low bit is
 * the rounding bit. For the 8 and 16-bit operations, because we
 * mask the low bit, we can perform a normal integer shift instead
 * of a vector shift.
 */
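/*
 * For example, with sh == 3 the expansion computes (a >> 3) + ((a >> 2) & 1),
 * which equals the rounded (a + 4) >> 3 but cannot overflow the element the
 * way adding 1 << (sh - 1) before the shift could.
 */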
static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_sar8i_i64(d, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_sar16i_i64(d, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t;

    /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
    if (sh == 32) {
        tcg_gen_movi_i32(d, 0);
        return;
    }
    t = tcg_temp_new_i32();
    tcg_gen_extract_i32(t, a, sh - 1, 1);
    tcg_gen_sari_i32(d, a, sh);
    tcg_gen_add_i32(d, d, t);
}

void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_extract_i64(t, a, sh - 1, 1);
    tcg_gen_sari_i64(d, a, sh);
    tcg_gen_add_i64(d, d, t);
}

static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec ones = tcg_temp_new_vec_matching(d);

    tcg_gen_shri_vec(vece, t, a, sh - 1);
    tcg_gen_dupi_vec(vece, ones, 1);
    tcg_gen_and_vec(vece, t, t, ones);
    tcg_gen_sari_vec(vece, d, a, sh);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_srshr8_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_srshr16_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_srshr32_i32,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_srshr64_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    if (shift == (8 << vece)) {
        /*
         * Shifts larger than the element size are architecturally valid.
         * Signed results in all sign bits. With rounding, this produces
         *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
         * I.e. always zero.
         */
        tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

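/* SRSRA: signed rounding shift right and accumulate, built on the SRSHR helpers above. */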
static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr8_i64(t, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr16_i64(t, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_srshr32_i32(t, a, sh);
    tcg_gen_add_i32(d, d, t);
}

static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr64_i64(t, a, sh);
    tcg_gen_add_i64(d, d, t);
}

static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    gen_srshr_vec(vece, t, a, sh);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_srsra8_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fni8 = gen_srsra16_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_srsra32_i32,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_srsra64_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Signed results in all sign bits. With rounding, this produces
     *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
     * I.e. always zero. With accumulation, this leaves D unchanged.
     */
    if (shift == (8 << vece)) {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

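/* URSHR: unsigned rounding shift right by immediate. */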
static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_shr8i_i64(d, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_shr16i_i64(d, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t;

    /* Handle shift by the input size for the benefit of trans_URSHR_ri */
    if (sh == 32) {
        tcg_gen_extract_i32(d, a, sh - 1, 1);
        return;
    }
    t = tcg_temp_new_i32();
    tcg_gen_extract_i32(t, a, sh - 1, 1);
    tcg_gen_shri_i32(d, a, sh);
    tcg_gen_add_i32(d, d, t);
}

void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_extract_i64(t, a, sh - 1, 1);
    tcg_gen_shri_i64(d, a, sh);
    tcg_gen_add_i64(d, d, t);
}

static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec ones = tcg_temp_new_vec_matching(d);

    tcg_gen_shri_vec(vece, t, a, shift - 1);
    tcg_gen_dupi_vec(vece, ones, 1);
    tcg_gen_and_vec(vece, t, t, ones);
    tcg_gen_shri_vec(vece, d, a, shift);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_urshr8_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_urshr16_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_urshr32_i32,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_urshr64_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    if (shift == (8 << vece)) {
        /*
         * Shifts larger than the element size are architecturally valid.
         * Unsigned results in zero. With rounding, this produces a
         * copy of the most significant bit.
         */
        tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

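/*
 * URSRA: unsigned rounding shift right and accumulate. A shift by the
 * full element size leaves only the rounding bit (the input's MSB), so
 * that case is handled explicitly in each helper.
 */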
static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 8) {
        tcg_gen_vec_shr8i_i64(t, a, 7);
    } else {
        gen_urshr8_i64(t, a, sh);
    }
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 16) {
        tcg_gen_vec_shr16i_i64(t, a, 15);
    } else {
        gen_urshr16_i64(t, a, sh);
    }
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t = tcg_temp_new_i32();

    if (sh == 32) {
        tcg_gen_shri_i32(t, a, 31);
    } else {
        gen_urshr32_i32(t, a, sh);
    }
    tcg_gen_add_i32(d, d, t);
}

static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 64) {
        tcg_gen_shri_i64(t, a, 63);
    } else {
        gen_urshr64_i64(t, a, sh);
    }
    tcg_gen_add_i64(d, d, t);
}

static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    if (sh == (8 << vece)) {
        tcg_gen_shri_vec(vece, t, a, sh - 1);
    } else {
        gen_urshr_vec(vece, t, a, sh);
    }
    tcg_gen_add_vec(vece, d, d, t);
}

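/*
 * Unlike USRA, a shift by the element size is not a nop here: the
 * rounding bit still contributes, so the expansion below is used for
 * the full [1..esize] immediate range.
 */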
void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_ursra8_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fni8 = gen_ursra16_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_ursra32_i32,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_ursra64_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
}

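/*
 * SRI: shift right and insert. The shifted result replaces the low
 * (esize - shift) bits of each destination element; the top 'shift'
 * bits of the destination are preserved.
 */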
static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_8, 0xff >> shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_16, 0xffff >> shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
}

static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
}

static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec m = tcg_temp_new_vec_matching(d);

    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
    tcg_gen_shri_vec(vece, t, a, sh);
    tcg_gen_and_vec(vece, d, d, m);
    tcg_gen_or_vec(vece, d, d, t);
}

void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
    const GVecGen2i ops[4] = {
        { .fni8 = gen_shr8_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_shr16_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_shr32_ins_i32,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_shr64_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /* Shift of esize leaves destination unchanged. */
    if (shift < (8 << vece)) {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    } else {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    }
}

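/*
 * SLI: shift left and insert. The shifted result replaces the high
 * (esize - shift) bits of each destination element; the low 'shift'
 * bits of the destination are preserved.
 */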
static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_8, 0xff << shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shli_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_16, 0xffff << shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shli_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
}

static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
}

static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec m = tcg_temp_new_vec_matching(d);

    tcg_gen_shli_vec(vece, t, a, sh);
    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
    tcg_gen_and_vec(vece, d, d, m);
    tcg_gen_or_vec(vece, d, d, t);
}

void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
    const GVecGen2i ops[4] = {
        { .fni8 = gen_shl8_ins_i64,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_shl16_ins_i64,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_shl32_ins_i32,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_shl64_ins_i64,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [0..esize-1]. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift < (8 << vece));

    if (shift == 0) {
        tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

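/*
 * MLA/MLS: integer multiply-accumulate and multiply-subtract. The
 * product is formed in 'a' and then added to or subtracted from the
 * destination.
 */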
static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u8(a, a, b);
    gen_helper_neon_add_u8(d, d, a);
}

static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u8(a, a, b);
    gen_helper_neon_sub_u8(d, d, a);
}

static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u16(a, a, b);
    gen_helper_neon_add_u16(d, d, a);
}

static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u16(a, a, b);
    gen_helper_neon_sub_u16(d, d, a);
}

static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mul_i32(a, a, b);
    tcg_gen_add_i32(d, d, a);
}

static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mul_i32(a, a, b);
    tcg_gen_sub_i32(d, d, a);
}

static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mul_i64(a, a, b);
    tcg_gen_add_i64(d, d, a);
}

static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mul_i64(a, a, b);
    tcg_gen_sub_i64(d, d, a);
}

static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_mul_vec(vece, a, a, b);
    tcg_gen_add_vec(vece, d, d, a);
}

static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_mul_vec(vece, a, a, b);
    tcg_gen_sub_vec(vece, d, d, a);
}

/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
 * these tables are shared with AArch64 which does support them.
 */
void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_mul_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_mla8_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_mla16_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_mla32_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_mla64_i64,
          .fniv = gen_mla_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_mul_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_mls8_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_mls16_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_mls32_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_mls64_i64,
          .fniv = gen_mls_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/* CMTST : test is "if (X & Y != 0)". */
static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_and_i32(d, a, b);
    tcg_gen_negsetcond_i32(TCG_COND_NE, d, d, tcg_constant_i32(0));
}

void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_and_i64(d, a, b);
    tcg_gen_negsetcond_i64(TCG_COND_NE, d, d, tcg_constant_i64(0));
}

static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_and_vec(vece, d, a, b);
    tcg_gen_dupi_vec(vece, a, 0);
    tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
}

void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_helper_neon_tst_u8,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_helper_neon_tst_u16,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_cmtst_i32,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_cmtst_i64,
          .fniv = gen_cmtst_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

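/*
 * USHL: element-wise shift by a signed, per-element count taken from the
 * low byte of the shift operand. Positive counts shift left, negative
 * counts shift right, and any count whose magnitude is >= the element
 * size yields zero.
 */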
void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
{
    TCGv_i32 lval = tcg_temp_new_i32();
    TCGv_i32 rval = tcg_temp_new_i32();
    TCGv_i32 lsh = tcg_temp_new_i32();
    TCGv_i32 rsh = tcg_temp_new_i32();
    TCGv_i32 zero = tcg_constant_i32(0);
    TCGv_i32 max = tcg_constant_i32(32);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i32(lsh, shift);
    tcg_gen_neg_i32(rsh, lsh);
    tcg_gen_shl_i32(lval, src, lsh);
    tcg_gen_shr_i32(rval, src, rsh);
    tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
    tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
}

void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
{
    TCGv_i64 lval = tcg_temp_new_i64();
    TCGv_i64 rval = tcg_temp_new_i64();
    TCGv_i64 lsh = tcg_temp_new_i64();
    TCGv_i64 rsh = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_constant_i64(0);
    TCGv_i64 max = tcg_constant_i64(64);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i64(lsh, shift);
    tcg_gen_neg_i64(rsh, lsh);
    tcg_gen_shl_i64(lval, src, lsh);
    tcg_gen_shr_i64(rval, src, rsh);
    tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
    tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
}

static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
                         TCGv_vec src, TCGv_vec shift)
{
    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec msk, max;

    tcg_gen_neg_vec(vece, rsh, shift);
    if (vece == MO_8) {
        tcg_gen_mov_vec(lsh, shift);
    } else {
        msk = tcg_temp_new_vec_matching(dst);
        tcg_gen_dupi_vec(vece, msk, 0xff);
        tcg_gen_and_vec(vece, lsh, shift, msk);
        tcg_gen_and_vec(vece, rsh, rsh, msk);
    }

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_shlv_vec(vece, lval, src, lsh);
    tcg_gen_shrv_vec(vece, rval, src, rsh);

    max = tcg_temp_new_vec_matching(dst);
    tcg_gen_dupi_vec(vece, max, 8 << vece);

    /*
     * The choice of LT (signed) and GEU (unsigned) is biased toward
     * the instructions of the x86_64 host. For MO_8, the whole byte
     * is significant so we must use an unsigned compare; otherwise we
     * have already masked to a byte and so a signed compare works.
void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
{
    TCGv_i64 lval = tcg_temp_new_i64();
    TCGv_i64 rval = tcg_temp_new_i64();
    TCGv_i64 lsh = tcg_temp_new_i64();
    TCGv_i64 rsh = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_constant_i64(0);
    TCGv_i64 max = tcg_constant_i64(64);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i64(lsh, shift);
    tcg_gen_neg_i64(rsh, lsh);
    tcg_gen_shl_i64(lval, src, lsh);
    tcg_gen_shr_i64(rval, src, rsh);
    tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
    tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
}

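/*
 * The vector expansion keeps only the low byte of each shift element:
 * for MO_8 that byte is the whole element, otherwise both the left
 * count and the negated right count are masked with 0xff before the
 * variable shifts, and out-of-range results are squashed to zero by
 * the compares that follow.
 */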
static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
                         TCGv_vec src, TCGv_vec shift)
{
    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec msk, max;

    tcg_gen_neg_vec(vece, rsh, shift);
    if (vece == MO_8) {
        tcg_gen_mov_vec(lsh, shift);
    } else {
        msk = tcg_temp_new_vec_matching(dst);
        tcg_gen_dupi_vec(vece, msk, 0xff);
        tcg_gen_and_vec(vece, lsh, shift, msk);
        tcg_gen_and_vec(vece, rsh, rsh, msk);
    }

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_shlv_vec(vece, lval, src, lsh);
    tcg_gen_shrv_vec(vece, rval, src, rsh);

    max = tcg_temp_new_vec_matching(dst);
    tcg_gen_dupi_vec(vece, max, 8 << vece);

    /*
     * The choice of LT (signed) and GEU (unsigned) is biased toward
     * the instructions of the x86_64 host.  For MO_8, the whole byte
     * is significant so we must use an unsigned compare; otherwise we
     * have already masked to a byte and so a signed compare works.
     * Other tcg hosts have a full set of comparisons and do not care.
     */
    if (vece == MO_8) {
        tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
        tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
        tcg_gen_andc_vec(vece, lval, lval, lsh);
        tcg_gen_andc_vec(vece, rval, rval, rsh);
    } else {
        tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
        tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
        tcg_gen_and_vec(vece, lval, lval, lsh);
        tcg_gen_and_vec(vece, rval, rval, rsh);
    }
    tcg_gen_or_vec(vece, dst, lval, rval);
}

void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_neg_vec, INDEX_op_shlv_vec,
        INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_ushl_vec,
          .fno = gen_helper_gvec_ushl_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_ushl_vec,
          .fno = gen_helper_gvec_ushl_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_ushl_i32,
          .fniv = gen_ushl_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_ushl_i64,
          .fniv = gen_ushl_vec,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

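/*
 * Note the asymmetry in the USHL/SSHL tables: MO_8 and MO_16 provide
 * only the out-of-line helper as the non-vector fallback, while MO_32
 * and MO_64 fall back to the inline integer expanders.
 */
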
void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
{
    TCGv_i32 lval = tcg_temp_new_i32();
    TCGv_i32 rval = tcg_temp_new_i32();
    TCGv_i32 lsh = tcg_temp_new_i32();
    TCGv_i32 rsh = tcg_temp_new_i32();
    TCGv_i32 zero = tcg_constant_i32(0);
    TCGv_i32 max = tcg_constant_i32(31);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i32(lsh, shift);
    tcg_gen_neg_i32(rsh, lsh);
    tcg_gen_shl_i32(lval, src, lsh);
    tcg_gen_umin_i32(rsh, rsh, max);
    tcg_gen_sar_i32(rval, src, rsh);
    tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
    tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
}

void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
{
    TCGv_i64 lval = tcg_temp_new_i64();
    TCGv_i64 rval = tcg_temp_new_i64();
    TCGv_i64 lsh = tcg_temp_new_i64();
    TCGv_i64 rsh = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_constant_i64(0);
    TCGv_i64 max = tcg_constant_i64(63);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i64(lsh, shift);
    tcg_gen_neg_i64(rsh, lsh);
    tcg_gen_shl_i64(lval, src, lsh);
    tcg_gen_umin_i64(rsh, rsh, max);
    tcg_gen_sar_i64(rval, src, rsh);
    tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
    tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
}

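/*
 * For the signed shifts the right-shift count is clamped with umin
 * rather than discarded: an arithmetic shift by (width - 1) already
 * produces the 0 or -1 that an over-long right shift must yield, so
 * only the left-shift result needs squashing to zero.  In the vector
 * form below, the final cmpsel separates negative counts from
 * positive ones by comparing the masked count with 0x80 (with 0 for
 * MO_8, where the count is not masked).
 */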
static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
                         TCGv_vec src, TCGv_vec shift)
{
    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec tmp = tcg_temp_new_vec_matching(dst);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_neg_vec(vece, rsh, shift);
    if (vece == MO_8) {
        tcg_gen_mov_vec(lsh, shift);
    } else {
        tcg_gen_dupi_vec(vece, tmp, 0xff);
        tcg_gen_and_vec(vece, lsh, shift, tmp);
        tcg_gen_and_vec(vece, rsh, rsh, tmp);
    }

    /* Bound rsh so out of bound right shift gets -1. */
    tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
    tcg_gen_umin_vec(vece, rsh, rsh, tmp);
    tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);

    tcg_gen_shlv_vec(vece, lval, src, lsh);
    tcg_gen_sarv_vec(vece, rval, src, rsh);

    /* Select in-bound left shift. */
    tcg_gen_andc_vec(vece, lval, lval, tmp);

    /* Select between left and right shift. */
    if (vece == MO_8) {
        tcg_gen_dupi_vec(vece, tmp, 0);
        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
    } else {
        tcg_gen_dupi_vec(vece, tmp, 0x80);
        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
    }
}

void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
        INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_sshl_vec,
          .fno = gen_helper_gvec_sshl_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_sshl_vec,
          .fno = gen_helper_gvec_sshl_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_sshl_i32,
          .fniv = gen_sshl_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_sshl_i64,
          .fniv = gen_sshl_vec,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

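/*
 * Saturating add/sub with QC tracking.  Each inline expansion computes
 * both the wrapping and the saturating result, compares them, and ORs
 * any mismatch into the 'sat' vector.  The gvec_4 callers pass
 * offsetof(CPUARMState, vfp.qc) as the second offset with
 * .write_aofs = true, so the QC bits accumulate across elements.
 */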
static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_add_vec(vece, x, a, b);
    tcg_gen_usadd_vec(vece, t, a, b);
    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
    tcg_gen_or_vec(vece, sat, sat, x);
}

void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_b,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_h,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_s,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_d,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

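/*
 * The signed-add and the subtract variants below follow the same
 * pattern, substituting ssadd, ussub and sssub for usadd, and
 * sub_vec for add_vec in the wrapping computation.
 */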
static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_add_vec(vece, x, a, b);
    tcg_gen_ssadd_vec(vece, t, a, b);
    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
    tcg_gen_or_vec(vece, sat, sat, x);
}

void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_sub_vec(vece, x, a, b);
    tcg_gen_ussub_vec(vece, t, a, b);
    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
    tcg_gen_or_vec(vece, sat, sat, x);
}

void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_sub_vec(vece, x, a, b);
    tcg_gen_sssub_vec(vece, t, a, b);
    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
    tcg_gen_or_vec(vece, sat, sat, x);
}

void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

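/*
 * Signed/unsigned absolute difference.  The integer versions compute
 * both a - b and b - a and use movcond to keep whichever is
 * non-negative, e.g. a = 3, b = 7 keeps b - a = 4; the vector
 * versions compute max(a, b) - min(a, b) instead.
 */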
static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_sub_i32(t, a, b);
    tcg_gen_sub_i32(d, b, a);
    tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
}

static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_sub_i64(t, a, b);
    tcg_gen_sub_i64(d, b, a);
    tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
}

static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_smin_vec(vece, t, a, b);
    tcg_gen_smax_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, t);
}

void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_sabd_i32,
          .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_sabd_i64,
          .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_sub_i32(t, a, b);
    tcg_gen_sub_i32(d, b, a);
    tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
}

static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_sub_i64(t, a, b);
    tcg_gen_sub_i64(d, b, a);
    tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
}

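/*
 * The unsigned forms differ from the signed ones above only in using
 * an unsigned comparison (LTU) and umin/umax instead of smin/smax.
 */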
static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_umin_vec(vece, t, a, b);
    tcg_gen_umax_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, t);
}

void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_uabd_i32,
          .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_uabd_i64,
          .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();
    gen_sabd_i32(t, a, b);
    tcg_gen_add_i32(d, d, t);
}

static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();
    gen_sabd_i64(t, a, b);
    tcg_gen_add_i64(d, d, t);
}

static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    gen_sabd_vec(vece, t, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

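/*
 * The accumulating forms reuse the absolute-difference expanders and
 * add the result into the destination; .load_dest = true in the
 * tables below ensures the old contents of the destination are
 * loaded first.
 */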
void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_add_vec,
        INDEX_op_smin_vec, INDEX_op_smax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_saba_i32,
          .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_saba_i64,
          .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();
    gen_uabd_i32(t, a, b);
    tcg_gen_add_i32(d, d, t);
}

static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();
    gen_uabd_i64(t, a, b);
    tcg_gen_add_i64(d, d, t);
}

static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    gen_uabd_vec(vece, t, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_add_vec,
        INDEX_op_umin_vec, INDEX_op_umax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_uaba_i32,
          .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_uaba_i64,
          .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}