1*f0984d40SFabiano Rosas /* 2*f0984d40SFabiano Rosas * ARM translation: AArch32 Neon instructions 3*f0984d40SFabiano Rosas * 4*f0984d40SFabiano Rosas * Copyright (c) 2003 Fabrice Bellard 5*f0984d40SFabiano Rosas * Copyright (c) 2005-2007 CodeSourcery 6*f0984d40SFabiano Rosas * Copyright (c) 2007 OpenedHand, Ltd. 7*f0984d40SFabiano Rosas * Copyright (c) 2020 Linaro, Ltd. 8*f0984d40SFabiano Rosas * 9*f0984d40SFabiano Rosas * This library is free software; you can redistribute it and/or 10*f0984d40SFabiano Rosas * modify it under the terms of the GNU Lesser General Public 11*f0984d40SFabiano Rosas * License as published by the Free Software Foundation; either 12*f0984d40SFabiano Rosas * version 2.1 of the License, or (at your option) any later version. 13*f0984d40SFabiano Rosas * 14*f0984d40SFabiano Rosas * This library is distributed in the hope that it will be useful, 15*f0984d40SFabiano Rosas * but WITHOUT ANY WARRANTY; without even the implied warranty of 16*f0984d40SFabiano Rosas * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17*f0984d40SFabiano Rosas * Lesser General Public License for more details. 18*f0984d40SFabiano Rosas * 19*f0984d40SFabiano Rosas * You should have received a copy of the GNU Lesser General Public 20*f0984d40SFabiano Rosas * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
21*f0984d40SFabiano Rosas */ 22*f0984d40SFabiano Rosas 23*f0984d40SFabiano Rosas #include "qemu/osdep.h" 24*f0984d40SFabiano Rosas #include "tcg/tcg-op.h" 25*f0984d40SFabiano Rosas #include "tcg/tcg-op-gvec.h" 26*f0984d40SFabiano Rosas #include "exec/exec-all.h" 27*f0984d40SFabiano Rosas #include "exec/gen-icount.h" 28*f0984d40SFabiano Rosas #include "translate.h" 29*f0984d40SFabiano Rosas #include "translate-a32.h" 30*f0984d40SFabiano Rosas 31*f0984d40SFabiano Rosas /* Include the generated Neon decoder */ 32*f0984d40SFabiano Rosas #include "decode-neon-dp.c.inc" 33*f0984d40SFabiano Rosas #include "decode-neon-ls.c.inc" 34*f0984d40SFabiano Rosas #include "decode-neon-shared.c.inc" 35*f0984d40SFabiano Rosas 36*f0984d40SFabiano Rosas static TCGv_ptr vfp_reg_ptr(bool dp, int reg) 37*f0984d40SFabiano Rosas { 38*f0984d40SFabiano Rosas TCGv_ptr ret = tcg_temp_new_ptr(); 39*f0984d40SFabiano Rosas tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg)); 40*f0984d40SFabiano Rosas return ret; 41*f0984d40SFabiano Rosas } 42*f0984d40SFabiano Rosas 43*f0984d40SFabiano Rosas static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop) 44*f0984d40SFabiano Rosas { 45*f0984d40SFabiano Rosas long offset = neon_element_offset(reg, ele, mop & MO_SIZE); 46*f0984d40SFabiano Rosas 47*f0984d40SFabiano Rosas switch (mop) { 48*f0984d40SFabiano Rosas case MO_UB: 49*f0984d40SFabiano Rosas tcg_gen_ld8u_i32(var, cpu_env, offset); 50*f0984d40SFabiano Rosas break; 51*f0984d40SFabiano Rosas case MO_UW: 52*f0984d40SFabiano Rosas tcg_gen_ld16u_i32(var, cpu_env, offset); 53*f0984d40SFabiano Rosas break; 54*f0984d40SFabiano Rosas case MO_UL: 55*f0984d40SFabiano Rosas tcg_gen_ld_i32(var, cpu_env, offset); 56*f0984d40SFabiano Rosas break; 57*f0984d40SFabiano Rosas default: 58*f0984d40SFabiano Rosas g_assert_not_reached(); 59*f0984d40SFabiano Rosas } 60*f0984d40SFabiano Rosas } 61*f0984d40SFabiano Rosas 62*f0984d40SFabiano Rosas static void neon_load_element64(TCGv_i64 var, int reg, int ele, 
MemOp mop) 63*f0984d40SFabiano Rosas { 64*f0984d40SFabiano Rosas long offset = neon_element_offset(reg, ele, mop & MO_SIZE); 65*f0984d40SFabiano Rosas 66*f0984d40SFabiano Rosas switch (mop) { 67*f0984d40SFabiano Rosas case MO_UB: 68*f0984d40SFabiano Rosas tcg_gen_ld8u_i64(var, cpu_env, offset); 69*f0984d40SFabiano Rosas break; 70*f0984d40SFabiano Rosas case MO_UW: 71*f0984d40SFabiano Rosas tcg_gen_ld16u_i64(var, cpu_env, offset); 72*f0984d40SFabiano Rosas break; 73*f0984d40SFabiano Rosas case MO_UL: 74*f0984d40SFabiano Rosas tcg_gen_ld32u_i64(var, cpu_env, offset); 75*f0984d40SFabiano Rosas break; 76*f0984d40SFabiano Rosas case MO_UQ: 77*f0984d40SFabiano Rosas tcg_gen_ld_i64(var, cpu_env, offset); 78*f0984d40SFabiano Rosas break; 79*f0984d40SFabiano Rosas default: 80*f0984d40SFabiano Rosas g_assert_not_reached(); 81*f0984d40SFabiano Rosas } 82*f0984d40SFabiano Rosas } 83*f0984d40SFabiano Rosas 84*f0984d40SFabiano Rosas static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var) 85*f0984d40SFabiano Rosas { 86*f0984d40SFabiano Rosas long offset = neon_element_offset(reg, ele, size); 87*f0984d40SFabiano Rosas 88*f0984d40SFabiano Rosas switch (size) { 89*f0984d40SFabiano Rosas case MO_8: 90*f0984d40SFabiano Rosas tcg_gen_st8_i32(var, cpu_env, offset); 91*f0984d40SFabiano Rosas break; 92*f0984d40SFabiano Rosas case MO_16: 93*f0984d40SFabiano Rosas tcg_gen_st16_i32(var, cpu_env, offset); 94*f0984d40SFabiano Rosas break; 95*f0984d40SFabiano Rosas case MO_32: 96*f0984d40SFabiano Rosas tcg_gen_st_i32(var, cpu_env, offset); 97*f0984d40SFabiano Rosas break; 98*f0984d40SFabiano Rosas default: 99*f0984d40SFabiano Rosas g_assert_not_reached(); 100*f0984d40SFabiano Rosas } 101*f0984d40SFabiano Rosas } 102*f0984d40SFabiano Rosas 103*f0984d40SFabiano Rosas static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var) 104*f0984d40SFabiano Rosas { 105*f0984d40SFabiano Rosas long offset = neon_element_offset(reg, ele, size); 106*f0984d40SFabiano Rosas 
107*f0984d40SFabiano Rosas switch (size) { 108*f0984d40SFabiano Rosas case MO_8: 109*f0984d40SFabiano Rosas tcg_gen_st8_i64(var, cpu_env, offset); 110*f0984d40SFabiano Rosas break; 111*f0984d40SFabiano Rosas case MO_16: 112*f0984d40SFabiano Rosas tcg_gen_st16_i64(var, cpu_env, offset); 113*f0984d40SFabiano Rosas break; 114*f0984d40SFabiano Rosas case MO_32: 115*f0984d40SFabiano Rosas tcg_gen_st32_i64(var, cpu_env, offset); 116*f0984d40SFabiano Rosas break; 117*f0984d40SFabiano Rosas case MO_64: 118*f0984d40SFabiano Rosas tcg_gen_st_i64(var, cpu_env, offset); 119*f0984d40SFabiano Rosas break; 120*f0984d40SFabiano Rosas default: 121*f0984d40SFabiano Rosas g_assert_not_reached(); 122*f0984d40SFabiano Rosas } 123*f0984d40SFabiano Rosas } 124*f0984d40SFabiano Rosas 125*f0984d40SFabiano Rosas static bool do_neon_ddda(DisasContext *s, int q, int vd, int vn, int vm, 126*f0984d40SFabiano Rosas int data, gen_helper_gvec_4 *fn_gvec) 127*f0984d40SFabiano Rosas { 128*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. */ 129*f0984d40SFabiano Rosas if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) { 130*f0984d40SFabiano Rosas return false; 131*f0984d40SFabiano Rosas } 132*f0984d40SFabiano Rosas 133*f0984d40SFabiano Rosas /* 134*f0984d40SFabiano Rosas * UNDEF accesses to odd registers for each bit of Q. 135*f0984d40SFabiano Rosas * Q will be 0b111 for all Q-reg instructions, otherwise 136*f0984d40SFabiano Rosas * when we have mixed Q- and D-reg inputs. 137*f0984d40SFabiano Rosas */ 138*f0984d40SFabiano Rosas if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) { 139*f0984d40SFabiano Rosas return false; 140*f0984d40SFabiano Rosas } 141*f0984d40SFabiano Rosas 142*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 143*f0984d40SFabiano Rosas return true; 144*f0984d40SFabiano Rosas } 145*f0984d40SFabiano Rosas 146*f0984d40SFabiano Rosas int opr_sz = q ? 
16 : 8; 147*f0984d40SFabiano Rosas tcg_gen_gvec_4_ool(vfp_reg_offset(1, vd), 148*f0984d40SFabiano Rosas vfp_reg_offset(1, vn), 149*f0984d40SFabiano Rosas vfp_reg_offset(1, vm), 150*f0984d40SFabiano Rosas vfp_reg_offset(1, vd), 151*f0984d40SFabiano Rosas opr_sz, opr_sz, data, fn_gvec); 152*f0984d40SFabiano Rosas return true; 153*f0984d40SFabiano Rosas } 154*f0984d40SFabiano Rosas 155*f0984d40SFabiano Rosas static bool do_neon_ddda_fpst(DisasContext *s, int q, int vd, int vn, int vm, 156*f0984d40SFabiano Rosas int data, ARMFPStatusFlavour fp_flavour, 157*f0984d40SFabiano Rosas gen_helper_gvec_4_ptr *fn_gvec_ptr) 158*f0984d40SFabiano Rosas { 159*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. */ 160*f0984d40SFabiano Rosas if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) { 161*f0984d40SFabiano Rosas return false; 162*f0984d40SFabiano Rosas } 163*f0984d40SFabiano Rosas 164*f0984d40SFabiano Rosas /* 165*f0984d40SFabiano Rosas * UNDEF accesses to odd registers for each bit of Q. 166*f0984d40SFabiano Rosas * Q will be 0b111 for all Q-reg instructions, otherwise 167*f0984d40SFabiano Rosas * when we have mixed Q- and D-reg inputs. 168*f0984d40SFabiano Rosas */ 169*f0984d40SFabiano Rosas if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) { 170*f0984d40SFabiano Rosas return false; 171*f0984d40SFabiano Rosas } 172*f0984d40SFabiano Rosas 173*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 174*f0984d40SFabiano Rosas return true; 175*f0984d40SFabiano Rosas } 176*f0984d40SFabiano Rosas 177*f0984d40SFabiano Rosas int opr_sz = q ? 
16 : 8; 178*f0984d40SFabiano Rosas TCGv_ptr fpst = fpstatus_ptr(fp_flavour); 179*f0984d40SFabiano Rosas 180*f0984d40SFabiano Rosas tcg_gen_gvec_4_ptr(vfp_reg_offset(1, vd), 181*f0984d40SFabiano Rosas vfp_reg_offset(1, vn), 182*f0984d40SFabiano Rosas vfp_reg_offset(1, vm), 183*f0984d40SFabiano Rosas vfp_reg_offset(1, vd), 184*f0984d40SFabiano Rosas fpst, opr_sz, opr_sz, data, fn_gvec_ptr); 185*f0984d40SFabiano Rosas tcg_temp_free_ptr(fpst); 186*f0984d40SFabiano Rosas return true; 187*f0984d40SFabiano Rosas } 188*f0984d40SFabiano Rosas 189*f0984d40SFabiano Rosas static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a) 190*f0984d40SFabiano Rosas { 191*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_vcma, s)) { 192*f0984d40SFabiano Rosas return false; 193*f0984d40SFabiano Rosas } 194*f0984d40SFabiano Rosas if (a->size == MO_16) { 195*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_fp16_arith, s)) { 196*f0984d40SFabiano Rosas return false; 197*f0984d40SFabiano Rosas } 198*f0984d40SFabiano Rosas return do_neon_ddda_fpst(s, a->q * 7, a->vd, a->vn, a->vm, a->rot, 199*f0984d40SFabiano Rosas FPST_STD_F16, gen_helper_gvec_fcmlah); 200*f0984d40SFabiano Rosas } 201*f0984d40SFabiano Rosas return do_neon_ddda_fpst(s, a->q * 7, a->vd, a->vn, a->vm, a->rot, 202*f0984d40SFabiano Rosas FPST_STD, gen_helper_gvec_fcmlas); 203*f0984d40SFabiano Rosas } 204*f0984d40SFabiano Rosas 205*f0984d40SFabiano Rosas static bool trans_VCADD(DisasContext *s, arg_VCADD *a) 206*f0984d40SFabiano Rosas { 207*f0984d40SFabiano Rosas int opr_sz; 208*f0984d40SFabiano Rosas TCGv_ptr fpst; 209*f0984d40SFabiano Rosas gen_helper_gvec_3_ptr *fn_gvec_ptr; 210*f0984d40SFabiano Rosas 211*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_vcma, s) 212*f0984d40SFabiano Rosas || (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s))) { 213*f0984d40SFabiano Rosas return false; 214*f0984d40SFabiano Rosas } 215*f0984d40SFabiano Rosas 216*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 217*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 218*f0984d40SFabiano Rosas ((a->vd | a->vn | a->vm) & 0x10)) { 219*f0984d40SFabiano Rosas return false; 220*f0984d40SFabiano Rosas } 221*f0984d40SFabiano Rosas 222*f0984d40SFabiano Rosas if ((a->vn | a->vm | a->vd) & a->q) { 223*f0984d40SFabiano Rosas return false; 224*f0984d40SFabiano Rosas } 225*f0984d40SFabiano Rosas 226*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 227*f0984d40SFabiano Rosas return true; 228*f0984d40SFabiano Rosas } 229*f0984d40SFabiano Rosas 230*f0984d40SFabiano Rosas opr_sz = (1 + a->q) * 8; 231*f0984d40SFabiano Rosas fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD); 232*f0984d40SFabiano Rosas fn_gvec_ptr = (a->size == MO_16) ? 233*f0984d40SFabiano Rosas gen_helper_gvec_fcaddh : gen_helper_gvec_fcadds; 234*f0984d40SFabiano Rosas tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd), 235*f0984d40SFabiano Rosas vfp_reg_offset(1, a->vn), 236*f0984d40SFabiano Rosas vfp_reg_offset(1, a->vm), 237*f0984d40SFabiano Rosas fpst, opr_sz, opr_sz, a->rot, 238*f0984d40SFabiano Rosas fn_gvec_ptr); 239*f0984d40SFabiano Rosas tcg_temp_free_ptr(fpst); 240*f0984d40SFabiano Rosas return true; 241*f0984d40SFabiano Rosas } 242*f0984d40SFabiano Rosas 243*f0984d40SFabiano Rosas static bool trans_VSDOT(DisasContext *s, arg_VSDOT *a) 244*f0984d40SFabiano Rosas { 245*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_dp, s)) { 246*f0984d40SFabiano Rosas return false; 247*f0984d40SFabiano Rosas } 248*f0984d40SFabiano Rosas return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0, 249*f0984d40SFabiano Rosas gen_helper_gvec_sdot_b); 250*f0984d40SFabiano Rosas } 251*f0984d40SFabiano Rosas 252*f0984d40SFabiano Rosas static bool trans_VUDOT(DisasContext *s, arg_VUDOT *a) 253*f0984d40SFabiano Rosas { 254*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_dp, s)) { 255*f0984d40SFabiano Rosas return false; 256*f0984d40SFabiano Rosas } 257*f0984d40SFabiano Rosas return do_neon_ddda(s, a->q * 7, a->vd, 
a->vn, a->vm, 0, 258*f0984d40SFabiano Rosas gen_helper_gvec_udot_b); 259*f0984d40SFabiano Rosas } 260*f0984d40SFabiano Rosas 261*f0984d40SFabiano Rosas static bool trans_VUSDOT(DisasContext *s, arg_VUSDOT *a) 262*f0984d40SFabiano Rosas { 263*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_i8mm, s)) { 264*f0984d40SFabiano Rosas return false; 265*f0984d40SFabiano Rosas } 266*f0984d40SFabiano Rosas return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0, 267*f0984d40SFabiano Rosas gen_helper_gvec_usdot_b); 268*f0984d40SFabiano Rosas } 269*f0984d40SFabiano Rosas 270*f0984d40SFabiano Rosas static bool trans_VDOT_b16(DisasContext *s, arg_VDOT_b16 *a) 271*f0984d40SFabiano Rosas { 272*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_bf16, s)) { 273*f0984d40SFabiano Rosas return false; 274*f0984d40SFabiano Rosas } 275*f0984d40SFabiano Rosas return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0, 276*f0984d40SFabiano Rosas gen_helper_gvec_bfdot); 277*f0984d40SFabiano Rosas } 278*f0984d40SFabiano Rosas 279*f0984d40SFabiano Rosas static bool trans_VFML(DisasContext *s, arg_VFML *a) 280*f0984d40SFabiano Rosas { 281*f0984d40SFabiano Rosas int opr_sz; 282*f0984d40SFabiano Rosas 283*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_fhm, s)) { 284*f0984d40SFabiano Rosas return false; 285*f0984d40SFabiano Rosas } 286*f0984d40SFabiano Rosas 287*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 288*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 289*f0984d40SFabiano Rosas (a->vd & 0x10)) { 290*f0984d40SFabiano Rosas return false; 291*f0984d40SFabiano Rosas } 292*f0984d40SFabiano Rosas 293*f0984d40SFabiano Rosas if (a->vd & a->q) { 294*f0984d40SFabiano Rosas return false; 295*f0984d40SFabiano Rosas } 296*f0984d40SFabiano Rosas 297*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 298*f0984d40SFabiano Rosas return true; 299*f0984d40SFabiano Rosas } 300*f0984d40SFabiano Rosas 301*f0984d40SFabiano Rosas opr_sz = (1 + a->q) * 8; 302*f0984d40SFabiano Rosas tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd), 303*f0984d40SFabiano Rosas vfp_reg_offset(a->q, a->vn), 304*f0984d40SFabiano Rosas vfp_reg_offset(a->q, a->vm), 305*f0984d40SFabiano Rosas cpu_env, opr_sz, opr_sz, a->s, /* is_2 == 0 */ 306*f0984d40SFabiano Rosas gen_helper_gvec_fmlal_a32); 307*f0984d40SFabiano Rosas return true; 308*f0984d40SFabiano Rosas } 309*f0984d40SFabiano Rosas 310*f0984d40SFabiano Rosas static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a) 311*f0984d40SFabiano Rosas { 312*f0984d40SFabiano Rosas int data = (a->index << 2) | a->rot; 313*f0984d40SFabiano Rosas 314*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_vcma, s)) { 315*f0984d40SFabiano Rosas return false; 316*f0984d40SFabiano Rosas } 317*f0984d40SFabiano Rosas if (a->size == MO_16) { 318*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_fp16_arith, s)) { 319*f0984d40SFabiano Rosas return false; 320*f0984d40SFabiano Rosas } 321*f0984d40SFabiano Rosas return do_neon_ddda_fpst(s, a->q * 6, a->vd, a->vn, a->vm, data, 322*f0984d40SFabiano Rosas FPST_STD_F16, gen_helper_gvec_fcmlah_idx); 323*f0984d40SFabiano Rosas } 324*f0984d40SFabiano Rosas return do_neon_ddda_fpst(s, a->q * 6, a->vd, a->vn, a->vm, data, 325*f0984d40SFabiano Rosas FPST_STD, gen_helper_gvec_fcmlas_idx); 326*f0984d40SFabiano Rosas } 327*f0984d40SFabiano Rosas 328*f0984d40SFabiano Rosas static bool trans_VSDOT_scalar(DisasContext *s, 
arg_VSDOT_scalar *a) 329*f0984d40SFabiano Rosas { 330*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_dp, s)) { 331*f0984d40SFabiano Rosas return false; 332*f0984d40SFabiano Rosas } 333*f0984d40SFabiano Rosas return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index, 334*f0984d40SFabiano Rosas gen_helper_gvec_sdot_idx_b); 335*f0984d40SFabiano Rosas } 336*f0984d40SFabiano Rosas 337*f0984d40SFabiano Rosas static bool trans_VUDOT_scalar(DisasContext *s, arg_VUDOT_scalar *a) 338*f0984d40SFabiano Rosas { 339*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_dp, s)) { 340*f0984d40SFabiano Rosas return false; 341*f0984d40SFabiano Rosas } 342*f0984d40SFabiano Rosas return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index, 343*f0984d40SFabiano Rosas gen_helper_gvec_udot_idx_b); 344*f0984d40SFabiano Rosas } 345*f0984d40SFabiano Rosas 346*f0984d40SFabiano Rosas static bool trans_VUSDOT_scalar(DisasContext *s, arg_VUSDOT_scalar *a) 347*f0984d40SFabiano Rosas { 348*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_i8mm, s)) { 349*f0984d40SFabiano Rosas return false; 350*f0984d40SFabiano Rosas } 351*f0984d40SFabiano Rosas return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index, 352*f0984d40SFabiano Rosas gen_helper_gvec_usdot_idx_b); 353*f0984d40SFabiano Rosas } 354*f0984d40SFabiano Rosas 355*f0984d40SFabiano Rosas static bool trans_VSUDOT_scalar(DisasContext *s, arg_VSUDOT_scalar *a) 356*f0984d40SFabiano Rosas { 357*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_i8mm, s)) { 358*f0984d40SFabiano Rosas return false; 359*f0984d40SFabiano Rosas } 360*f0984d40SFabiano Rosas return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index, 361*f0984d40SFabiano Rosas gen_helper_gvec_sudot_idx_b); 362*f0984d40SFabiano Rosas } 363*f0984d40SFabiano Rosas 364*f0984d40SFabiano Rosas static bool trans_VDOT_b16_scal(DisasContext *s, arg_VDOT_b16_scal *a) 365*f0984d40SFabiano Rosas { 366*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_bf16, s)) { 367*f0984d40SFabiano Rosas 
return false; 368*f0984d40SFabiano Rosas } 369*f0984d40SFabiano Rosas return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index, 370*f0984d40SFabiano Rosas gen_helper_gvec_bfdot_idx); 371*f0984d40SFabiano Rosas } 372*f0984d40SFabiano Rosas 373*f0984d40SFabiano Rosas static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a) 374*f0984d40SFabiano Rosas { 375*f0984d40SFabiano Rosas int opr_sz; 376*f0984d40SFabiano Rosas 377*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_fhm, s)) { 378*f0984d40SFabiano Rosas return false; 379*f0984d40SFabiano Rosas } 380*f0984d40SFabiano Rosas 381*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. */ 382*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 383*f0984d40SFabiano Rosas ((a->vd & 0x10) || (a->q && (a->vn & 0x10)))) { 384*f0984d40SFabiano Rosas return false; 385*f0984d40SFabiano Rosas } 386*f0984d40SFabiano Rosas 387*f0984d40SFabiano Rosas if (a->vd & a->q) { 388*f0984d40SFabiano Rosas return false; 389*f0984d40SFabiano Rosas } 390*f0984d40SFabiano Rosas 391*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 392*f0984d40SFabiano Rosas return true; 393*f0984d40SFabiano Rosas } 394*f0984d40SFabiano Rosas 395*f0984d40SFabiano Rosas opr_sz = (1 + a->q) * 8; 396*f0984d40SFabiano Rosas tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd), 397*f0984d40SFabiano Rosas vfp_reg_offset(a->q, a->vn), 398*f0984d40SFabiano Rosas vfp_reg_offset(a->q, a->rm), 399*f0984d40SFabiano Rosas cpu_env, opr_sz, opr_sz, 400*f0984d40SFabiano Rosas (a->index << 2) | a->s, /* is_2 == 0 */ 401*f0984d40SFabiano Rosas gen_helper_gvec_fmlal_idx_a32); 402*f0984d40SFabiano Rosas return true; 403*f0984d40SFabiano Rosas } 404*f0984d40SFabiano Rosas 405*f0984d40SFabiano Rosas static struct { 406*f0984d40SFabiano Rosas int nregs; 407*f0984d40SFabiano Rosas int interleave; 408*f0984d40SFabiano Rosas int spacing; 409*f0984d40SFabiano Rosas } const neon_ls_element_type[11] = { 410*f0984d40SFabiano Rosas {1, 4, 1}, 
411*f0984d40SFabiano Rosas {1, 4, 2}, 412*f0984d40SFabiano Rosas {4, 1, 1}, 413*f0984d40SFabiano Rosas {2, 2, 2}, 414*f0984d40SFabiano Rosas {1, 3, 1}, 415*f0984d40SFabiano Rosas {1, 3, 2}, 416*f0984d40SFabiano Rosas {3, 1, 1}, 417*f0984d40SFabiano Rosas {1, 1, 1}, 418*f0984d40SFabiano Rosas {1, 2, 1}, 419*f0984d40SFabiano Rosas {1, 2, 2}, 420*f0984d40SFabiano Rosas {2, 1, 1} 421*f0984d40SFabiano Rosas }; 422*f0984d40SFabiano Rosas 423*f0984d40SFabiano Rosas static void gen_neon_ldst_base_update(DisasContext *s, int rm, int rn, 424*f0984d40SFabiano Rosas int stride) 425*f0984d40SFabiano Rosas { 426*f0984d40SFabiano Rosas if (rm != 15) { 427*f0984d40SFabiano Rosas TCGv_i32 base; 428*f0984d40SFabiano Rosas 429*f0984d40SFabiano Rosas base = load_reg(s, rn); 430*f0984d40SFabiano Rosas if (rm == 13) { 431*f0984d40SFabiano Rosas tcg_gen_addi_i32(base, base, stride); 432*f0984d40SFabiano Rosas } else { 433*f0984d40SFabiano Rosas TCGv_i32 index; 434*f0984d40SFabiano Rosas index = load_reg(s, rm); 435*f0984d40SFabiano Rosas tcg_gen_add_i32(base, base, index); 436*f0984d40SFabiano Rosas tcg_temp_free_i32(index); 437*f0984d40SFabiano Rosas } 438*f0984d40SFabiano Rosas store_reg(s, rn, base); 439*f0984d40SFabiano Rosas } 440*f0984d40SFabiano Rosas } 441*f0984d40SFabiano Rosas 442*f0984d40SFabiano Rosas static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a) 443*f0984d40SFabiano Rosas { 444*f0984d40SFabiano Rosas /* Neon load/store multiple structures */ 445*f0984d40SFabiano Rosas int nregs, interleave, spacing, reg, n; 446*f0984d40SFabiano Rosas MemOp mop, align, endian; 447*f0984d40SFabiano Rosas int mmu_idx = get_mem_index(s); 448*f0984d40SFabiano Rosas int size = a->size; 449*f0984d40SFabiano Rosas TCGv_i64 tmp64; 450*f0984d40SFabiano Rosas TCGv_i32 addr; 451*f0984d40SFabiano Rosas 452*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 453*f0984d40SFabiano Rosas return false; 454*f0984d40SFabiano Rosas } 455*f0984d40SFabiano Rosas 
456*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist */ 457*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { 458*f0984d40SFabiano Rosas return false; 459*f0984d40SFabiano Rosas } 460*f0984d40SFabiano Rosas if (a->itype > 10) { 461*f0984d40SFabiano Rosas return false; 462*f0984d40SFabiano Rosas } 463*f0984d40SFabiano Rosas /* Catch UNDEF cases for bad values of align field */ 464*f0984d40SFabiano Rosas switch (a->itype & 0xc) { 465*f0984d40SFabiano Rosas case 4: 466*f0984d40SFabiano Rosas if (a->align >= 2) { 467*f0984d40SFabiano Rosas return false; 468*f0984d40SFabiano Rosas } 469*f0984d40SFabiano Rosas break; 470*f0984d40SFabiano Rosas case 8: 471*f0984d40SFabiano Rosas if (a->align == 3) { 472*f0984d40SFabiano Rosas return false; 473*f0984d40SFabiano Rosas } 474*f0984d40SFabiano Rosas break; 475*f0984d40SFabiano Rosas default: 476*f0984d40SFabiano Rosas break; 477*f0984d40SFabiano Rosas } 478*f0984d40SFabiano Rosas nregs = neon_ls_element_type[a->itype].nregs; 479*f0984d40SFabiano Rosas interleave = neon_ls_element_type[a->itype].interleave; 480*f0984d40SFabiano Rosas spacing = neon_ls_element_type[a->itype].spacing; 481*f0984d40SFabiano Rosas if (size == 3 && (interleave | spacing) != 1) { 482*f0984d40SFabiano Rosas return false; 483*f0984d40SFabiano Rosas } 484*f0984d40SFabiano Rosas 485*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 486*f0984d40SFabiano Rosas return true; 487*f0984d40SFabiano Rosas } 488*f0984d40SFabiano Rosas 489*f0984d40SFabiano Rosas /* For our purposes, bytes are always little-endian. 
*/ 490*f0984d40SFabiano Rosas endian = s->be_data; 491*f0984d40SFabiano Rosas if (size == 0) { 492*f0984d40SFabiano Rosas endian = MO_LE; 493*f0984d40SFabiano Rosas } 494*f0984d40SFabiano Rosas 495*f0984d40SFabiano Rosas /* Enforce alignment requested by the instruction */ 496*f0984d40SFabiano Rosas if (a->align) { 497*f0984d40SFabiano Rosas align = pow2_align(a->align + 2); /* 4 ** a->align */ 498*f0984d40SFabiano Rosas } else { 499*f0984d40SFabiano Rosas align = s->align_mem ? MO_ALIGN : 0; 500*f0984d40SFabiano Rosas } 501*f0984d40SFabiano Rosas 502*f0984d40SFabiano Rosas /* 503*f0984d40SFabiano Rosas * Consecutive little-endian elements from a single register 504*f0984d40SFabiano Rosas * can be promoted to a larger little-endian operation. 505*f0984d40SFabiano Rosas */ 506*f0984d40SFabiano Rosas if (interleave == 1 && endian == MO_LE) { 507*f0984d40SFabiano Rosas /* Retain any natural alignment. */ 508*f0984d40SFabiano Rosas if (align == MO_ALIGN) { 509*f0984d40SFabiano Rosas align = pow2_align(size); 510*f0984d40SFabiano Rosas } 511*f0984d40SFabiano Rosas size = 3; 512*f0984d40SFabiano Rosas } 513*f0984d40SFabiano Rosas 514*f0984d40SFabiano Rosas tmp64 = tcg_temp_new_i64(); 515*f0984d40SFabiano Rosas addr = tcg_temp_new_i32(); 516*f0984d40SFabiano Rosas load_reg_var(s, addr, a->rn); 517*f0984d40SFabiano Rosas 518*f0984d40SFabiano Rosas mop = endian | size | align; 519*f0984d40SFabiano Rosas for (reg = 0; reg < nregs; reg++) { 520*f0984d40SFabiano Rosas for (n = 0; n < 8 >> size; n++) { 521*f0984d40SFabiano Rosas int xs; 522*f0984d40SFabiano Rosas for (xs = 0; xs < interleave; xs++) { 523*f0984d40SFabiano Rosas int tt = a->vd + reg + spacing * xs; 524*f0984d40SFabiano Rosas 525*f0984d40SFabiano Rosas if (a->l) { 526*f0984d40SFabiano Rosas gen_aa32_ld_internal_i64(s, tmp64, addr, mmu_idx, mop); 527*f0984d40SFabiano Rosas neon_store_element64(tt, n, size, tmp64); 528*f0984d40SFabiano Rosas } else { 529*f0984d40SFabiano Rosas neon_load_element64(tmp64, tt, n, 
size); 530*f0984d40SFabiano Rosas gen_aa32_st_internal_i64(s, tmp64, addr, mmu_idx, mop); 531*f0984d40SFabiano Rosas } 532*f0984d40SFabiano Rosas tcg_gen_addi_i32(addr, addr, 1 << size); 533*f0984d40SFabiano Rosas 534*f0984d40SFabiano Rosas /* Subsequent memory operations inherit alignment */ 535*f0984d40SFabiano Rosas mop &= ~MO_AMASK; 536*f0984d40SFabiano Rosas } 537*f0984d40SFabiano Rosas } 538*f0984d40SFabiano Rosas } 539*f0984d40SFabiano Rosas tcg_temp_free_i32(addr); 540*f0984d40SFabiano Rosas tcg_temp_free_i64(tmp64); 541*f0984d40SFabiano Rosas 542*f0984d40SFabiano Rosas gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8); 543*f0984d40SFabiano Rosas return true; 544*f0984d40SFabiano Rosas } 545*f0984d40SFabiano Rosas 546*f0984d40SFabiano Rosas static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a) 547*f0984d40SFabiano Rosas { 548*f0984d40SFabiano Rosas /* Neon load single structure to all lanes */ 549*f0984d40SFabiano Rosas int reg, stride, vec_size; 550*f0984d40SFabiano Rosas int vd = a->vd; 551*f0984d40SFabiano Rosas int size = a->size; 552*f0984d40SFabiano Rosas int nregs = a->n + 1; 553*f0984d40SFabiano Rosas TCGv_i32 addr, tmp; 554*f0984d40SFabiano Rosas MemOp mop, align; 555*f0984d40SFabiano Rosas 556*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 557*f0984d40SFabiano Rosas return false; 558*f0984d40SFabiano Rosas } 559*f0984d40SFabiano Rosas 560*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist */ 561*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { 562*f0984d40SFabiano Rosas return false; 563*f0984d40SFabiano Rosas } 564*f0984d40SFabiano Rosas 565*f0984d40SFabiano Rosas align = 0; 566*f0984d40SFabiano Rosas if (size == 3) { 567*f0984d40SFabiano Rosas if (nregs != 4 || a->a == 0) { 568*f0984d40SFabiano Rosas return false; 569*f0984d40SFabiano Rosas } 570*f0984d40SFabiano Rosas /* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */ 
571*f0984d40SFabiano Rosas size = MO_32; 572*f0984d40SFabiano Rosas align = MO_ALIGN_16; 573*f0984d40SFabiano Rosas } else if (a->a) { 574*f0984d40SFabiano Rosas switch (nregs) { 575*f0984d40SFabiano Rosas case 1: 576*f0984d40SFabiano Rosas if (size == 0) { 577*f0984d40SFabiano Rosas return false; 578*f0984d40SFabiano Rosas } 579*f0984d40SFabiano Rosas align = MO_ALIGN; 580*f0984d40SFabiano Rosas break; 581*f0984d40SFabiano Rosas case 2: 582*f0984d40SFabiano Rosas align = pow2_align(size + 1); 583*f0984d40SFabiano Rosas break; 584*f0984d40SFabiano Rosas case 3: 585*f0984d40SFabiano Rosas return false; 586*f0984d40SFabiano Rosas case 4: 587*f0984d40SFabiano Rosas if (size == 2) { 588*f0984d40SFabiano Rosas align = pow2_align(3); 589*f0984d40SFabiano Rosas } else { 590*f0984d40SFabiano Rosas align = pow2_align(size + 2); 591*f0984d40SFabiano Rosas } 592*f0984d40SFabiano Rosas break; 593*f0984d40SFabiano Rosas default: 594*f0984d40SFabiano Rosas g_assert_not_reached(); 595*f0984d40SFabiano Rosas } 596*f0984d40SFabiano Rosas } 597*f0984d40SFabiano Rosas 598*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 599*f0984d40SFabiano Rosas return true; 600*f0984d40SFabiano Rosas } 601*f0984d40SFabiano Rosas 602*f0984d40SFabiano Rosas /* 603*f0984d40SFabiano Rosas * VLD1 to all lanes: T bit indicates how many Dregs to write. 604*f0984d40SFabiano Rosas * VLD2/3/4 to all lanes: T bit indicates register stride. 605*f0984d40SFabiano Rosas */ 606*f0984d40SFabiano Rosas stride = a->t ? 2 : 1; 607*f0984d40SFabiano Rosas vec_size = nregs == 1 ? 
stride * 8 : 8; 608*f0984d40SFabiano Rosas mop = size | align; 609*f0984d40SFabiano Rosas tmp = tcg_temp_new_i32(); 610*f0984d40SFabiano Rosas addr = tcg_temp_new_i32(); 611*f0984d40SFabiano Rosas load_reg_var(s, addr, a->rn); 612*f0984d40SFabiano Rosas for (reg = 0; reg < nregs; reg++) { 613*f0984d40SFabiano Rosas gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop); 614*f0984d40SFabiano Rosas if ((vd & 1) && vec_size == 16) { 615*f0984d40SFabiano Rosas /* 616*f0984d40SFabiano Rosas * We cannot write 16 bytes at once because the 617*f0984d40SFabiano Rosas * destination is unaligned. 618*f0984d40SFabiano Rosas */ 619*f0984d40SFabiano Rosas tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd), 620*f0984d40SFabiano Rosas 8, 8, tmp); 621*f0984d40SFabiano Rosas tcg_gen_gvec_mov(0, neon_full_reg_offset(vd + 1), 622*f0984d40SFabiano Rosas neon_full_reg_offset(vd), 8, 8); 623*f0984d40SFabiano Rosas } else { 624*f0984d40SFabiano Rosas tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd), 625*f0984d40SFabiano Rosas vec_size, vec_size, tmp); 626*f0984d40SFabiano Rosas } 627*f0984d40SFabiano Rosas tcg_gen_addi_i32(addr, addr, 1 << size); 628*f0984d40SFabiano Rosas vd += stride; 629*f0984d40SFabiano Rosas 630*f0984d40SFabiano Rosas /* Subsequent memory operations inherit alignment */ 631*f0984d40SFabiano Rosas mop &= ~MO_AMASK; 632*f0984d40SFabiano Rosas } 633*f0984d40SFabiano Rosas tcg_temp_free_i32(tmp); 634*f0984d40SFabiano Rosas tcg_temp_free_i32(addr); 635*f0984d40SFabiano Rosas 636*f0984d40SFabiano Rosas gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << size) * nregs); 637*f0984d40SFabiano Rosas 638*f0984d40SFabiano Rosas return true; 639*f0984d40SFabiano Rosas } 640*f0984d40SFabiano Rosas 641*f0984d40SFabiano Rosas static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a) 642*f0984d40SFabiano Rosas { 643*f0984d40SFabiano Rosas /* Neon load/store single structure to one lane */ 644*f0984d40SFabiano Rosas int reg; 645*f0984d40SFabiano Rosas int nregs = a->n + 1; 
646*f0984d40SFabiano Rosas int vd = a->vd; 647*f0984d40SFabiano Rosas TCGv_i32 addr, tmp; 648*f0984d40SFabiano Rosas MemOp mop; 649*f0984d40SFabiano Rosas 650*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 651*f0984d40SFabiano Rosas return false; 652*f0984d40SFabiano Rosas } 653*f0984d40SFabiano Rosas 654*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist */ 655*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { 656*f0984d40SFabiano Rosas return false; 657*f0984d40SFabiano Rosas } 658*f0984d40SFabiano Rosas 659*f0984d40SFabiano Rosas /* Catch the UNDEF cases. This is unavoidably a bit messy. */ 660*f0984d40SFabiano Rosas switch (nregs) { 661*f0984d40SFabiano Rosas case 1: 662*f0984d40SFabiano Rosas if (a->stride != 1) { 663*f0984d40SFabiano Rosas return false; 664*f0984d40SFabiano Rosas } 665*f0984d40SFabiano Rosas if (((a->align & (1 << a->size)) != 0) || 666*f0984d40SFabiano Rosas (a->size == 2 && (a->align == 1 || a->align == 2))) { 667*f0984d40SFabiano Rosas return false; 668*f0984d40SFabiano Rosas } 669*f0984d40SFabiano Rosas break; 670*f0984d40SFabiano Rosas case 2: 671*f0984d40SFabiano Rosas if (a->size == 2 && (a->align & 2) != 0) { 672*f0984d40SFabiano Rosas return false; 673*f0984d40SFabiano Rosas } 674*f0984d40SFabiano Rosas break; 675*f0984d40SFabiano Rosas case 3: 676*f0984d40SFabiano Rosas if (a->align != 0) { 677*f0984d40SFabiano Rosas return false; 678*f0984d40SFabiano Rosas } 679*f0984d40SFabiano Rosas break; 680*f0984d40SFabiano Rosas case 4: 681*f0984d40SFabiano Rosas if (a->size == 2 && a->align == 3) { 682*f0984d40SFabiano Rosas return false; 683*f0984d40SFabiano Rosas } 684*f0984d40SFabiano Rosas break; 685*f0984d40SFabiano Rosas default: 686*f0984d40SFabiano Rosas g_assert_not_reached(); 687*f0984d40SFabiano Rosas } 688*f0984d40SFabiano Rosas if ((vd + a->stride * (nregs - 1)) > 31) { 689*f0984d40SFabiano Rosas /* 690*f0984d40SFabiano Rosas * Attempts to write off the 
end of the register file are 691*f0984d40SFabiano Rosas * UNPREDICTABLE; we choose to UNDEF because otherwise we would 692*f0984d40SFabiano Rosas * access off the end of the array that holds the register data. 693*f0984d40SFabiano Rosas */ 694*f0984d40SFabiano Rosas return false; 695*f0984d40SFabiano Rosas } 696*f0984d40SFabiano Rosas 697*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 698*f0984d40SFabiano Rosas return true; 699*f0984d40SFabiano Rosas } 700*f0984d40SFabiano Rosas 701*f0984d40SFabiano Rosas /* Pick up SCTLR settings */ 702*f0984d40SFabiano Rosas mop = finalize_memop(s, a->size); 703*f0984d40SFabiano Rosas 704*f0984d40SFabiano Rosas if (a->align) { 705*f0984d40SFabiano Rosas MemOp align_op; 706*f0984d40SFabiano Rosas 707*f0984d40SFabiano Rosas switch (nregs) { 708*f0984d40SFabiano Rosas case 1: 709*f0984d40SFabiano Rosas /* For VLD1, use natural alignment. */ 710*f0984d40SFabiano Rosas align_op = MO_ALIGN; 711*f0984d40SFabiano Rosas break; 712*f0984d40SFabiano Rosas case 2: 713*f0984d40SFabiano Rosas /* For VLD2, use double alignment. */ 714*f0984d40SFabiano Rosas align_op = pow2_align(a->size + 1); 715*f0984d40SFabiano Rosas break; 716*f0984d40SFabiano Rosas case 4: 717*f0984d40SFabiano Rosas if (a->size == MO_32) { 718*f0984d40SFabiano Rosas /* 719*f0984d40SFabiano Rosas * For VLD4.32, align = 1 is double alignment, align = 2 is 720*f0984d40SFabiano Rosas * quad alignment; align = 3 is rejected above. 721*f0984d40SFabiano Rosas */ 722*f0984d40SFabiano Rosas align_op = pow2_align(a->size + a->align); 723*f0984d40SFabiano Rosas } else { 724*f0984d40SFabiano Rosas /* For VLD4.8 and VLD.16, we want quad alignment. */ 725*f0984d40SFabiano Rosas align_op = pow2_align(a->size + 2); 726*f0984d40SFabiano Rosas } 727*f0984d40SFabiano Rosas break; 728*f0984d40SFabiano Rosas default: 729*f0984d40SFabiano Rosas /* For VLD3, the alignment field is zero and rejected above. 
*/ 730*f0984d40SFabiano Rosas g_assert_not_reached(); 731*f0984d40SFabiano Rosas } 732*f0984d40SFabiano Rosas 733*f0984d40SFabiano Rosas mop = (mop & ~MO_AMASK) | align_op; 734*f0984d40SFabiano Rosas } 735*f0984d40SFabiano Rosas 736*f0984d40SFabiano Rosas tmp = tcg_temp_new_i32(); 737*f0984d40SFabiano Rosas addr = tcg_temp_new_i32(); 738*f0984d40SFabiano Rosas load_reg_var(s, addr, a->rn); 739*f0984d40SFabiano Rosas 740*f0984d40SFabiano Rosas for (reg = 0; reg < nregs; reg++) { 741*f0984d40SFabiano Rosas if (a->l) { 742*f0984d40SFabiano Rosas gen_aa32_ld_internal_i32(s, tmp, addr, get_mem_index(s), mop); 743*f0984d40SFabiano Rosas neon_store_element(vd, a->reg_idx, a->size, tmp); 744*f0984d40SFabiano Rosas } else { /* Store */ 745*f0984d40SFabiano Rosas neon_load_element(tmp, vd, a->reg_idx, a->size); 746*f0984d40SFabiano Rosas gen_aa32_st_internal_i32(s, tmp, addr, get_mem_index(s), mop); 747*f0984d40SFabiano Rosas } 748*f0984d40SFabiano Rosas vd += a->stride; 749*f0984d40SFabiano Rosas tcg_gen_addi_i32(addr, addr, 1 << a->size); 750*f0984d40SFabiano Rosas 751*f0984d40SFabiano Rosas /* Subsequent memory operations inherit alignment */ 752*f0984d40SFabiano Rosas mop &= ~MO_AMASK; 753*f0984d40SFabiano Rosas } 754*f0984d40SFabiano Rosas tcg_temp_free_i32(addr); 755*f0984d40SFabiano Rosas tcg_temp_free_i32(tmp); 756*f0984d40SFabiano Rosas 757*f0984d40SFabiano Rosas gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << a->size) * nregs); 758*f0984d40SFabiano Rosas 759*f0984d40SFabiano Rosas return true; 760*f0984d40SFabiano Rosas } 761*f0984d40SFabiano Rosas 762*f0984d40SFabiano Rosas static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn) 763*f0984d40SFabiano Rosas { 764*f0984d40SFabiano Rosas int vec_size = a->q ? 
16 : 8; 765*f0984d40SFabiano Rosas int rd_ofs = neon_full_reg_offset(a->vd); 766*f0984d40SFabiano Rosas int rn_ofs = neon_full_reg_offset(a->vn); 767*f0984d40SFabiano Rosas int rm_ofs = neon_full_reg_offset(a->vm); 768*f0984d40SFabiano Rosas 769*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 770*f0984d40SFabiano Rosas return false; 771*f0984d40SFabiano Rosas } 772*f0984d40SFabiano Rosas 773*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. */ 774*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 775*f0984d40SFabiano Rosas ((a->vd | a->vn | a->vm) & 0x10)) { 776*f0984d40SFabiano Rosas return false; 777*f0984d40SFabiano Rosas } 778*f0984d40SFabiano Rosas 779*f0984d40SFabiano Rosas if ((a->vn | a->vm | a->vd) & a->q) { 780*f0984d40SFabiano Rosas return false; 781*f0984d40SFabiano Rosas } 782*f0984d40SFabiano Rosas 783*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 784*f0984d40SFabiano Rosas return true; 785*f0984d40SFabiano Rosas } 786*f0984d40SFabiano Rosas 787*f0984d40SFabiano Rosas fn(a->size, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size); 788*f0984d40SFabiano Rosas return true; 789*f0984d40SFabiano Rosas } 790*f0984d40SFabiano Rosas 791*f0984d40SFabiano Rosas #define DO_3SAME(INSN, FUNC) \ 792*f0984d40SFabiano Rosas static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ 793*f0984d40SFabiano Rosas { \ 794*f0984d40SFabiano Rosas return do_3same(s, a, FUNC); \ 795*f0984d40SFabiano Rosas } 796*f0984d40SFabiano Rosas 797*f0984d40SFabiano Rosas DO_3SAME(VADD, tcg_gen_gvec_add) 798*f0984d40SFabiano Rosas DO_3SAME(VSUB, tcg_gen_gvec_sub) 799*f0984d40SFabiano Rosas DO_3SAME(VAND, tcg_gen_gvec_and) 800*f0984d40SFabiano Rosas DO_3SAME(VBIC, tcg_gen_gvec_andc) 801*f0984d40SFabiano Rosas DO_3SAME(VORR, tcg_gen_gvec_or) 802*f0984d40SFabiano Rosas DO_3SAME(VORN, tcg_gen_gvec_orc) 803*f0984d40SFabiano Rosas DO_3SAME(VEOR, tcg_gen_gvec_xor) 804*f0984d40SFabiano Rosas DO_3SAME(VSHL_S, gen_gvec_sshl) 
805*f0984d40SFabiano Rosas DO_3SAME(VSHL_U, gen_gvec_ushl) 806*f0984d40SFabiano Rosas DO_3SAME(VQADD_S, gen_gvec_sqadd_qc) 807*f0984d40SFabiano Rosas DO_3SAME(VQADD_U, gen_gvec_uqadd_qc) 808*f0984d40SFabiano Rosas DO_3SAME(VQSUB_S, gen_gvec_sqsub_qc) 809*f0984d40SFabiano Rosas DO_3SAME(VQSUB_U, gen_gvec_uqsub_qc) 810*f0984d40SFabiano Rosas 811*f0984d40SFabiano Rosas /* These insns are all gvec_bitsel but with the inputs in various orders. */ 812*f0984d40SFabiano Rosas #define DO_3SAME_BITSEL(INSN, O1, O2, O3) \ 813*f0984d40SFabiano Rosas static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ 814*f0984d40SFabiano Rosas uint32_t rn_ofs, uint32_t rm_ofs, \ 815*f0984d40SFabiano Rosas uint32_t oprsz, uint32_t maxsz) \ 816*f0984d40SFabiano Rosas { \ 817*f0984d40SFabiano Rosas tcg_gen_gvec_bitsel(vece, rd_ofs, O1, O2, O3, oprsz, maxsz); \ 818*f0984d40SFabiano Rosas } \ 819*f0984d40SFabiano Rosas DO_3SAME(INSN, gen_##INSN##_3s) 820*f0984d40SFabiano Rosas 821*f0984d40SFabiano Rosas DO_3SAME_BITSEL(VBSL, rd_ofs, rn_ofs, rm_ofs) 822*f0984d40SFabiano Rosas DO_3SAME_BITSEL(VBIT, rm_ofs, rn_ofs, rd_ofs) 823*f0984d40SFabiano Rosas DO_3SAME_BITSEL(VBIF, rm_ofs, rd_ofs, rn_ofs) 824*f0984d40SFabiano Rosas 825*f0984d40SFabiano Rosas #define DO_3SAME_NO_SZ_3(INSN, FUNC) \ 826*f0984d40SFabiano Rosas static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ 827*f0984d40SFabiano Rosas { \ 828*f0984d40SFabiano Rosas if (a->size == 3) { \ 829*f0984d40SFabiano Rosas return false; \ 830*f0984d40SFabiano Rosas } \ 831*f0984d40SFabiano Rosas return do_3same(s, a, FUNC); \ 832*f0984d40SFabiano Rosas } 833*f0984d40SFabiano Rosas 834*f0984d40SFabiano Rosas DO_3SAME_NO_SZ_3(VMAX_S, tcg_gen_gvec_smax) 835*f0984d40SFabiano Rosas DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax) 836*f0984d40SFabiano Rosas DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin) 837*f0984d40SFabiano Rosas DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin) 838*f0984d40SFabiano Rosas DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul) 
839*f0984d40SFabiano Rosas DO_3SAME_NO_SZ_3(VMLA, gen_gvec_mla) 840*f0984d40SFabiano Rosas DO_3SAME_NO_SZ_3(VMLS, gen_gvec_mls) 841*f0984d40SFabiano Rosas DO_3SAME_NO_SZ_3(VTST, gen_gvec_cmtst) 842*f0984d40SFabiano Rosas DO_3SAME_NO_SZ_3(VABD_S, gen_gvec_sabd) 843*f0984d40SFabiano Rosas DO_3SAME_NO_SZ_3(VABA_S, gen_gvec_saba) 844*f0984d40SFabiano Rosas DO_3SAME_NO_SZ_3(VABD_U, gen_gvec_uabd) 845*f0984d40SFabiano Rosas DO_3SAME_NO_SZ_3(VABA_U, gen_gvec_uaba) 846*f0984d40SFabiano Rosas 847*f0984d40SFabiano Rosas #define DO_3SAME_CMP(INSN, COND) \ 848*f0984d40SFabiano Rosas static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ 849*f0984d40SFabiano Rosas uint32_t rn_ofs, uint32_t rm_ofs, \ 850*f0984d40SFabiano Rosas uint32_t oprsz, uint32_t maxsz) \ 851*f0984d40SFabiano Rosas { \ 852*f0984d40SFabiano Rosas tcg_gen_gvec_cmp(COND, vece, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz); \ 853*f0984d40SFabiano Rosas } \ 854*f0984d40SFabiano Rosas DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s) 855*f0984d40SFabiano Rosas 856*f0984d40SFabiano Rosas DO_3SAME_CMP(VCGT_S, TCG_COND_GT) 857*f0984d40SFabiano Rosas DO_3SAME_CMP(VCGT_U, TCG_COND_GTU) 858*f0984d40SFabiano Rosas DO_3SAME_CMP(VCGE_S, TCG_COND_GE) 859*f0984d40SFabiano Rosas DO_3SAME_CMP(VCGE_U, TCG_COND_GEU) 860*f0984d40SFabiano Rosas DO_3SAME_CMP(VCEQ, TCG_COND_EQ) 861*f0984d40SFabiano Rosas 862*f0984d40SFabiano Rosas #define WRAP_OOL_FN(WRAPNAME, FUNC) \ 863*f0984d40SFabiano Rosas static void WRAPNAME(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, \ 864*f0984d40SFabiano Rosas uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz) \ 865*f0984d40SFabiano Rosas { \ 866*f0984d40SFabiano Rosas tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, 0, FUNC); \ 867*f0984d40SFabiano Rosas } 868*f0984d40SFabiano Rosas 869*f0984d40SFabiano Rosas WRAP_OOL_FN(gen_VMUL_p_3s, gen_helper_gvec_pmul_b) 870*f0984d40SFabiano Rosas 871*f0984d40SFabiano Rosas static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a) 872*f0984d40SFabiano Rosas { 
873*f0984d40SFabiano Rosas if (a->size != 0) { 874*f0984d40SFabiano Rosas return false; 875*f0984d40SFabiano Rosas } 876*f0984d40SFabiano Rosas return do_3same(s, a, gen_VMUL_p_3s); 877*f0984d40SFabiano Rosas } 878*f0984d40SFabiano Rosas 879*f0984d40SFabiano Rosas #define DO_VQRDMLAH(INSN, FUNC) \ 880*f0984d40SFabiano Rosas static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ 881*f0984d40SFabiano Rosas { \ 882*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_rdm, s)) { \ 883*f0984d40SFabiano Rosas return false; \ 884*f0984d40SFabiano Rosas } \ 885*f0984d40SFabiano Rosas if (a->size != 1 && a->size != 2) { \ 886*f0984d40SFabiano Rosas return false; \ 887*f0984d40SFabiano Rosas } \ 888*f0984d40SFabiano Rosas return do_3same(s, a, FUNC); \ 889*f0984d40SFabiano Rosas } 890*f0984d40SFabiano Rosas 891*f0984d40SFabiano Rosas DO_VQRDMLAH(VQRDMLAH, gen_gvec_sqrdmlah_qc) 892*f0984d40SFabiano Rosas DO_VQRDMLAH(VQRDMLSH, gen_gvec_sqrdmlsh_qc) 893*f0984d40SFabiano Rosas 894*f0984d40SFabiano Rosas #define DO_SHA1(NAME, FUNC) \ 895*f0984d40SFabiano Rosas WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \ 896*f0984d40SFabiano Rosas static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a) \ 897*f0984d40SFabiano Rosas { \ 898*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_sha1, s)) { \ 899*f0984d40SFabiano Rosas return false; \ 900*f0984d40SFabiano Rosas } \ 901*f0984d40SFabiano Rosas return do_3same(s, a, gen_##NAME##_3s); \ 902*f0984d40SFabiano Rosas } 903*f0984d40SFabiano Rosas 904*f0984d40SFabiano Rosas DO_SHA1(SHA1C, gen_helper_crypto_sha1c) 905*f0984d40SFabiano Rosas DO_SHA1(SHA1P, gen_helper_crypto_sha1p) 906*f0984d40SFabiano Rosas DO_SHA1(SHA1M, gen_helper_crypto_sha1m) 907*f0984d40SFabiano Rosas DO_SHA1(SHA1SU0, gen_helper_crypto_sha1su0) 908*f0984d40SFabiano Rosas 909*f0984d40SFabiano Rosas #define DO_SHA2(NAME, FUNC) \ 910*f0984d40SFabiano Rosas WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \ 911*f0984d40SFabiano Rosas static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a) \ 
912*f0984d40SFabiano Rosas { \ 913*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_sha2, s)) { \ 914*f0984d40SFabiano Rosas return false; \ 915*f0984d40SFabiano Rosas } \ 916*f0984d40SFabiano Rosas return do_3same(s, a, gen_##NAME##_3s); \ 917*f0984d40SFabiano Rosas } 918*f0984d40SFabiano Rosas 919*f0984d40SFabiano Rosas DO_SHA2(SHA256H, gen_helper_crypto_sha256h) 920*f0984d40SFabiano Rosas DO_SHA2(SHA256H2, gen_helper_crypto_sha256h2) 921*f0984d40SFabiano Rosas DO_SHA2(SHA256SU1, gen_helper_crypto_sha256su1) 922*f0984d40SFabiano Rosas 923*f0984d40SFabiano Rosas #define DO_3SAME_64(INSN, FUNC) \ 924*f0984d40SFabiano Rosas static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ 925*f0984d40SFabiano Rosas uint32_t rn_ofs, uint32_t rm_ofs, \ 926*f0984d40SFabiano Rosas uint32_t oprsz, uint32_t maxsz) \ 927*f0984d40SFabiano Rosas { \ 928*f0984d40SFabiano Rosas static const GVecGen3 op = { .fni8 = FUNC }; \ 929*f0984d40SFabiano Rosas tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &op); \ 930*f0984d40SFabiano Rosas } \ 931*f0984d40SFabiano Rosas DO_3SAME(INSN, gen_##INSN##_3s) 932*f0984d40SFabiano Rosas 933*f0984d40SFabiano Rosas #define DO_3SAME_64_ENV(INSN, FUNC) \ 934*f0984d40SFabiano Rosas static void gen_##INSN##_elt(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) \ 935*f0984d40SFabiano Rosas { \ 936*f0984d40SFabiano Rosas FUNC(d, cpu_env, n, m); \ 937*f0984d40SFabiano Rosas } \ 938*f0984d40SFabiano Rosas DO_3SAME_64(INSN, gen_##INSN##_elt) 939*f0984d40SFabiano Rosas 940*f0984d40SFabiano Rosas DO_3SAME_64(VRSHL_S64, gen_helper_neon_rshl_s64) 941*f0984d40SFabiano Rosas DO_3SAME_64(VRSHL_U64, gen_helper_neon_rshl_u64) 942*f0984d40SFabiano Rosas DO_3SAME_64_ENV(VQSHL_S64, gen_helper_neon_qshl_s64) 943*f0984d40SFabiano Rosas DO_3SAME_64_ENV(VQSHL_U64, gen_helper_neon_qshl_u64) 944*f0984d40SFabiano Rosas DO_3SAME_64_ENV(VQRSHL_S64, gen_helper_neon_qrshl_s64) 945*f0984d40SFabiano Rosas DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64) 946*f0984d40SFabiano Rosas 
947*f0984d40SFabiano Rosas #define DO_3SAME_32(INSN, FUNC) \ 948*f0984d40SFabiano Rosas static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ 949*f0984d40SFabiano Rosas uint32_t rn_ofs, uint32_t rm_ofs, \ 950*f0984d40SFabiano Rosas uint32_t oprsz, uint32_t maxsz) \ 951*f0984d40SFabiano Rosas { \ 952*f0984d40SFabiano Rosas static const GVecGen3 ops[4] = { \ 953*f0984d40SFabiano Rosas { .fni4 = gen_helper_neon_##FUNC##8 }, \ 954*f0984d40SFabiano Rosas { .fni4 = gen_helper_neon_##FUNC##16 }, \ 955*f0984d40SFabiano Rosas { .fni4 = gen_helper_neon_##FUNC##32 }, \ 956*f0984d40SFabiano Rosas { 0 }, \ 957*f0984d40SFabiano Rosas }; \ 958*f0984d40SFabiano Rosas tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \ 959*f0984d40SFabiano Rosas } \ 960*f0984d40SFabiano Rosas static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ 961*f0984d40SFabiano Rosas { \ 962*f0984d40SFabiano Rosas if (a->size > 2) { \ 963*f0984d40SFabiano Rosas return false; \ 964*f0984d40SFabiano Rosas } \ 965*f0984d40SFabiano Rosas return do_3same(s, a, gen_##INSN##_3s); \ 966*f0984d40SFabiano Rosas } 967*f0984d40SFabiano Rosas 968*f0984d40SFabiano Rosas /* 969*f0984d40SFabiano Rosas * Some helper functions need to be passed the cpu_env. In order 970*f0984d40SFabiano Rosas * to use those with the gvec APIs like tcg_gen_gvec_3() we need 971*f0984d40SFabiano Rosas * to create wrapper functions whose prototype is a NeonGenTwoOpFn() 972*f0984d40SFabiano Rosas * and which call a NeonGenTwoOpEnvFn(). 
973*f0984d40SFabiano Rosas */ 974*f0984d40SFabiano Rosas #define WRAP_ENV_FN(WRAPNAME, FUNC) \ 975*f0984d40SFabiano Rosas static void WRAPNAME(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m) \ 976*f0984d40SFabiano Rosas { \ 977*f0984d40SFabiano Rosas FUNC(d, cpu_env, n, m); \ 978*f0984d40SFabiano Rosas } 979*f0984d40SFabiano Rosas 980*f0984d40SFabiano Rosas #define DO_3SAME_32_ENV(INSN, FUNC) \ 981*f0984d40SFabiano Rosas WRAP_ENV_FN(gen_##INSN##_tramp8, gen_helper_neon_##FUNC##8); \ 982*f0984d40SFabiano Rosas WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##16); \ 983*f0984d40SFabiano Rosas WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##32); \ 984*f0984d40SFabiano Rosas static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ 985*f0984d40SFabiano Rosas uint32_t rn_ofs, uint32_t rm_ofs, \ 986*f0984d40SFabiano Rosas uint32_t oprsz, uint32_t maxsz) \ 987*f0984d40SFabiano Rosas { \ 988*f0984d40SFabiano Rosas static const GVecGen3 ops[4] = { \ 989*f0984d40SFabiano Rosas { .fni4 = gen_##INSN##_tramp8 }, \ 990*f0984d40SFabiano Rosas { .fni4 = gen_##INSN##_tramp16 }, \ 991*f0984d40SFabiano Rosas { .fni4 = gen_##INSN##_tramp32 }, \ 992*f0984d40SFabiano Rosas { 0 }, \ 993*f0984d40SFabiano Rosas }; \ 994*f0984d40SFabiano Rosas tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \ 995*f0984d40SFabiano Rosas } \ 996*f0984d40SFabiano Rosas static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ 997*f0984d40SFabiano Rosas { \ 998*f0984d40SFabiano Rosas if (a->size > 2) { \ 999*f0984d40SFabiano Rosas return false; \ 1000*f0984d40SFabiano Rosas } \ 1001*f0984d40SFabiano Rosas return do_3same(s, a, gen_##INSN##_3s); \ 1002*f0984d40SFabiano Rosas } 1003*f0984d40SFabiano Rosas 1004*f0984d40SFabiano Rosas DO_3SAME_32(VHADD_S, hadd_s) 1005*f0984d40SFabiano Rosas DO_3SAME_32(VHADD_U, hadd_u) 1006*f0984d40SFabiano Rosas DO_3SAME_32(VHSUB_S, hsub_s) 1007*f0984d40SFabiano Rosas DO_3SAME_32(VHSUB_U, hsub_u) 1008*f0984d40SFabiano Rosas DO_3SAME_32(VRHADD_S, 
rhadd_s) 1009*f0984d40SFabiano Rosas DO_3SAME_32(VRHADD_U, rhadd_u) 1010*f0984d40SFabiano Rosas DO_3SAME_32(VRSHL_S, rshl_s) 1011*f0984d40SFabiano Rosas DO_3SAME_32(VRSHL_U, rshl_u) 1012*f0984d40SFabiano Rosas 1013*f0984d40SFabiano Rosas DO_3SAME_32_ENV(VQSHL_S, qshl_s) 1014*f0984d40SFabiano Rosas DO_3SAME_32_ENV(VQSHL_U, qshl_u) 1015*f0984d40SFabiano Rosas DO_3SAME_32_ENV(VQRSHL_S, qrshl_s) 1016*f0984d40SFabiano Rosas DO_3SAME_32_ENV(VQRSHL_U, qrshl_u) 1017*f0984d40SFabiano Rosas 1018*f0984d40SFabiano Rosas static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn) 1019*f0984d40SFabiano Rosas { 1020*f0984d40SFabiano Rosas /* Operations handled pairwise 32 bits at a time */ 1021*f0984d40SFabiano Rosas TCGv_i32 tmp, tmp2, tmp3; 1022*f0984d40SFabiano Rosas 1023*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1024*f0984d40SFabiano Rosas return false; 1025*f0984d40SFabiano Rosas } 1026*f0984d40SFabiano Rosas 1027*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. */ 1028*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 1029*f0984d40SFabiano Rosas ((a->vd | a->vn | a->vm) & 0x10)) { 1030*f0984d40SFabiano Rosas return false; 1031*f0984d40SFabiano Rosas } 1032*f0984d40SFabiano Rosas 1033*f0984d40SFabiano Rosas if (a->size == 3) { 1034*f0984d40SFabiano Rosas return false; 1035*f0984d40SFabiano Rosas } 1036*f0984d40SFabiano Rosas 1037*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 1038*f0984d40SFabiano Rosas return true; 1039*f0984d40SFabiano Rosas } 1040*f0984d40SFabiano Rosas 1041*f0984d40SFabiano Rosas assert(a->q == 0); /* enforced by decode patterns */ 1042*f0984d40SFabiano Rosas 1043*f0984d40SFabiano Rosas /* 1044*f0984d40SFabiano Rosas * Note that we have to be careful not to clobber the source operands 1045*f0984d40SFabiano Rosas * in the "vm == vd" case by storing the result of the first pass too 1046*f0984d40SFabiano Rosas * early. 
Since Q is 0 there are always just two passes, so instead 1047*f0984d40SFabiano Rosas * of a complicated loop over each pass we just unroll. 1048*f0984d40SFabiano Rosas */ 1049*f0984d40SFabiano Rosas tmp = tcg_temp_new_i32(); 1050*f0984d40SFabiano Rosas tmp2 = tcg_temp_new_i32(); 1051*f0984d40SFabiano Rosas tmp3 = tcg_temp_new_i32(); 1052*f0984d40SFabiano Rosas 1053*f0984d40SFabiano Rosas read_neon_element32(tmp, a->vn, 0, MO_32); 1054*f0984d40SFabiano Rosas read_neon_element32(tmp2, a->vn, 1, MO_32); 1055*f0984d40SFabiano Rosas fn(tmp, tmp, tmp2); 1056*f0984d40SFabiano Rosas 1057*f0984d40SFabiano Rosas read_neon_element32(tmp3, a->vm, 0, MO_32); 1058*f0984d40SFabiano Rosas read_neon_element32(tmp2, a->vm, 1, MO_32); 1059*f0984d40SFabiano Rosas fn(tmp3, tmp3, tmp2); 1060*f0984d40SFabiano Rosas 1061*f0984d40SFabiano Rosas write_neon_element32(tmp, a->vd, 0, MO_32); 1062*f0984d40SFabiano Rosas write_neon_element32(tmp3, a->vd, 1, MO_32); 1063*f0984d40SFabiano Rosas 1064*f0984d40SFabiano Rosas tcg_temp_free_i32(tmp); 1065*f0984d40SFabiano Rosas tcg_temp_free_i32(tmp2); 1066*f0984d40SFabiano Rosas tcg_temp_free_i32(tmp3); 1067*f0984d40SFabiano Rosas return true; 1068*f0984d40SFabiano Rosas } 1069*f0984d40SFabiano Rosas 1070*f0984d40SFabiano Rosas #define DO_3SAME_PAIR(INSN, func) \ 1071*f0984d40SFabiano Rosas static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ 1072*f0984d40SFabiano Rosas { \ 1073*f0984d40SFabiano Rosas static NeonGenTwoOpFn * const fns[] = { \ 1074*f0984d40SFabiano Rosas gen_helper_neon_##func##8, \ 1075*f0984d40SFabiano Rosas gen_helper_neon_##func##16, \ 1076*f0984d40SFabiano Rosas gen_helper_neon_##func##32, \ 1077*f0984d40SFabiano Rosas }; \ 1078*f0984d40SFabiano Rosas if (a->size > 2) { \ 1079*f0984d40SFabiano Rosas return false; \ 1080*f0984d40SFabiano Rosas } \ 1081*f0984d40SFabiano Rosas return do_3same_pair(s, a, fns[a->size]); \ 1082*f0984d40SFabiano Rosas } 1083*f0984d40SFabiano Rosas 1084*f0984d40SFabiano Rosas /* 32-bit pairwise 
ops end up the same as the elementwise versions. */ 1085*f0984d40SFabiano Rosas #define gen_helper_neon_pmax_s32 tcg_gen_smax_i32 1086*f0984d40SFabiano Rosas #define gen_helper_neon_pmax_u32 tcg_gen_umax_i32 1087*f0984d40SFabiano Rosas #define gen_helper_neon_pmin_s32 tcg_gen_smin_i32 1088*f0984d40SFabiano Rosas #define gen_helper_neon_pmin_u32 tcg_gen_umin_i32 1089*f0984d40SFabiano Rosas #define gen_helper_neon_padd_u32 tcg_gen_add_i32 1090*f0984d40SFabiano Rosas 1091*f0984d40SFabiano Rosas DO_3SAME_PAIR(VPMAX_S, pmax_s) 1092*f0984d40SFabiano Rosas DO_3SAME_PAIR(VPMIN_S, pmin_s) 1093*f0984d40SFabiano Rosas DO_3SAME_PAIR(VPMAX_U, pmax_u) 1094*f0984d40SFabiano Rosas DO_3SAME_PAIR(VPMIN_U, pmin_u) 1095*f0984d40SFabiano Rosas DO_3SAME_PAIR(VPADD, padd_u) 1096*f0984d40SFabiano Rosas 1097*f0984d40SFabiano Rosas #define DO_3SAME_VQDMULH(INSN, FUNC) \ 1098*f0984d40SFabiano Rosas WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##_s16); \ 1099*f0984d40SFabiano Rosas WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##_s32); \ 1100*f0984d40SFabiano Rosas static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ 1101*f0984d40SFabiano Rosas uint32_t rn_ofs, uint32_t rm_ofs, \ 1102*f0984d40SFabiano Rosas uint32_t oprsz, uint32_t maxsz) \ 1103*f0984d40SFabiano Rosas { \ 1104*f0984d40SFabiano Rosas static const GVecGen3 ops[2] = { \ 1105*f0984d40SFabiano Rosas { .fni4 = gen_##INSN##_tramp16 }, \ 1106*f0984d40SFabiano Rosas { .fni4 = gen_##INSN##_tramp32 }, \ 1107*f0984d40SFabiano Rosas }; \ 1108*f0984d40SFabiano Rosas tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece - 1]); \ 1109*f0984d40SFabiano Rosas } \ 1110*f0984d40SFabiano Rosas static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ 1111*f0984d40SFabiano Rosas { \ 1112*f0984d40SFabiano Rosas if (a->size != 1 && a->size != 2) { \ 1113*f0984d40SFabiano Rosas return false; \ 1114*f0984d40SFabiano Rosas } \ 1115*f0984d40SFabiano Rosas return do_3same(s, a, gen_##INSN##_3s); \ 
1116*f0984d40SFabiano Rosas } 1117*f0984d40SFabiano Rosas 1118*f0984d40SFabiano Rosas DO_3SAME_VQDMULH(VQDMULH, qdmulh) 1119*f0984d40SFabiano Rosas DO_3SAME_VQDMULH(VQRDMULH, qrdmulh) 1120*f0984d40SFabiano Rosas 1121*f0984d40SFabiano Rosas #define WRAP_FP_GVEC(WRAPNAME, FPST, FUNC) \ 1122*f0984d40SFabiano Rosas static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \ 1123*f0984d40SFabiano Rosas uint32_t rn_ofs, uint32_t rm_ofs, \ 1124*f0984d40SFabiano Rosas uint32_t oprsz, uint32_t maxsz) \ 1125*f0984d40SFabiano Rosas { \ 1126*f0984d40SFabiano Rosas TCGv_ptr fpst = fpstatus_ptr(FPST); \ 1127*f0984d40SFabiano Rosas tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpst, \ 1128*f0984d40SFabiano Rosas oprsz, maxsz, 0, FUNC); \ 1129*f0984d40SFabiano Rosas tcg_temp_free_ptr(fpst); \ 1130*f0984d40SFabiano Rosas } 1131*f0984d40SFabiano Rosas 1132*f0984d40SFabiano Rosas #define DO_3S_FP_GVEC(INSN,SFUNC,HFUNC) \ 1133*f0984d40SFabiano Rosas WRAP_FP_GVEC(gen_##INSN##_fp32_3s, FPST_STD, SFUNC) \ 1134*f0984d40SFabiano Rosas WRAP_FP_GVEC(gen_##INSN##_fp16_3s, FPST_STD_F16, HFUNC) \ 1135*f0984d40SFabiano Rosas static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \ 1136*f0984d40SFabiano Rosas { \ 1137*f0984d40SFabiano Rosas if (a->size == MO_16) { \ 1138*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_fp16_arith, s)) { \ 1139*f0984d40SFabiano Rosas return false; \ 1140*f0984d40SFabiano Rosas } \ 1141*f0984d40SFabiano Rosas return do_3same(s, a, gen_##INSN##_fp16_3s); \ 1142*f0984d40SFabiano Rosas } \ 1143*f0984d40SFabiano Rosas return do_3same(s, a, gen_##INSN##_fp32_3s); \ 1144*f0984d40SFabiano Rosas } 1145*f0984d40SFabiano Rosas 1146*f0984d40SFabiano Rosas 1147*f0984d40SFabiano Rosas DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s, gen_helper_gvec_fadd_h) 1148*f0984d40SFabiano Rosas DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s, gen_helper_gvec_fsub_h) 1149*f0984d40SFabiano Rosas DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s, gen_helper_gvec_fabd_h) 1150*f0984d40SFabiano Rosas 
DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s, gen_helper_gvec_fmul_h) 1151*f0984d40SFabiano Rosas DO_3S_FP_GVEC(VCEQ, gen_helper_gvec_fceq_s, gen_helper_gvec_fceq_h) 1152*f0984d40SFabiano Rosas DO_3S_FP_GVEC(VCGE, gen_helper_gvec_fcge_s, gen_helper_gvec_fcge_h) 1153*f0984d40SFabiano Rosas DO_3S_FP_GVEC(VCGT, gen_helper_gvec_fcgt_s, gen_helper_gvec_fcgt_h) 1154*f0984d40SFabiano Rosas DO_3S_FP_GVEC(VACGE, gen_helper_gvec_facge_s, gen_helper_gvec_facge_h) 1155*f0984d40SFabiano Rosas DO_3S_FP_GVEC(VACGT, gen_helper_gvec_facgt_s, gen_helper_gvec_facgt_h) 1156*f0984d40SFabiano Rosas DO_3S_FP_GVEC(VMAX, gen_helper_gvec_fmax_s, gen_helper_gvec_fmax_h) 1157*f0984d40SFabiano Rosas DO_3S_FP_GVEC(VMIN, gen_helper_gvec_fmin_s, gen_helper_gvec_fmin_h) 1158*f0984d40SFabiano Rosas DO_3S_FP_GVEC(VMLA, gen_helper_gvec_fmla_s, gen_helper_gvec_fmla_h) 1159*f0984d40SFabiano Rosas DO_3S_FP_GVEC(VMLS, gen_helper_gvec_fmls_s, gen_helper_gvec_fmls_h) 1160*f0984d40SFabiano Rosas DO_3S_FP_GVEC(VFMA, gen_helper_gvec_vfma_s, gen_helper_gvec_vfma_h) 1161*f0984d40SFabiano Rosas DO_3S_FP_GVEC(VFMS, gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_h) 1162*f0984d40SFabiano Rosas DO_3S_FP_GVEC(VRECPS, gen_helper_gvec_recps_nf_s, gen_helper_gvec_recps_nf_h) 1163*f0984d40SFabiano Rosas DO_3S_FP_GVEC(VRSQRTS, gen_helper_gvec_rsqrts_nf_s, gen_helper_gvec_rsqrts_nf_h) 1164*f0984d40SFabiano Rosas 1165*f0984d40SFabiano Rosas WRAP_FP_GVEC(gen_VMAXNM_fp32_3s, FPST_STD, gen_helper_gvec_fmaxnum_s) 1166*f0984d40SFabiano Rosas WRAP_FP_GVEC(gen_VMAXNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fmaxnum_h) 1167*f0984d40SFabiano Rosas WRAP_FP_GVEC(gen_VMINNM_fp32_3s, FPST_STD, gen_helper_gvec_fminnum_s) 1168*f0984d40SFabiano Rosas WRAP_FP_GVEC(gen_VMINNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fminnum_h) 1169*f0984d40SFabiano Rosas 1170*f0984d40SFabiano Rosas static bool trans_VMAXNM_fp_3s(DisasContext *s, arg_3same *a) 1171*f0984d40SFabiano Rosas { 1172*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_V8)) { 
1173*f0984d40SFabiano Rosas return false; 1174*f0984d40SFabiano Rosas } 1175*f0984d40SFabiano Rosas 1176*f0984d40SFabiano Rosas if (a->size == MO_16) { 1177*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_fp16_arith, s)) { 1178*f0984d40SFabiano Rosas return false; 1179*f0984d40SFabiano Rosas } 1180*f0984d40SFabiano Rosas return do_3same(s, a, gen_VMAXNM_fp16_3s); 1181*f0984d40SFabiano Rosas } 1182*f0984d40SFabiano Rosas return do_3same(s, a, gen_VMAXNM_fp32_3s); 1183*f0984d40SFabiano Rosas } 1184*f0984d40SFabiano Rosas 1185*f0984d40SFabiano Rosas static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a) 1186*f0984d40SFabiano Rosas { 1187*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_V8)) { 1188*f0984d40SFabiano Rosas return false; 1189*f0984d40SFabiano Rosas } 1190*f0984d40SFabiano Rosas 1191*f0984d40SFabiano Rosas if (a->size == MO_16) { 1192*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_fp16_arith, s)) { 1193*f0984d40SFabiano Rosas return false; 1194*f0984d40SFabiano Rosas } 1195*f0984d40SFabiano Rosas return do_3same(s, a, gen_VMINNM_fp16_3s); 1196*f0984d40SFabiano Rosas } 1197*f0984d40SFabiano Rosas return do_3same(s, a, gen_VMINNM_fp32_3s); 1198*f0984d40SFabiano Rosas } 1199*f0984d40SFabiano Rosas 1200*f0984d40SFabiano Rosas static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, 1201*f0984d40SFabiano Rosas gen_helper_gvec_3_ptr *fn) 1202*f0984d40SFabiano Rosas { 1203*f0984d40SFabiano Rosas /* FP pairwise operations */ 1204*f0984d40SFabiano Rosas TCGv_ptr fpstatus; 1205*f0984d40SFabiano Rosas 1206*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1207*f0984d40SFabiano Rosas return false; 1208*f0984d40SFabiano Rosas } 1209*f0984d40SFabiano Rosas 1210*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 1211*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 1212*f0984d40SFabiano Rosas ((a->vd | a->vn | a->vm) & 0x10)) { 1213*f0984d40SFabiano Rosas return false; 1214*f0984d40SFabiano Rosas } 1215*f0984d40SFabiano Rosas 1216*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 1217*f0984d40SFabiano Rosas return true; 1218*f0984d40SFabiano Rosas } 1219*f0984d40SFabiano Rosas 1220*f0984d40SFabiano Rosas assert(a->q == 0); /* enforced by decode patterns */ 1221*f0984d40SFabiano Rosas 1222*f0984d40SFabiano Rosas 1223*f0984d40SFabiano Rosas fpstatus = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD); 1224*f0984d40SFabiano Rosas tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd), 1225*f0984d40SFabiano Rosas vfp_reg_offset(1, a->vn), 1226*f0984d40SFabiano Rosas vfp_reg_offset(1, a->vm), 1227*f0984d40SFabiano Rosas fpstatus, 8, 8, 0, fn); 1228*f0984d40SFabiano Rosas tcg_temp_free_ptr(fpstatus); 1229*f0984d40SFabiano Rosas 1230*f0984d40SFabiano Rosas return true; 1231*f0984d40SFabiano Rosas } 1232*f0984d40SFabiano Rosas 1233*f0984d40SFabiano Rosas /* 1234*f0984d40SFabiano Rosas * For all the functions using this macro, size == 1 means fp16, 1235*f0984d40SFabiano Rosas * which is an architecture extension we don't implement yet. 
1236*f0984d40SFabiano Rosas */ 1237*f0984d40SFabiano Rosas #define DO_3S_FP_PAIR(INSN,FUNC) \ 1238*f0984d40SFabiano Rosas static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \ 1239*f0984d40SFabiano Rosas { \ 1240*f0984d40SFabiano Rosas if (a->size == MO_16) { \ 1241*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_fp16_arith, s)) { \ 1242*f0984d40SFabiano Rosas return false; \ 1243*f0984d40SFabiano Rosas } \ 1244*f0984d40SFabiano Rosas return do_3same_fp_pair(s, a, FUNC##h); \ 1245*f0984d40SFabiano Rosas } \ 1246*f0984d40SFabiano Rosas return do_3same_fp_pair(s, a, FUNC##s); \ 1247*f0984d40SFabiano Rosas } 1248*f0984d40SFabiano Rosas 1249*f0984d40SFabiano Rosas DO_3S_FP_PAIR(VPADD, gen_helper_neon_padd) 1250*f0984d40SFabiano Rosas DO_3S_FP_PAIR(VPMAX, gen_helper_neon_pmax) 1251*f0984d40SFabiano Rosas DO_3S_FP_PAIR(VPMIN, gen_helper_neon_pmin) 1252*f0984d40SFabiano Rosas 1253*f0984d40SFabiano Rosas static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn) 1254*f0984d40SFabiano Rosas { 1255*f0984d40SFabiano Rosas /* Handle a 2-reg-shift insn which can be vectorized. */ 1256*f0984d40SFabiano Rosas int vec_size = a->q ? 16 : 8; 1257*f0984d40SFabiano Rosas int rd_ofs = neon_full_reg_offset(a->vd); 1258*f0984d40SFabiano Rosas int rm_ofs = neon_full_reg_offset(a->vm); 1259*f0984d40SFabiano Rosas 1260*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1261*f0984d40SFabiano Rosas return false; 1262*f0984d40SFabiano Rosas } 1263*f0984d40SFabiano Rosas 1264*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 1265*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 1266*f0984d40SFabiano Rosas ((a->vd | a->vm) & 0x10)) { 1267*f0984d40SFabiano Rosas return false; 1268*f0984d40SFabiano Rosas } 1269*f0984d40SFabiano Rosas 1270*f0984d40SFabiano Rosas if ((a->vm | a->vd) & a->q) { 1271*f0984d40SFabiano Rosas return false; 1272*f0984d40SFabiano Rosas } 1273*f0984d40SFabiano Rosas 1274*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 1275*f0984d40SFabiano Rosas return true; 1276*f0984d40SFabiano Rosas } 1277*f0984d40SFabiano Rosas 1278*f0984d40SFabiano Rosas fn(a->size, rd_ofs, rm_ofs, a->shift, vec_size, vec_size); 1279*f0984d40SFabiano Rosas return true; 1280*f0984d40SFabiano Rosas } 1281*f0984d40SFabiano Rosas 1282*f0984d40SFabiano Rosas #define DO_2SH(INSN, FUNC) \ 1283*f0984d40SFabiano Rosas static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ 1284*f0984d40SFabiano Rosas { \ 1285*f0984d40SFabiano Rosas return do_vector_2sh(s, a, FUNC); \ 1286*f0984d40SFabiano Rosas } \ 1287*f0984d40SFabiano Rosas 1288*f0984d40SFabiano Rosas DO_2SH(VSHL, tcg_gen_gvec_shli) 1289*f0984d40SFabiano Rosas DO_2SH(VSLI, gen_gvec_sli) 1290*f0984d40SFabiano Rosas DO_2SH(VSRI, gen_gvec_sri) 1291*f0984d40SFabiano Rosas DO_2SH(VSRA_S, gen_gvec_ssra) 1292*f0984d40SFabiano Rosas DO_2SH(VSRA_U, gen_gvec_usra) 1293*f0984d40SFabiano Rosas DO_2SH(VRSHR_S, gen_gvec_srshr) 1294*f0984d40SFabiano Rosas DO_2SH(VRSHR_U, gen_gvec_urshr) 1295*f0984d40SFabiano Rosas DO_2SH(VRSRA_S, gen_gvec_srsra) 1296*f0984d40SFabiano Rosas DO_2SH(VRSRA_U, gen_gvec_ursra) 1297*f0984d40SFabiano Rosas 1298*f0984d40SFabiano Rosas static bool trans_VSHR_S_2sh(DisasContext *s, arg_2reg_shift *a) 1299*f0984d40SFabiano Rosas { 1300*f0984d40SFabiano Rosas /* Signed shift out of range results in all-sign-bits */ 1301*f0984d40SFabiano Rosas a->shift = MIN(a->shift, (8 << a->size) - 1); 1302*f0984d40SFabiano Rosas return do_vector_2sh(s, a, tcg_gen_gvec_sari); 1303*f0984d40SFabiano Rosas } 
1304*f0984d40SFabiano Rosas 1305*f0984d40SFabiano Rosas static void gen_zero_rd_2sh(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 1306*f0984d40SFabiano Rosas int64_t shift, uint32_t oprsz, uint32_t maxsz) 1307*f0984d40SFabiano Rosas { 1308*f0984d40SFabiano Rosas tcg_gen_gvec_dup_imm(vece, rd_ofs, oprsz, maxsz, 0); 1309*f0984d40SFabiano Rosas } 1310*f0984d40SFabiano Rosas 1311*f0984d40SFabiano Rosas static bool trans_VSHR_U_2sh(DisasContext *s, arg_2reg_shift *a) 1312*f0984d40SFabiano Rosas { 1313*f0984d40SFabiano Rosas /* Shift out of range is architecturally valid and results in zero. */ 1314*f0984d40SFabiano Rosas if (a->shift >= (8 << a->size)) { 1315*f0984d40SFabiano Rosas return do_vector_2sh(s, a, gen_zero_rd_2sh); 1316*f0984d40SFabiano Rosas } else { 1317*f0984d40SFabiano Rosas return do_vector_2sh(s, a, tcg_gen_gvec_shri); 1318*f0984d40SFabiano Rosas } 1319*f0984d40SFabiano Rosas } 1320*f0984d40SFabiano Rosas 1321*f0984d40SFabiano Rosas static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a, 1322*f0984d40SFabiano Rosas NeonGenTwo64OpEnvFn *fn) 1323*f0984d40SFabiano Rosas { 1324*f0984d40SFabiano Rosas /* 1325*f0984d40SFabiano Rosas * 2-reg-and-shift operations, size == 3 case, where the 1326*f0984d40SFabiano Rosas * function needs to be passed cpu_env. 1327*f0984d40SFabiano Rosas */ 1328*f0984d40SFabiano Rosas TCGv_i64 constimm; 1329*f0984d40SFabiano Rosas int pass; 1330*f0984d40SFabiano Rosas 1331*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1332*f0984d40SFabiano Rosas return false; 1333*f0984d40SFabiano Rosas } 1334*f0984d40SFabiano Rosas 1335*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 1336*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 1337*f0984d40SFabiano Rosas ((a->vd | a->vm) & 0x10)) { 1338*f0984d40SFabiano Rosas return false; 1339*f0984d40SFabiano Rosas } 1340*f0984d40SFabiano Rosas 1341*f0984d40SFabiano Rosas if ((a->vm | a->vd) & a->q) { 1342*f0984d40SFabiano Rosas return false; 1343*f0984d40SFabiano Rosas } 1344*f0984d40SFabiano Rosas 1345*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 1346*f0984d40SFabiano Rosas return true; 1347*f0984d40SFabiano Rosas } 1348*f0984d40SFabiano Rosas 1349*f0984d40SFabiano Rosas /* 1350*f0984d40SFabiano Rosas * To avoid excessive duplication of ops we implement shift 1351*f0984d40SFabiano Rosas * by immediate using the variable shift operations. 1352*f0984d40SFabiano Rosas */ 1353*f0984d40SFabiano Rosas constimm = tcg_constant_i64(dup_const(a->size, a->shift)); 1354*f0984d40SFabiano Rosas 1355*f0984d40SFabiano Rosas for (pass = 0; pass < a->q + 1; pass++) { 1356*f0984d40SFabiano Rosas TCGv_i64 tmp = tcg_temp_new_i64(); 1357*f0984d40SFabiano Rosas 1358*f0984d40SFabiano Rosas read_neon_element64(tmp, a->vm, pass, MO_64); 1359*f0984d40SFabiano Rosas fn(tmp, cpu_env, tmp, constimm); 1360*f0984d40SFabiano Rosas write_neon_element64(tmp, a->vd, pass, MO_64); 1361*f0984d40SFabiano Rosas tcg_temp_free_i64(tmp); 1362*f0984d40SFabiano Rosas } 1363*f0984d40SFabiano Rosas return true; 1364*f0984d40SFabiano Rosas } 1365*f0984d40SFabiano Rosas 1366*f0984d40SFabiano Rosas static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a, 1367*f0984d40SFabiano Rosas NeonGenTwoOpEnvFn *fn) 1368*f0984d40SFabiano Rosas { 1369*f0984d40SFabiano Rosas /* 1370*f0984d40SFabiano Rosas * 2-reg-and-shift operations, size < 3 case, where the 1371*f0984d40SFabiano Rosas * helper needs to be passed cpu_env. 
1372*f0984d40SFabiano Rosas */ 1373*f0984d40SFabiano Rosas TCGv_i32 constimm, tmp; 1374*f0984d40SFabiano Rosas int pass; 1375*f0984d40SFabiano Rosas 1376*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1377*f0984d40SFabiano Rosas return false; 1378*f0984d40SFabiano Rosas } 1379*f0984d40SFabiano Rosas 1380*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. */ 1381*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 1382*f0984d40SFabiano Rosas ((a->vd | a->vm) & 0x10)) { 1383*f0984d40SFabiano Rosas return false; 1384*f0984d40SFabiano Rosas } 1385*f0984d40SFabiano Rosas 1386*f0984d40SFabiano Rosas if ((a->vm | a->vd) & a->q) { 1387*f0984d40SFabiano Rosas return false; 1388*f0984d40SFabiano Rosas } 1389*f0984d40SFabiano Rosas 1390*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 1391*f0984d40SFabiano Rosas return true; 1392*f0984d40SFabiano Rosas } 1393*f0984d40SFabiano Rosas 1394*f0984d40SFabiano Rosas /* 1395*f0984d40SFabiano Rosas * To avoid excessive duplication of ops we implement shift 1396*f0984d40SFabiano Rosas * by immediate using the variable shift operations. 1397*f0984d40SFabiano Rosas */ 1398*f0984d40SFabiano Rosas constimm = tcg_constant_i32(dup_const(a->size, a->shift)); 1399*f0984d40SFabiano Rosas tmp = tcg_temp_new_i32(); 1400*f0984d40SFabiano Rosas 1401*f0984d40SFabiano Rosas for (pass = 0; pass < (a->q ? 
4 : 2); pass++) { 1402*f0984d40SFabiano Rosas read_neon_element32(tmp, a->vm, pass, MO_32); 1403*f0984d40SFabiano Rosas fn(tmp, cpu_env, tmp, constimm); 1404*f0984d40SFabiano Rosas write_neon_element32(tmp, a->vd, pass, MO_32); 1405*f0984d40SFabiano Rosas } 1406*f0984d40SFabiano Rosas tcg_temp_free_i32(tmp); 1407*f0984d40SFabiano Rosas return true; 1408*f0984d40SFabiano Rosas } 1409*f0984d40SFabiano Rosas 1410*f0984d40SFabiano Rosas #define DO_2SHIFT_ENV(INSN, FUNC) \ 1411*f0984d40SFabiano Rosas static bool trans_##INSN##_64_2sh(DisasContext *s, arg_2reg_shift *a) \ 1412*f0984d40SFabiano Rosas { \ 1413*f0984d40SFabiano Rosas return do_2shift_env_64(s, a, gen_helper_neon_##FUNC##64); \ 1414*f0984d40SFabiano Rosas } \ 1415*f0984d40SFabiano Rosas static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ 1416*f0984d40SFabiano Rosas { \ 1417*f0984d40SFabiano Rosas static NeonGenTwoOpEnvFn * const fns[] = { \ 1418*f0984d40SFabiano Rosas gen_helper_neon_##FUNC##8, \ 1419*f0984d40SFabiano Rosas gen_helper_neon_##FUNC##16, \ 1420*f0984d40SFabiano Rosas gen_helper_neon_##FUNC##32, \ 1421*f0984d40SFabiano Rosas }; \ 1422*f0984d40SFabiano Rosas assert(a->size < ARRAY_SIZE(fns)); \ 1423*f0984d40SFabiano Rosas return do_2shift_env_32(s, a, fns[a->size]); \ 1424*f0984d40SFabiano Rosas } 1425*f0984d40SFabiano Rosas 1426*f0984d40SFabiano Rosas DO_2SHIFT_ENV(VQSHLU, qshlu_s) 1427*f0984d40SFabiano Rosas DO_2SHIFT_ENV(VQSHL_U, qshl_u) 1428*f0984d40SFabiano Rosas DO_2SHIFT_ENV(VQSHL_S, qshl_s) 1429*f0984d40SFabiano Rosas 1430*f0984d40SFabiano Rosas static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a, 1431*f0984d40SFabiano Rosas NeonGenTwo64OpFn *shiftfn, 1432*f0984d40SFabiano Rosas NeonGenNarrowEnvFn *narrowfn) 1433*f0984d40SFabiano Rosas { 1434*f0984d40SFabiano Rosas /* 2-reg-and-shift narrowing-shift operations, size == 3 case */ 1435*f0984d40SFabiano Rosas TCGv_i64 constimm, rm1, rm2; 1436*f0984d40SFabiano Rosas TCGv_i32 rd; 1437*f0984d40SFabiano Rosas 
1438*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1439*f0984d40SFabiano Rosas return false; 1440*f0984d40SFabiano Rosas } 1441*f0984d40SFabiano Rosas 1442*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. */ 1443*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 1444*f0984d40SFabiano Rosas ((a->vd | a->vm) & 0x10)) { 1445*f0984d40SFabiano Rosas return false; 1446*f0984d40SFabiano Rosas } 1447*f0984d40SFabiano Rosas 1448*f0984d40SFabiano Rosas if (a->vm & 1) { 1449*f0984d40SFabiano Rosas return false; 1450*f0984d40SFabiano Rosas } 1451*f0984d40SFabiano Rosas 1452*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 1453*f0984d40SFabiano Rosas return true; 1454*f0984d40SFabiano Rosas } 1455*f0984d40SFabiano Rosas 1456*f0984d40SFabiano Rosas /* 1457*f0984d40SFabiano Rosas * This is always a right shift, and the shiftfn is always a 1458*f0984d40SFabiano Rosas * left-shift helper, which thus needs the negated shift count. 1459*f0984d40SFabiano Rosas */ 1460*f0984d40SFabiano Rosas constimm = tcg_constant_i64(-a->shift); 1461*f0984d40SFabiano Rosas rm1 = tcg_temp_new_i64(); 1462*f0984d40SFabiano Rosas rm2 = tcg_temp_new_i64(); 1463*f0984d40SFabiano Rosas rd = tcg_temp_new_i32(); 1464*f0984d40SFabiano Rosas 1465*f0984d40SFabiano Rosas /* Load both inputs first to avoid potential overwrite if rm == rd */ 1466*f0984d40SFabiano Rosas read_neon_element64(rm1, a->vm, 0, MO_64); 1467*f0984d40SFabiano Rosas read_neon_element64(rm2, a->vm, 1, MO_64); 1468*f0984d40SFabiano Rosas 1469*f0984d40SFabiano Rosas shiftfn(rm1, rm1, constimm); 1470*f0984d40SFabiano Rosas narrowfn(rd, cpu_env, rm1); 1471*f0984d40SFabiano Rosas write_neon_element32(rd, a->vd, 0, MO_32); 1472*f0984d40SFabiano Rosas 1473*f0984d40SFabiano Rosas shiftfn(rm2, rm2, constimm); 1474*f0984d40SFabiano Rosas narrowfn(rd, cpu_env, rm2); 1475*f0984d40SFabiano Rosas write_neon_element32(rd, a->vd, 1, MO_32); 1476*f0984d40SFabiano Rosas 1477*f0984d40SFabiano Rosas 
tcg_temp_free_i32(rd); 1478*f0984d40SFabiano Rosas tcg_temp_free_i64(rm1); 1479*f0984d40SFabiano Rosas tcg_temp_free_i64(rm2); 1480*f0984d40SFabiano Rosas 1481*f0984d40SFabiano Rosas return true; 1482*f0984d40SFabiano Rosas } 1483*f0984d40SFabiano Rosas 1484*f0984d40SFabiano Rosas static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a, 1485*f0984d40SFabiano Rosas NeonGenTwoOpFn *shiftfn, 1486*f0984d40SFabiano Rosas NeonGenNarrowEnvFn *narrowfn) 1487*f0984d40SFabiano Rosas { 1488*f0984d40SFabiano Rosas /* 2-reg-and-shift narrowing-shift operations, size < 3 case */ 1489*f0984d40SFabiano Rosas TCGv_i32 constimm, rm1, rm2, rm3, rm4; 1490*f0984d40SFabiano Rosas TCGv_i64 rtmp; 1491*f0984d40SFabiano Rosas uint32_t imm; 1492*f0984d40SFabiano Rosas 1493*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1494*f0984d40SFabiano Rosas return false; 1495*f0984d40SFabiano Rosas } 1496*f0984d40SFabiano Rosas 1497*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. */ 1498*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 1499*f0984d40SFabiano Rosas ((a->vd | a->vm) & 0x10)) { 1500*f0984d40SFabiano Rosas return false; 1501*f0984d40SFabiano Rosas } 1502*f0984d40SFabiano Rosas 1503*f0984d40SFabiano Rosas if (a->vm & 1) { 1504*f0984d40SFabiano Rosas return false; 1505*f0984d40SFabiano Rosas } 1506*f0984d40SFabiano Rosas 1507*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 1508*f0984d40SFabiano Rosas return true; 1509*f0984d40SFabiano Rosas } 1510*f0984d40SFabiano Rosas 1511*f0984d40SFabiano Rosas /* 1512*f0984d40SFabiano Rosas * This is always a right shift, and the shiftfn is always a 1513*f0984d40SFabiano Rosas * left-shift helper, which thus needs the negated shift count 1514*f0984d40SFabiano Rosas * duplicated into each lane of the immediate value. 
1515*f0984d40SFabiano Rosas */ 1516*f0984d40SFabiano Rosas if (a->size == 1) { 1517*f0984d40SFabiano Rosas imm = (uint16_t)(-a->shift); 1518*f0984d40SFabiano Rosas imm |= imm << 16; 1519*f0984d40SFabiano Rosas } else { 1520*f0984d40SFabiano Rosas /* size == 2 */ 1521*f0984d40SFabiano Rosas imm = -a->shift; 1522*f0984d40SFabiano Rosas } 1523*f0984d40SFabiano Rosas constimm = tcg_constant_i32(imm); 1524*f0984d40SFabiano Rosas 1525*f0984d40SFabiano Rosas /* Load all inputs first to avoid potential overwrite */ 1526*f0984d40SFabiano Rosas rm1 = tcg_temp_new_i32(); 1527*f0984d40SFabiano Rosas rm2 = tcg_temp_new_i32(); 1528*f0984d40SFabiano Rosas rm3 = tcg_temp_new_i32(); 1529*f0984d40SFabiano Rosas rm4 = tcg_temp_new_i32(); 1530*f0984d40SFabiano Rosas read_neon_element32(rm1, a->vm, 0, MO_32); 1531*f0984d40SFabiano Rosas read_neon_element32(rm2, a->vm, 1, MO_32); 1532*f0984d40SFabiano Rosas read_neon_element32(rm3, a->vm, 2, MO_32); 1533*f0984d40SFabiano Rosas read_neon_element32(rm4, a->vm, 3, MO_32); 1534*f0984d40SFabiano Rosas rtmp = tcg_temp_new_i64(); 1535*f0984d40SFabiano Rosas 1536*f0984d40SFabiano Rosas shiftfn(rm1, rm1, constimm); 1537*f0984d40SFabiano Rosas shiftfn(rm2, rm2, constimm); 1538*f0984d40SFabiano Rosas 1539*f0984d40SFabiano Rosas tcg_gen_concat_i32_i64(rtmp, rm1, rm2); 1540*f0984d40SFabiano Rosas tcg_temp_free_i32(rm2); 1541*f0984d40SFabiano Rosas 1542*f0984d40SFabiano Rosas narrowfn(rm1, cpu_env, rtmp); 1543*f0984d40SFabiano Rosas write_neon_element32(rm1, a->vd, 0, MO_32); 1544*f0984d40SFabiano Rosas tcg_temp_free_i32(rm1); 1545*f0984d40SFabiano Rosas 1546*f0984d40SFabiano Rosas shiftfn(rm3, rm3, constimm); 1547*f0984d40SFabiano Rosas shiftfn(rm4, rm4, constimm); 1548*f0984d40SFabiano Rosas 1549*f0984d40SFabiano Rosas tcg_gen_concat_i32_i64(rtmp, rm3, rm4); 1550*f0984d40SFabiano Rosas tcg_temp_free_i32(rm4); 1551*f0984d40SFabiano Rosas 1552*f0984d40SFabiano Rosas narrowfn(rm3, cpu_env, rtmp); 1553*f0984d40SFabiano Rosas tcg_temp_free_i64(rtmp); 
1554*f0984d40SFabiano Rosas write_neon_element32(rm3, a->vd, 1, MO_32); 1555*f0984d40SFabiano Rosas tcg_temp_free_i32(rm3); 1556*f0984d40SFabiano Rosas return true; 1557*f0984d40SFabiano Rosas } 1558*f0984d40SFabiano Rosas 1559*f0984d40SFabiano Rosas #define DO_2SN_64(INSN, FUNC, NARROWFUNC) \ 1560*f0984d40SFabiano Rosas static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ 1561*f0984d40SFabiano Rosas { \ 1562*f0984d40SFabiano Rosas return do_2shift_narrow_64(s, a, FUNC, NARROWFUNC); \ 1563*f0984d40SFabiano Rosas } 1564*f0984d40SFabiano Rosas #define DO_2SN_32(INSN, FUNC, NARROWFUNC) \ 1565*f0984d40SFabiano Rosas static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ 1566*f0984d40SFabiano Rosas { \ 1567*f0984d40SFabiano Rosas return do_2shift_narrow_32(s, a, FUNC, NARROWFUNC); \ 1568*f0984d40SFabiano Rosas } 1569*f0984d40SFabiano Rosas 1570*f0984d40SFabiano Rosas static void gen_neon_narrow_u32(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src) 1571*f0984d40SFabiano Rosas { 1572*f0984d40SFabiano Rosas tcg_gen_extrl_i64_i32(dest, src); 1573*f0984d40SFabiano Rosas } 1574*f0984d40SFabiano Rosas 1575*f0984d40SFabiano Rosas static void gen_neon_narrow_u16(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src) 1576*f0984d40SFabiano Rosas { 1577*f0984d40SFabiano Rosas gen_helper_neon_narrow_u16(dest, src); 1578*f0984d40SFabiano Rosas } 1579*f0984d40SFabiano Rosas 1580*f0984d40SFabiano Rosas static void gen_neon_narrow_u8(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src) 1581*f0984d40SFabiano Rosas { 1582*f0984d40SFabiano Rosas gen_helper_neon_narrow_u8(dest, src); 1583*f0984d40SFabiano Rosas } 1584*f0984d40SFabiano Rosas 1585*f0984d40SFabiano Rosas DO_2SN_64(VSHRN_64, gen_ushl_i64, gen_neon_narrow_u32) 1586*f0984d40SFabiano Rosas DO_2SN_32(VSHRN_32, gen_ushl_i32, gen_neon_narrow_u16) 1587*f0984d40SFabiano Rosas DO_2SN_32(VSHRN_16, gen_helper_neon_shl_u16, gen_neon_narrow_u8) 1588*f0984d40SFabiano Rosas 1589*f0984d40SFabiano Rosas DO_2SN_64(VRSHRN_64, 
gen_helper_neon_rshl_u64, gen_neon_narrow_u32) 1590*f0984d40SFabiano Rosas DO_2SN_32(VRSHRN_32, gen_helper_neon_rshl_u32, gen_neon_narrow_u16) 1591*f0984d40SFabiano Rosas DO_2SN_32(VRSHRN_16, gen_helper_neon_rshl_u16, gen_neon_narrow_u8) 1592*f0984d40SFabiano Rosas 1593*f0984d40SFabiano Rosas DO_2SN_64(VQSHRUN_64, gen_sshl_i64, gen_helper_neon_unarrow_sat32) 1594*f0984d40SFabiano Rosas DO_2SN_32(VQSHRUN_32, gen_sshl_i32, gen_helper_neon_unarrow_sat16) 1595*f0984d40SFabiano Rosas DO_2SN_32(VQSHRUN_16, gen_helper_neon_shl_s16, gen_helper_neon_unarrow_sat8) 1596*f0984d40SFabiano Rosas 1597*f0984d40SFabiano Rosas DO_2SN_64(VQRSHRUN_64, gen_helper_neon_rshl_s64, gen_helper_neon_unarrow_sat32) 1598*f0984d40SFabiano Rosas DO_2SN_32(VQRSHRUN_32, gen_helper_neon_rshl_s32, gen_helper_neon_unarrow_sat16) 1599*f0984d40SFabiano Rosas DO_2SN_32(VQRSHRUN_16, gen_helper_neon_rshl_s16, gen_helper_neon_unarrow_sat8) 1600*f0984d40SFabiano Rosas DO_2SN_64(VQSHRN_S64, gen_sshl_i64, gen_helper_neon_narrow_sat_s32) 1601*f0984d40SFabiano Rosas DO_2SN_32(VQSHRN_S32, gen_sshl_i32, gen_helper_neon_narrow_sat_s16) 1602*f0984d40SFabiano Rosas DO_2SN_32(VQSHRN_S16, gen_helper_neon_shl_s16, gen_helper_neon_narrow_sat_s8) 1603*f0984d40SFabiano Rosas 1604*f0984d40SFabiano Rosas DO_2SN_64(VQRSHRN_S64, gen_helper_neon_rshl_s64, gen_helper_neon_narrow_sat_s32) 1605*f0984d40SFabiano Rosas DO_2SN_32(VQRSHRN_S32, gen_helper_neon_rshl_s32, gen_helper_neon_narrow_sat_s16) 1606*f0984d40SFabiano Rosas DO_2SN_32(VQRSHRN_S16, gen_helper_neon_rshl_s16, gen_helper_neon_narrow_sat_s8) 1607*f0984d40SFabiano Rosas 1608*f0984d40SFabiano Rosas DO_2SN_64(VQSHRN_U64, gen_ushl_i64, gen_helper_neon_narrow_sat_u32) 1609*f0984d40SFabiano Rosas DO_2SN_32(VQSHRN_U32, gen_ushl_i32, gen_helper_neon_narrow_sat_u16) 1610*f0984d40SFabiano Rosas DO_2SN_32(VQSHRN_U16, gen_helper_neon_shl_u16, gen_helper_neon_narrow_sat_u8) 1611*f0984d40SFabiano Rosas 1612*f0984d40SFabiano Rosas DO_2SN_64(VQRSHRN_U64, gen_helper_neon_rshl_u64, 
gen_helper_neon_narrow_sat_u32) 1613*f0984d40SFabiano Rosas DO_2SN_32(VQRSHRN_U32, gen_helper_neon_rshl_u32, gen_helper_neon_narrow_sat_u16) 1614*f0984d40SFabiano Rosas DO_2SN_32(VQRSHRN_U16, gen_helper_neon_rshl_u16, gen_helper_neon_narrow_sat_u8) 1615*f0984d40SFabiano Rosas 1616*f0984d40SFabiano Rosas static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a, 1617*f0984d40SFabiano Rosas NeonGenWidenFn *widenfn, bool u) 1618*f0984d40SFabiano Rosas { 1619*f0984d40SFabiano Rosas TCGv_i64 tmp; 1620*f0984d40SFabiano Rosas TCGv_i32 rm0, rm1; 1621*f0984d40SFabiano Rosas uint64_t widen_mask = 0; 1622*f0984d40SFabiano Rosas 1623*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1624*f0984d40SFabiano Rosas return false; 1625*f0984d40SFabiano Rosas } 1626*f0984d40SFabiano Rosas 1627*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. */ 1628*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 1629*f0984d40SFabiano Rosas ((a->vd | a->vm) & 0x10)) { 1630*f0984d40SFabiano Rosas return false; 1631*f0984d40SFabiano Rosas } 1632*f0984d40SFabiano Rosas 1633*f0984d40SFabiano Rosas if (a->vd & 1) { 1634*f0984d40SFabiano Rosas return false; 1635*f0984d40SFabiano Rosas } 1636*f0984d40SFabiano Rosas 1637*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 1638*f0984d40SFabiano Rosas return true; 1639*f0984d40SFabiano Rosas } 1640*f0984d40SFabiano Rosas 1641*f0984d40SFabiano Rosas /* 1642*f0984d40SFabiano Rosas * This is a widen-and-shift operation. The shift is always less 1643*f0984d40SFabiano Rosas * than the width of the source type, so after widening the input 1644*f0984d40SFabiano Rosas * vector we can simply shift the whole 64-bit widened register, 1645*f0984d40SFabiano Rosas * and then clear the potential overflow bits resulting from left 1646*f0984d40SFabiano Rosas * bits of the narrow input appearing as right bits of the left 1647*f0984d40SFabiano Rosas * neighbour narrow input. Calculate a mask of bits to clear. 
1648*f0984d40SFabiano Rosas */ 1649*f0984d40SFabiano Rosas if ((a->shift != 0) && (a->size < 2 || u)) { 1650*f0984d40SFabiano Rosas int esize = 8 << a->size; 1651*f0984d40SFabiano Rosas widen_mask = MAKE_64BIT_MASK(0, esize); 1652*f0984d40SFabiano Rosas widen_mask >>= esize - a->shift; 1653*f0984d40SFabiano Rosas widen_mask = dup_const(a->size + 1, widen_mask); 1654*f0984d40SFabiano Rosas } 1655*f0984d40SFabiano Rosas 1656*f0984d40SFabiano Rosas rm0 = tcg_temp_new_i32(); 1657*f0984d40SFabiano Rosas rm1 = tcg_temp_new_i32(); 1658*f0984d40SFabiano Rosas read_neon_element32(rm0, a->vm, 0, MO_32); 1659*f0984d40SFabiano Rosas read_neon_element32(rm1, a->vm, 1, MO_32); 1660*f0984d40SFabiano Rosas tmp = tcg_temp_new_i64(); 1661*f0984d40SFabiano Rosas 1662*f0984d40SFabiano Rosas widenfn(tmp, rm0); 1663*f0984d40SFabiano Rosas tcg_temp_free_i32(rm0); 1664*f0984d40SFabiano Rosas if (a->shift != 0) { 1665*f0984d40SFabiano Rosas tcg_gen_shli_i64(tmp, tmp, a->shift); 1666*f0984d40SFabiano Rosas tcg_gen_andi_i64(tmp, tmp, ~widen_mask); 1667*f0984d40SFabiano Rosas } 1668*f0984d40SFabiano Rosas write_neon_element64(tmp, a->vd, 0, MO_64); 1669*f0984d40SFabiano Rosas 1670*f0984d40SFabiano Rosas widenfn(tmp, rm1); 1671*f0984d40SFabiano Rosas tcg_temp_free_i32(rm1); 1672*f0984d40SFabiano Rosas if (a->shift != 0) { 1673*f0984d40SFabiano Rosas tcg_gen_shli_i64(tmp, tmp, a->shift); 1674*f0984d40SFabiano Rosas tcg_gen_andi_i64(tmp, tmp, ~widen_mask); 1675*f0984d40SFabiano Rosas } 1676*f0984d40SFabiano Rosas write_neon_element64(tmp, a->vd, 1, MO_64); 1677*f0984d40SFabiano Rosas tcg_temp_free_i64(tmp); 1678*f0984d40SFabiano Rosas return true; 1679*f0984d40SFabiano Rosas } 1680*f0984d40SFabiano Rosas 1681*f0984d40SFabiano Rosas static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a) 1682*f0984d40SFabiano Rosas { 1683*f0984d40SFabiano Rosas static NeonGenWidenFn * const widenfn[] = { 1684*f0984d40SFabiano Rosas gen_helper_neon_widen_s8, 1685*f0984d40SFabiano Rosas 
gen_helper_neon_widen_s16, 1686*f0984d40SFabiano Rosas tcg_gen_ext_i32_i64, 1687*f0984d40SFabiano Rosas }; 1688*f0984d40SFabiano Rosas return do_vshll_2sh(s, a, widenfn[a->size], false); 1689*f0984d40SFabiano Rosas } 1690*f0984d40SFabiano Rosas 1691*f0984d40SFabiano Rosas static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a) 1692*f0984d40SFabiano Rosas { 1693*f0984d40SFabiano Rosas static NeonGenWidenFn * const widenfn[] = { 1694*f0984d40SFabiano Rosas gen_helper_neon_widen_u8, 1695*f0984d40SFabiano Rosas gen_helper_neon_widen_u16, 1696*f0984d40SFabiano Rosas tcg_gen_extu_i32_i64, 1697*f0984d40SFabiano Rosas }; 1698*f0984d40SFabiano Rosas return do_vshll_2sh(s, a, widenfn[a->size], true); 1699*f0984d40SFabiano Rosas } 1700*f0984d40SFabiano Rosas 1701*f0984d40SFabiano Rosas static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a, 1702*f0984d40SFabiano Rosas gen_helper_gvec_2_ptr *fn) 1703*f0984d40SFabiano Rosas { 1704*f0984d40SFabiano Rosas /* FP operations in 2-reg-and-shift group */ 1705*f0984d40SFabiano Rosas int vec_size = a->q ? 16 : 8; 1706*f0984d40SFabiano Rosas int rd_ofs = neon_full_reg_offset(a->vd); 1707*f0984d40SFabiano Rosas int rm_ofs = neon_full_reg_offset(a->vm); 1708*f0984d40SFabiano Rosas TCGv_ptr fpst; 1709*f0984d40SFabiano Rosas 1710*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1711*f0984d40SFabiano Rosas return false; 1712*f0984d40SFabiano Rosas } 1713*f0984d40SFabiano Rosas 1714*f0984d40SFabiano Rosas if (a->size == MO_16) { 1715*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_fp16_arith, s)) { 1716*f0984d40SFabiano Rosas return false; 1717*f0984d40SFabiano Rosas } 1718*f0984d40SFabiano Rosas } 1719*f0984d40SFabiano Rosas 1720*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 1721*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 1722*f0984d40SFabiano Rosas ((a->vd | a->vm) & 0x10)) { 1723*f0984d40SFabiano Rosas return false; 1724*f0984d40SFabiano Rosas } 1725*f0984d40SFabiano Rosas 1726*f0984d40SFabiano Rosas if ((a->vm | a->vd) & a->q) { 1727*f0984d40SFabiano Rosas return false; 1728*f0984d40SFabiano Rosas } 1729*f0984d40SFabiano Rosas 1730*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 1731*f0984d40SFabiano Rosas return true; 1732*f0984d40SFabiano Rosas } 1733*f0984d40SFabiano Rosas 1734*f0984d40SFabiano Rosas fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD); 1735*f0984d40SFabiano Rosas tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, vec_size, vec_size, a->shift, fn); 1736*f0984d40SFabiano Rosas tcg_temp_free_ptr(fpst); 1737*f0984d40SFabiano Rosas return true; 1738*f0984d40SFabiano Rosas } 1739*f0984d40SFabiano Rosas 1740*f0984d40SFabiano Rosas #define DO_FP_2SH(INSN, FUNC) \ 1741*f0984d40SFabiano Rosas static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ 1742*f0984d40SFabiano Rosas { \ 1743*f0984d40SFabiano Rosas return do_fp_2sh(s, a, FUNC); \ 1744*f0984d40SFabiano Rosas } 1745*f0984d40SFabiano Rosas 1746*f0984d40SFabiano Rosas DO_FP_2SH(VCVT_SF, gen_helper_gvec_vcvt_sf) 1747*f0984d40SFabiano Rosas DO_FP_2SH(VCVT_UF, gen_helper_gvec_vcvt_uf) 1748*f0984d40SFabiano Rosas DO_FP_2SH(VCVT_FS, gen_helper_gvec_vcvt_fs) 1749*f0984d40SFabiano Rosas DO_FP_2SH(VCVT_FU, gen_helper_gvec_vcvt_fu) 1750*f0984d40SFabiano Rosas 1751*f0984d40SFabiano Rosas DO_FP_2SH(VCVT_SH, gen_helper_gvec_vcvt_sh) 1752*f0984d40SFabiano Rosas DO_FP_2SH(VCVT_UH, gen_helper_gvec_vcvt_uh) 1753*f0984d40SFabiano Rosas DO_FP_2SH(VCVT_HS, gen_helper_gvec_vcvt_hs) 1754*f0984d40SFabiano Rosas DO_FP_2SH(VCVT_HU, gen_helper_gvec_vcvt_hu) 1755*f0984d40SFabiano Rosas 1756*f0984d40SFabiano Rosas static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a, 1757*f0984d40SFabiano Rosas GVecGen2iFn *fn) 1758*f0984d40SFabiano Rosas { 
1759*f0984d40SFabiano Rosas uint64_t imm; 1760*f0984d40SFabiano Rosas int reg_ofs, vec_size; 1761*f0984d40SFabiano Rosas 1762*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1763*f0984d40SFabiano Rosas return false; 1764*f0984d40SFabiano Rosas } 1765*f0984d40SFabiano Rosas 1766*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. */ 1767*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { 1768*f0984d40SFabiano Rosas return false; 1769*f0984d40SFabiano Rosas } 1770*f0984d40SFabiano Rosas 1771*f0984d40SFabiano Rosas if (a->vd & a->q) { 1772*f0984d40SFabiano Rosas return false; 1773*f0984d40SFabiano Rosas } 1774*f0984d40SFabiano Rosas 1775*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 1776*f0984d40SFabiano Rosas return true; 1777*f0984d40SFabiano Rosas } 1778*f0984d40SFabiano Rosas 1779*f0984d40SFabiano Rosas reg_ofs = neon_full_reg_offset(a->vd); 1780*f0984d40SFabiano Rosas vec_size = a->q ? 16 : 8; 1781*f0984d40SFabiano Rosas imm = asimd_imm_const(a->imm, a->cmode, a->op); 1782*f0984d40SFabiano Rosas 1783*f0984d40SFabiano Rosas fn(MO_64, reg_ofs, reg_ofs, imm, vec_size, vec_size); 1784*f0984d40SFabiano Rosas return true; 1785*f0984d40SFabiano Rosas } 1786*f0984d40SFabiano Rosas 1787*f0984d40SFabiano Rosas static void gen_VMOV_1r(unsigned vece, uint32_t dofs, uint32_t aofs, 1788*f0984d40SFabiano Rosas int64_t c, uint32_t oprsz, uint32_t maxsz) 1789*f0984d40SFabiano Rosas { 1790*f0984d40SFabiano Rosas tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c); 1791*f0984d40SFabiano Rosas } 1792*f0984d40SFabiano Rosas 1793*f0984d40SFabiano Rosas static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a) 1794*f0984d40SFabiano Rosas { 1795*f0984d40SFabiano Rosas /* Handle decode of cmode/op here between VORR/VBIC/VMOV */ 1796*f0984d40SFabiano Rosas GVecGen2iFn *fn; 1797*f0984d40SFabiano Rosas 1798*f0984d40SFabiano Rosas if ((a->cmode & 1) && a->cmode < 12) { 1799*f0984d40SFabiano Rosas /* for op=1, the 
imm will be inverted, so BIC becomes AND. */ 1800*f0984d40SFabiano Rosas fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori; 1801*f0984d40SFabiano Rosas } else { 1802*f0984d40SFabiano Rosas /* There is one unallocated cmode/op combination in this space */ 1803*f0984d40SFabiano Rosas if (a->cmode == 15 && a->op == 1) { 1804*f0984d40SFabiano Rosas return false; 1805*f0984d40SFabiano Rosas } 1806*f0984d40SFabiano Rosas fn = gen_VMOV_1r; 1807*f0984d40SFabiano Rosas } 1808*f0984d40SFabiano Rosas return do_1reg_imm(s, a, fn); 1809*f0984d40SFabiano Rosas } 1810*f0984d40SFabiano Rosas 1811*f0984d40SFabiano Rosas static bool do_prewiden_3d(DisasContext *s, arg_3diff *a, 1812*f0984d40SFabiano Rosas NeonGenWidenFn *widenfn, 1813*f0984d40SFabiano Rosas NeonGenTwo64OpFn *opfn, 1814*f0984d40SFabiano Rosas int src1_mop, int src2_mop) 1815*f0984d40SFabiano Rosas { 1816*f0984d40SFabiano Rosas /* 3-regs different lengths, prewidening case (VADDL/VSUBL/VAADW/VSUBW) */ 1817*f0984d40SFabiano Rosas TCGv_i64 rn0_64, rn1_64, rm_64; 1818*f0984d40SFabiano Rosas 1819*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1820*f0984d40SFabiano Rosas return false; 1821*f0984d40SFabiano Rosas } 1822*f0984d40SFabiano Rosas 1823*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 1824*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 1825*f0984d40SFabiano Rosas ((a->vd | a->vn | a->vm) & 0x10)) { 1826*f0984d40SFabiano Rosas return false; 1827*f0984d40SFabiano Rosas } 1828*f0984d40SFabiano Rosas 1829*f0984d40SFabiano Rosas if (!opfn) { 1830*f0984d40SFabiano Rosas /* size == 3 case, which is an entirely different insn group */ 1831*f0984d40SFabiano Rosas return false; 1832*f0984d40SFabiano Rosas } 1833*f0984d40SFabiano Rosas 1834*f0984d40SFabiano Rosas if ((a->vd & 1) || (src1_mop == MO_UQ && (a->vn & 1))) { 1835*f0984d40SFabiano Rosas return false; 1836*f0984d40SFabiano Rosas } 1837*f0984d40SFabiano Rosas 1838*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 1839*f0984d40SFabiano Rosas return true; 1840*f0984d40SFabiano Rosas } 1841*f0984d40SFabiano Rosas 1842*f0984d40SFabiano Rosas rn0_64 = tcg_temp_new_i64(); 1843*f0984d40SFabiano Rosas rn1_64 = tcg_temp_new_i64(); 1844*f0984d40SFabiano Rosas rm_64 = tcg_temp_new_i64(); 1845*f0984d40SFabiano Rosas 1846*f0984d40SFabiano Rosas if (src1_mop >= 0) { 1847*f0984d40SFabiano Rosas read_neon_element64(rn0_64, a->vn, 0, src1_mop); 1848*f0984d40SFabiano Rosas } else { 1849*f0984d40SFabiano Rosas TCGv_i32 tmp = tcg_temp_new_i32(); 1850*f0984d40SFabiano Rosas read_neon_element32(tmp, a->vn, 0, MO_32); 1851*f0984d40SFabiano Rosas widenfn(rn0_64, tmp); 1852*f0984d40SFabiano Rosas tcg_temp_free_i32(tmp); 1853*f0984d40SFabiano Rosas } 1854*f0984d40SFabiano Rosas if (src2_mop >= 0) { 1855*f0984d40SFabiano Rosas read_neon_element64(rm_64, a->vm, 0, src2_mop); 1856*f0984d40SFabiano Rosas } else { 1857*f0984d40SFabiano Rosas TCGv_i32 tmp = tcg_temp_new_i32(); 1858*f0984d40SFabiano Rosas read_neon_element32(tmp, a->vm, 0, MO_32); 1859*f0984d40SFabiano Rosas widenfn(rm_64, tmp); 1860*f0984d40SFabiano Rosas tcg_temp_free_i32(tmp); 1861*f0984d40SFabiano Rosas } 1862*f0984d40SFabiano Rosas 1863*f0984d40SFabiano Rosas opfn(rn0_64, rn0_64, rm_64); 1864*f0984d40SFabiano Rosas 1865*f0984d40SFabiano 
Rosas /* 1866*f0984d40SFabiano Rosas * Load second pass inputs before storing the first pass result, to 1867*f0984d40SFabiano Rosas * avoid incorrect results if a narrow input overlaps with the result. 1868*f0984d40SFabiano Rosas */ 1869*f0984d40SFabiano Rosas if (src1_mop >= 0) { 1870*f0984d40SFabiano Rosas read_neon_element64(rn1_64, a->vn, 1, src1_mop); 1871*f0984d40SFabiano Rosas } else { 1872*f0984d40SFabiano Rosas TCGv_i32 tmp = tcg_temp_new_i32(); 1873*f0984d40SFabiano Rosas read_neon_element32(tmp, a->vn, 1, MO_32); 1874*f0984d40SFabiano Rosas widenfn(rn1_64, tmp); 1875*f0984d40SFabiano Rosas tcg_temp_free_i32(tmp); 1876*f0984d40SFabiano Rosas } 1877*f0984d40SFabiano Rosas if (src2_mop >= 0) { 1878*f0984d40SFabiano Rosas read_neon_element64(rm_64, a->vm, 1, src2_mop); 1879*f0984d40SFabiano Rosas } else { 1880*f0984d40SFabiano Rosas TCGv_i32 tmp = tcg_temp_new_i32(); 1881*f0984d40SFabiano Rosas read_neon_element32(tmp, a->vm, 1, MO_32); 1882*f0984d40SFabiano Rosas widenfn(rm_64, tmp); 1883*f0984d40SFabiano Rosas tcg_temp_free_i32(tmp); 1884*f0984d40SFabiano Rosas } 1885*f0984d40SFabiano Rosas 1886*f0984d40SFabiano Rosas write_neon_element64(rn0_64, a->vd, 0, MO_64); 1887*f0984d40SFabiano Rosas 1888*f0984d40SFabiano Rosas opfn(rn1_64, rn1_64, rm_64); 1889*f0984d40SFabiano Rosas write_neon_element64(rn1_64, a->vd, 1, MO_64); 1890*f0984d40SFabiano Rosas 1891*f0984d40SFabiano Rosas tcg_temp_free_i64(rn0_64); 1892*f0984d40SFabiano Rosas tcg_temp_free_i64(rn1_64); 1893*f0984d40SFabiano Rosas tcg_temp_free_i64(rm_64); 1894*f0984d40SFabiano Rosas 1895*f0984d40SFabiano Rosas return true; 1896*f0984d40SFabiano Rosas } 1897*f0984d40SFabiano Rosas

/*
 * DO_PREWIDEN: define trans_<INSN>_3d() as a wrapper around
 * do_prewiden_3d().
 *
 *  INSN     - instruction name, pasted into the function name
 *  S        - 's' or 'u'; selects the gen_helper_neon_widen_* helpers
 *  OP       - 'add' or 'sub'; selects the 64-bit arithmetic functions
 *  SRC1WIDE - if true the first source is passed as MO_UQ instead of
 *             narrow_mop
 *  SIGN     - MO_SIGN or 0; ORed into MO_32 for the size == MO_32 case
 *
 * Both tables are indexed by a->size.  There is no widening helper for
 * size == MO_32: in that case narrow_mop is a non-negative MemOp, which
 * makes do_prewiden_3d() load the element with read_neon_element64()
 * instead of calling widenfn.  For every other size narrow_mop is -1.
 * The size == 3 slot of addfn is NULL.
 */
#define DO_PREWIDEN(INSN, S, OP, SRC1WIDE, SIGN)                        \
    static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
    {                                                                   \
        static NeonGenWidenFn * const widenfn[] = {                     \
            gen_helper_neon_widen_##S##8,                               \
            gen_helper_neon_widen_##S##16,                              \
            NULL, NULL,                                                 \
        };                                                              \
        static NeonGenTwo64OpFn * const addfn[] = {                     \
            gen_helper_neon_##OP##l_u16,                                \
            gen_helper_neon_##OP##l_u32,                                \
            tcg_gen_##OP##_i64,                                         \
            NULL,                                                       \
        };                                                              \
        int narrow_mop = a->size == MO_32 ? MO_32 | (SIGN) : -1;        \
        return do_prewiden_3d(s, a, widenfn[a->size], addfn[a->size],   \
                              (SRC1WIDE) ? MO_UQ : narrow_mop,          \
                              narrow_mop);                              \
    }

DO_PREWIDEN(VADDL_S, s, add, false, MO_SIGN)
DO_PREWIDEN(VADDL_U, u, add, false, 0)
DO_PREWIDEN(VSUBL_S, s, sub, false, MO_SIGN)
DO_PREWIDEN(VSUBL_U, u, sub, false, 0)
DO_PREWIDEN(VADDW_S, s, add, true, MO_SIGN)
DO_PREWIDEN(VADDW_U, u, add, true, 0)
DO_PREWIDEN(VSUBW_S, s, sub, true, MO_SIGN)
DO_PREWIDEN(VSUBW_U, u, sub, true, 0)

/*
 * 3-regs different lengths, narrowing (VADDHN/VSUBHN/VRADDHN/VRSUBHN).
 *
 * For each of the two 64-bit elements of Vn and Vm, opfn combines the
 * pair into a 64-bit value and narrowfn reduces it to 32 bits.  The two
 * 32-bit results are written to elements 0 and 1 of Vd.
 *
 * @opfn:     64-bit two-operand function, or NULL for an invalid size
 * @narrowfn: 64-bit to 32-bit narrowing function, or NULL for an
 *            invalid size
 *
 * Returns false if the insn is rejected here (no Neon, D16-D31 named but
 * not present, NULL opfn/narrowfn, or an odd Vn/Vm register number).
 * Returns true otherwise, including the case where vfp_access_check()
 * fails and this function emits no code.
 */
static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
                         NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn)
{
    TCGv_i64 rn_64, rm_64;
    TCGv_i32 rd0, rd1;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn || !narrowfn) {
        /* size == 3 case, which is an entirely different insn group */
        return false;
    }

    /* Both sources are read as two 64-bit elements: reject odd numbers */
    if ((a->vn | a->vm) & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rn_64 = tcg_temp_new_i64();
    rm_64 = tcg_temp_new_i64();
    rd0 = tcg_temp_new_i32();
    rd1 = tcg_temp_new_i32();

    /* First pass: element 0 of each source */
    read_neon_element64(rn_64, a->vn, 0, MO_64);
    read_neon_element64(rm_64, a->vm, 0, MO_64);

    opfn(rn_64, rn_64, rm_64);

    narrowfn(rd0, rn_64);

    /* Second pass: element 1 of each source */
    read_neon_element64(rn_64, a->vn, 1, MO_64);
    read_neon_element64(rm_64, a->vm, 1, MO_64);

    opfn(rn_64, rn_64, rm_64);

    narrowfn(rd1, rn_64);

    /* Both results are stored only after every source load is done */
    write_neon_element32(rd0, a->vd, 0, MO_32);
    write_neon_element32(rd1, a->vd, 1, MO_32);

    tcg_temp_free_i32(rd0);
    tcg_temp_free_i32(rd1);
    tcg_temp_free_i64(rn_64);
    tcg_temp_free_i64(rm_64);

    return true;
}

/*
 * DO_NARROW_3D: define trans_<INSN>_3d() as a wrapper around
 * do_narrow_3d().
 *
 *  INSN       - instruction name, pasted into the function name
 *  OP         - 'add' or 'sub'; selects the 64-bit arithmetic functions
 *  NARROWTYPE - 'narrow' or 'narrow_round'; selects the
 *               gen_helper_neon_<NARROWTYPE>_high_u8/_u16 helpers
 *  EXTOP      - narrowing function used for size == 2
 *
 * Both tables are indexed by a->size; the size == 3 slots are NULL, which
 * do_narrow_3d() rejects.
 */
#define DO_NARROW_3D(INSN, OP, NARROWTYPE, EXTOP)                       \
    static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
    {                                                                   \
        static NeonGenTwo64OpFn * const addfn[] = {                     \
            gen_helper_neon_##OP##l_u16,                                \
            gen_helper_neon_##OP##l_u32,                                \
            tcg_gen_##OP##_i64,                                         \
            NULL,                                                       \
        };                                                              \
        static NeonGenNarrowFn * const narrowfn[] = {                   \
            gen_helper_neon_##NARROWTYPE##_high_u8,                     \
            gen_helper_neon_##NARROWTYPE##_high_u16,                    \
            EXTOP,                                                      \
            NULL,                                                       \
        };                                                              \
        return do_narrow_3d(s, a, addfn[a->size], narrowfn[a->size]);   \
    }

/*
 * Rounding narrow for 64-bit elements: add 1 << 31 to rn, then place the
 * high 32 bits of the sum in rd.  Note that rn is modified.
 */
static void gen_narrow_round_high_u32(TCGv_i32 rd, TCGv_i64 rn)
{
    tcg_gen_addi_i64(rn, rn, 1u << 31);
    tcg_gen_extrh_i64_i32(rd, rn);
}

DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32)
DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32)
DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32)
DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32)

2016*f0984d40SFabiano Rosas static bool do_long_3d(DisasContext *s, arg_3diff *a, 2017*f0984d40SFabiano Rosas NeonGenTwoOpWidenFn *opfn, 2018*f0984d40SFabiano Rosas NeonGenTwo64OpFn *accfn) 2019*f0984d40SFabiano Rosas { 2020*f0984d40SFabiano Rosas /* 2021*f0984d40SFabiano Rosas * 3-regs different lengths, long operations. 2022*f0984d40SFabiano Rosas * These perform an operation on two inputs that returns a double-width 2023*f0984d40SFabiano Rosas * result, and then possibly perform an accumulation operation of 2024*f0984d40SFabiano Rosas * that result into the double-width destination. 2025*f0984d40SFabiano Rosas */ 2026*f0984d40SFabiano Rosas TCGv_i64 rd0, rd1, tmp; 2027*f0984d40SFabiano Rosas TCGv_i32 rn, rm; 2028*f0984d40SFabiano Rosas 2029*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2030*f0984d40SFabiano Rosas return false; 2031*f0984d40SFabiano Rosas } 2032*f0984d40SFabiano Rosas 2033*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. */ 2034*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 2035*f0984d40SFabiano Rosas ((a->vd | a->vn | a->vm) & 0x10)) { 2036*f0984d40SFabiano Rosas return false; 2037*f0984d40SFabiano Rosas } 2038*f0984d40SFabiano Rosas 2039*f0984d40SFabiano Rosas if (!opfn) { 2040*f0984d40SFabiano Rosas /* size == 3 case, which is an entirely different insn group */ 2041*f0984d40SFabiano Rosas return false; 2042*f0984d40SFabiano Rosas } 2043*f0984d40SFabiano Rosas 2044*f0984d40SFabiano Rosas if (a->vd & 1) { 2045*f0984d40SFabiano Rosas return false; 2046*f0984d40SFabiano Rosas } 2047*f0984d40SFabiano Rosas 2048*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 2049*f0984d40SFabiano Rosas return true; 2050*f0984d40SFabiano Rosas } 2051*f0984d40SFabiano Rosas 2052*f0984d40SFabiano Rosas rd0 = tcg_temp_new_i64(); 2053*f0984d40SFabiano Rosas rd1 = tcg_temp_new_i64(); 2054*f0984d40SFabiano Rosas 2055*f0984d40SFabiano Rosas rn = tcg_temp_new_i32(); 2056*f0984d40SFabiano Rosas rm = 
tcg_temp_new_i32(); 2057*f0984d40SFabiano Rosas read_neon_element32(rn, a->vn, 0, MO_32); 2058*f0984d40SFabiano Rosas read_neon_element32(rm, a->vm, 0, MO_32); 2059*f0984d40SFabiano Rosas opfn(rd0, rn, rm); 2060*f0984d40SFabiano Rosas 2061*f0984d40SFabiano Rosas read_neon_element32(rn, a->vn, 1, MO_32); 2062*f0984d40SFabiano Rosas read_neon_element32(rm, a->vm, 1, MO_32); 2063*f0984d40SFabiano Rosas opfn(rd1, rn, rm); 2064*f0984d40SFabiano Rosas tcg_temp_free_i32(rn); 2065*f0984d40SFabiano Rosas tcg_temp_free_i32(rm); 2066*f0984d40SFabiano Rosas 2067*f0984d40SFabiano Rosas /* Don't store results until after all loads: they might overlap */ 2068*f0984d40SFabiano Rosas if (accfn) { 2069*f0984d40SFabiano Rosas tmp = tcg_temp_new_i64(); 2070*f0984d40SFabiano Rosas read_neon_element64(tmp, a->vd, 0, MO_64); 2071*f0984d40SFabiano Rosas accfn(rd0, tmp, rd0); 2072*f0984d40SFabiano Rosas read_neon_element64(tmp, a->vd, 1, MO_64); 2073*f0984d40SFabiano Rosas accfn(rd1, tmp, rd1); 2074*f0984d40SFabiano Rosas tcg_temp_free_i64(tmp); 2075*f0984d40SFabiano Rosas } 2076*f0984d40SFabiano Rosas 2077*f0984d40SFabiano Rosas write_neon_element64(rd0, a->vd, 0, MO_64); 2078*f0984d40SFabiano Rosas write_neon_element64(rd1, a->vd, 1, MO_64); 2079*f0984d40SFabiano Rosas tcg_temp_free_i64(rd0); 2080*f0984d40SFabiano Rosas tcg_temp_free_i64(rd1); 2081*f0984d40SFabiano Rosas 2082*f0984d40SFabiano Rosas return true; 2083*f0984d40SFabiano Rosas } 2084*f0984d40SFabiano Rosas 2085*f0984d40SFabiano Rosas static bool trans_VABDL_S_3d(DisasContext *s, arg_3diff *a) 2086*f0984d40SFabiano Rosas { 2087*f0984d40SFabiano Rosas static NeonGenTwoOpWidenFn * const opfn[] = { 2088*f0984d40SFabiano Rosas gen_helper_neon_abdl_s16, 2089*f0984d40SFabiano Rosas gen_helper_neon_abdl_s32, 2090*f0984d40SFabiano Rosas gen_helper_neon_abdl_s64, 2091*f0984d40SFabiano Rosas NULL, 2092*f0984d40SFabiano Rosas }; 2093*f0984d40SFabiano Rosas 2094*f0984d40SFabiano Rosas return do_long_3d(s, a, opfn[a->size], NULL); 
2095*f0984d40SFabiano Rosas } 2096*f0984d40SFabiano Rosas 2097*f0984d40SFabiano Rosas static bool trans_VABDL_U_3d(DisasContext *s, arg_3diff *a) 2098*f0984d40SFabiano Rosas { 2099*f0984d40SFabiano Rosas static NeonGenTwoOpWidenFn * const opfn[] = { 2100*f0984d40SFabiano Rosas gen_helper_neon_abdl_u16, 2101*f0984d40SFabiano Rosas gen_helper_neon_abdl_u32, 2102*f0984d40SFabiano Rosas gen_helper_neon_abdl_u64, 2103*f0984d40SFabiano Rosas NULL, 2104*f0984d40SFabiano Rosas }; 2105*f0984d40SFabiano Rosas 2106*f0984d40SFabiano Rosas return do_long_3d(s, a, opfn[a->size], NULL); 2107*f0984d40SFabiano Rosas } 2108*f0984d40SFabiano Rosas 2109*f0984d40SFabiano Rosas static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a) 2110*f0984d40SFabiano Rosas { 2111*f0984d40SFabiano Rosas static NeonGenTwoOpWidenFn * const opfn[] = { 2112*f0984d40SFabiano Rosas gen_helper_neon_abdl_s16, 2113*f0984d40SFabiano Rosas gen_helper_neon_abdl_s32, 2114*f0984d40SFabiano Rosas gen_helper_neon_abdl_s64, 2115*f0984d40SFabiano Rosas NULL, 2116*f0984d40SFabiano Rosas }; 2117*f0984d40SFabiano Rosas static NeonGenTwo64OpFn * const addfn[] = { 2118*f0984d40SFabiano Rosas gen_helper_neon_addl_u16, 2119*f0984d40SFabiano Rosas gen_helper_neon_addl_u32, 2120*f0984d40SFabiano Rosas tcg_gen_add_i64, 2121*f0984d40SFabiano Rosas NULL, 2122*f0984d40SFabiano Rosas }; 2123*f0984d40SFabiano Rosas 2124*f0984d40SFabiano Rosas return do_long_3d(s, a, opfn[a->size], addfn[a->size]); 2125*f0984d40SFabiano Rosas } 2126*f0984d40SFabiano Rosas 2127*f0984d40SFabiano Rosas static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a) 2128*f0984d40SFabiano Rosas { 2129*f0984d40SFabiano Rosas static NeonGenTwoOpWidenFn * const opfn[] = { 2130*f0984d40SFabiano Rosas gen_helper_neon_abdl_u16, 2131*f0984d40SFabiano Rosas gen_helper_neon_abdl_u32, 2132*f0984d40SFabiano Rosas gen_helper_neon_abdl_u64, 2133*f0984d40SFabiano Rosas NULL, 2134*f0984d40SFabiano Rosas }; 2135*f0984d40SFabiano Rosas static NeonGenTwo64OpFn * const 
addfn[] = { 2136*f0984d40SFabiano Rosas gen_helper_neon_addl_u16, 2137*f0984d40SFabiano Rosas gen_helper_neon_addl_u32, 2138*f0984d40SFabiano Rosas tcg_gen_add_i64, 2139*f0984d40SFabiano Rosas NULL, 2140*f0984d40SFabiano Rosas }; 2141*f0984d40SFabiano Rosas 2142*f0984d40SFabiano Rosas return do_long_3d(s, a, opfn[a->size], addfn[a->size]); 2143*f0984d40SFabiano Rosas } 2144*f0984d40SFabiano Rosas 2145*f0984d40SFabiano Rosas static void gen_mull_s32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) 2146*f0984d40SFabiano Rosas { 2147*f0984d40SFabiano Rosas TCGv_i32 lo = tcg_temp_new_i32(); 2148*f0984d40SFabiano Rosas TCGv_i32 hi = tcg_temp_new_i32(); 2149*f0984d40SFabiano Rosas 2150*f0984d40SFabiano Rosas tcg_gen_muls2_i32(lo, hi, rn, rm); 2151*f0984d40SFabiano Rosas tcg_gen_concat_i32_i64(rd, lo, hi); 2152*f0984d40SFabiano Rosas 2153*f0984d40SFabiano Rosas tcg_temp_free_i32(lo); 2154*f0984d40SFabiano Rosas tcg_temp_free_i32(hi); 2155*f0984d40SFabiano Rosas } 2156*f0984d40SFabiano Rosas 2157*f0984d40SFabiano Rosas static void gen_mull_u32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) 2158*f0984d40SFabiano Rosas { 2159*f0984d40SFabiano Rosas TCGv_i32 lo = tcg_temp_new_i32(); 2160*f0984d40SFabiano Rosas TCGv_i32 hi = tcg_temp_new_i32(); 2161*f0984d40SFabiano Rosas 2162*f0984d40SFabiano Rosas tcg_gen_mulu2_i32(lo, hi, rn, rm); 2163*f0984d40SFabiano Rosas tcg_gen_concat_i32_i64(rd, lo, hi); 2164*f0984d40SFabiano Rosas 2165*f0984d40SFabiano Rosas tcg_temp_free_i32(lo); 2166*f0984d40SFabiano Rosas tcg_temp_free_i32(hi); 2167*f0984d40SFabiano Rosas } 2168*f0984d40SFabiano Rosas 2169*f0984d40SFabiano Rosas static bool trans_VMULL_S_3d(DisasContext *s, arg_3diff *a) 2170*f0984d40SFabiano Rosas { 2171*f0984d40SFabiano Rosas static NeonGenTwoOpWidenFn * const opfn[] = { 2172*f0984d40SFabiano Rosas gen_helper_neon_mull_s8, 2173*f0984d40SFabiano Rosas gen_helper_neon_mull_s16, 2174*f0984d40SFabiano Rosas gen_mull_s32, 2175*f0984d40SFabiano Rosas NULL, 2176*f0984d40SFabiano Rosas }; 
2177*f0984d40SFabiano Rosas 2178*f0984d40SFabiano Rosas return do_long_3d(s, a, opfn[a->size], NULL); 2179*f0984d40SFabiano Rosas } 2180*f0984d40SFabiano Rosas 2181*f0984d40SFabiano Rosas static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a) 2182*f0984d40SFabiano Rosas { 2183*f0984d40SFabiano Rosas static NeonGenTwoOpWidenFn * const opfn[] = { 2184*f0984d40SFabiano Rosas gen_helper_neon_mull_u8, 2185*f0984d40SFabiano Rosas gen_helper_neon_mull_u16, 2186*f0984d40SFabiano Rosas gen_mull_u32, 2187*f0984d40SFabiano Rosas NULL, 2188*f0984d40SFabiano Rosas }; 2189*f0984d40SFabiano Rosas 2190*f0984d40SFabiano Rosas return do_long_3d(s, a, opfn[a->size], NULL); 2191*f0984d40SFabiano Rosas } 2192*f0984d40SFabiano Rosas 2193*f0984d40SFabiano Rosas #define DO_VMLAL(INSN,MULL,ACC) \ 2194*f0984d40SFabiano Rosas static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \ 2195*f0984d40SFabiano Rosas { \ 2196*f0984d40SFabiano Rosas static NeonGenTwoOpWidenFn * const opfn[] = { \ 2197*f0984d40SFabiano Rosas gen_helper_neon_##MULL##8, \ 2198*f0984d40SFabiano Rosas gen_helper_neon_##MULL##16, \ 2199*f0984d40SFabiano Rosas gen_##MULL##32, \ 2200*f0984d40SFabiano Rosas NULL, \ 2201*f0984d40SFabiano Rosas }; \ 2202*f0984d40SFabiano Rosas static NeonGenTwo64OpFn * const accfn[] = { \ 2203*f0984d40SFabiano Rosas gen_helper_neon_##ACC##l_u16, \ 2204*f0984d40SFabiano Rosas gen_helper_neon_##ACC##l_u32, \ 2205*f0984d40SFabiano Rosas tcg_gen_##ACC##_i64, \ 2206*f0984d40SFabiano Rosas NULL, \ 2207*f0984d40SFabiano Rosas }; \ 2208*f0984d40SFabiano Rosas return do_long_3d(s, a, opfn[a->size], accfn[a->size]); \ 2209*f0984d40SFabiano Rosas } 2210*f0984d40SFabiano Rosas 2211*f0984d40SFabiano Rosas DO_VMLAL(VMLAL_S,mull_s,add) 2212*f0984d40SFabiano Rosas DO_VMLAL(VMLAL_U,mull_u,add) 2213*f0984d40SFabiano Rosas DO_VMLAL(VMLSL_S,mull_s,sub) 2214*f0984d40SFabiano Rosas DO_VMLAL(VMLSL_U,mull_u,sub) 2215*f0984d40SFabiano Rosas 2216*f0984d40SFabiano Rosas static void gen_VQDMULL_16(TCGv_i64 rd, 
TCGv_i32 rn, TCGv_i32 rm) 2217*f0984d40SFabiano Rosas { 2218*f0984d40SFabiano Rosas gen_helper_neon_mull_s16(rd, rn, rm); 2219*f0984d40SFabiano Rosas gen_helper_neon_addl_saturate_s32(rd, cpu_env, rd, rd); 2220*f0984d40SFabiano Rosas } 2221*f0984d40SFabiano Rosas 2222*f0984d40SFabiano Rosas static void gen_VQDMULL_32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) 2223*f0984d40SFabiano Rosas { 2224*f0984d40SFabiano Rosas gen_mull_s32(rd, rn, rm); 2225*f0984d40SFabiano Rosas gen_helper_neon_addl_saturate_s64(rd, cpu_env, rd, rd); 2226*f0984d40SFabiano Rosas } 2227*f0984d40SFabiano Rosas 2228*f0984d40SFabiano Rosas static bool trans_VQDMULL_3d(DisasContext *s, arg_3diff *a) 2229*f0984d40SFabiano Rosas { 2230*f0984d40SFabiano Rosas static NeonGenTwoOpWidenFn * const opfn[] = { 2231*f0984d40SFabiano Rosas NULL, 2232*f0984d40SFabiano Rosas gen_VQDMULL_16, 2233*f0984d40SFabiano Rosas gen_VQDMULL_32, 2234*f0984d40SFabiano Rosas NULL, 2235*f0984d40SFabiano Rosas }; 2236*f0984d40SFabiano Rosas 2237*f0984d40SFabiano Rosas return do_long_3d(s, a, opfn[a->size], NULL); 2238*f0984d40SFabiano Rosas } 2239*f0984d40SFabiano Rosas 2240*f0984d40SFabiano Rosas static void gen_VQDMLAL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) 2241*f0984d40SFabiano Rosas { 2242*f0984d40SFabiano Rosas gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm); 2243*f0984d40SFabiano Rosas } 2244*f0984d40SFabiano Rosas 2245*f0984d40SFabiano Rosas static void gen_VQDMLAL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) 2246*f0984d40SFabiano Rosas { 2247*f0984d40SFabiano Rosas gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm); 2248*f0984d40SFabiano Rosas } 2249*f0984d40SFabiano Rosas 2250*f0984d40SFabiano Rosas static bool trans_VQDMLAL_3d(DisasContext *s, arg_3diff *a) 2251*f0984d40SFabiano Rosas { 2252*f0984d40SFabiano Rosas static NeonGenTwoOpWidenFn * const opfn[] = { 2253*f0984d40SFabiano Rosas NULL, 2254*f0984d40SFabiano Rosas gen_VQDMULL_16, 2255*f0984d40SFabiano Rosas gen_VQDMULL_32, 2256*f0984d40SFabiano 
Rosas NULL, 2257*f0984d40SFabiano Rosas }; 2258*f0984d40SFabiano Rosas static NeonGenTwo64OpFn * const accfn[] = { 2259*f0984d40SFabiano Rosas NULL, 2260*f0984d40SFabiano Rosas gen_VQDMLAL_acc_16, 2261*f0984d40SFabiano Rosas gen_VQDMLAL_acc_32, 2262*f0984d40SFabiano Rosas NULL, 2263*f0984d40SFabiano Rosas }; 2264*f0984d40SFabiano Rosas 2265*f0984d40SFabiano Rosas return do_long_3d(s, a, opfn[a->size], accfn[a->size]); 2266*f0984d40SFabiano Rosas } 2267*f0984d40SFabiano Rosas 2268*f0984d40SFabiano Rosas static void gen_VQDMLSL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) 2269*f0984d40SFabiano Rosas { 2270*f0984d40SFabiano Rosas gen_helper_neon_negl_u32(rm, rm); 2271*f0984d40SFabiano Rosas gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm); 2272*f0984d40SFabiano Rosas } 2273*f0984d40SFabiano Rosas 2274*f0984d40SFabiano Rosas static void gen_VQDMLSL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) 2275*f0984d40SFabiano Rosas { 2276*f0984d40SFabiano Rosas tcg_gen_neg_i64(rm, rm); 2277*f0984d40SFabiano Rosas gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm); 2278*f0984d40SFabiano Rosas } 2279*f0984d40SFabiano Rosas 2280*f0984d40SFabiano Rosas static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a) 2281*f0984d40SFabiano Rosas { 2282*f0984d40SFabiano Rosas static NeonGenTwoOpWidenFn * const opfn[] = { 2283*f0984d40SFabiano Rosas NULL, 2284*f0984d40SFabiano Rosas gen_VQDMULL_16, 2285*f0984d40SFabiano Rosas gen_VQDMULL_32, 2286*f0984d40SFabiano Rosas NULL, 2287*f0984d40SFabiano Rosas }; 2288*f0984d40SFabiano Rosas static NeonGenTwo64OpFn * const accfn[] = { 2289*f0984d40SFabiano Rosas NULL, 2290*f0984d40SFabiano Rosas gen_VQDMLSL_acc_16, 2291*f0984d40SFabiano Rosas gen_VQDMLSL_acc_32, 2292*f0984d40SFabiano Rosas NULL, 2293*f0984d40SFabiano Rosas }; 2294*f0984d40SFabiano Rosas 2295*f0984d40SFabiano Rosas return do_long_3d(s, a, opfn[a->size], accfn[a->size]); 2296*f0984d40SFabiano Rosas } 2297*f0984d40SFabiano Rosas 2298*f0984d40SFabiano Rosas static bool 
trans_VMULL_P_3d(DisasContext *s, arg_3diff *a) 2299*f0984d40SFabiano Rosas { 2300*f0984d40SFabiano Rosas gen_helper_gvec_3 *fn_gvec; 2301*f0984d40SFabiano Rosas 2302*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2303*f0984d40SFabiano Rosas return false; 2304*f0984d40SFabiano Rosas } 2305*f0984d40SFabiano Rosas 2306*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. */ 2307*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 2308*f0984d40SFabiano Rosas ((a->vd | a->vn | a->vm) & 0x10)) { 2309*f0984d40SFabiano Rosas return false; 2310*f0984d40SFabiano Rosas } 2311*f0984d40SFabiano Rosas 2312*f0984d40SFabiano Rosas if (a->vd & 1) { 2313*f0984d40SFabiano Rosas return false; 2314*f0984d40SFabiano Rosas } 2315*f0984d40SFabiano Rosas 2316*f0984d40SFabiano Rosas switch (a->size) { 2317*f0984d40SFabiano Rosas case 0: 2318*f0984d40SFabiano Rosas fn_gvec = gen_helper_neon_pmull_h; 2319*f0984d40SFabiano Rosas break; 2320*f0984d40SFabiano Rosas case 2: 2321*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_pmull, s)) { 2322*f0984d40SFabiano Rosas return false; 2323*f0984d40SFabiano Rosas } 2324*f0984d40SFabiano Rosas fn_gvec = gen_helper_gvec_pmull_q; 2325*f0984d40SFabiano Rosas break; 2326*f0984d40SFabiano Rosas default: 2327*f0984d40SFabiano Rosas return false; 2328*f0984d40SFabiano Rosas } 2329*f0984d40SFabiano Rosas 2330*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 2331*f0984d40SFabiano Rosas return true; 2332*f0984d40SFabiano Rosas } 2333*f0984d40SFabiano Rosas 2334*f0984d40SFabiano Rosas tcg_gen_gvec_3_ool(neon_full_reg_offset(a->vd), 2335*f0984d40SFabiano Rosas neon_full_reg_offset(a->vn), 2336*f0984d40SFabiano Rosas neon_full_reg_offset(a->vm), 2337*f0984d40SFabiano Rosas 16, 16, 0, fn_gvec); 2338*f0984d40SFabiano Rosas return true; 2339*f0984d40SFabiano Rosas } 2340*f0984d40SFabiano Rosas 2341*f0984d40SFabiano Rosas static void gen_neon_dup_low16(TCGv_i32 var) 2342*f0984d40SFabiano Rosas { 
2343*f0984d40SFabiano Rosas TCGv_i32 tmp = tcg_temp_new_i32(); 2344*f0984d40SFabiano Rosas tcg_gen_ext16u_i32(var, var); 2345*f0984d40SFabiano Rosas tcg_gen_shli_i32(tmp, var, 16); 2346*f0984d40SFabiano Rosas tcg_gen_or_i32(var, var, tmp); 2347*f0984d40SFabiano Rosas tcg_temp_free_i32(tmp); 2348*f0984d40SFabiano Rosas } 2349*f0984d40SFabiano Rosas 2350*f0984d40SFabiano Rosas static void gen_neon_dup_high16(TCGv_i32 var) 2351*f0984d40SFabiano Rosas { 2352*f0984d40SFabiano Rosas TCGv_i32 tmp = tcg_temp_new_i32(); 2353*f0984d40SFabiano Rosas tcg_gen_andi_i32(var, var, 0xffff0000); 2354*f0984d40SFabiano Rosas tcg_gen_shri_i32(tmp, var, 16); 2355*f0984d40SFabiano Rosas tcg_gen_or_i32(var, var, tmp); 2356*f0984d40SFabiano Rosas tcg_temp_free_i32(tmp); 2357*f0984d40SFabiano Rosas } 2358*f0984d40SFabiano Rosas 2359*f0984d40SFabiano Rosas static inline TCGv_i32 neon_get_scalar(int size, int reg) 2360*f0984d40SFabiano Rosas { 2361*f0984d40SFabiano Rosas TCGv_i32 tmp = tcg_temp_new_i32(); 2362*f0984d40SFabiano Rosas if (size == MO_16) { 2363*f0984d40SFabiano Rosas read_neon_element32(tmp, reg & 7, reg >> 4, MO_32); 2364*f0984d40SFabiano Rosas if (reg & 8) { 2365*f0984d40SFabiano Rosas gen_neon_dup_high16(tmp); 2366*f0984d40SFabiano Rosas } else { 2367*f0984d40SFabiano Rosas gen_neon_dup_low16(tmp); 2368*f0984d40SFabiano Rosas } 2369*f0984d40SFabiano Rosas } else { 2370*f0984d40SFabiano Rosas read_neon_element32(tmp, reg & 15, reg >> 4, MO_32); 2371*f0984d40SFabiano Rosas } 2372*f0984d40SFabiano Rosas return tmp; 2373*f0984d40SFabiano Rosas } 2374*f0984d40SFabiano Rosas 2375*f0984d40SFabiano Rosas static bool do_2scalar(DisasContext *s, arg_2scalar *a, 2376*f0984d40SFabiano Rosas NeonGenTwoOpFn *opfn, NeonGenTwoOpFn *accfn) 2377*f0984d40SFabiano Rosas { 2378*f0984d40SFabiano Rosas /* 2379*f0984d40SFabiano Rosas * Two registers and a scalar: perform an operation between 2380*f0984d40SFabiano Rosas * the input elements and the scalar, and then possibly 2381*f0984d40SFabiano 
Rosas * perform an accumulation operation of that result into the 2382*f0984d40SFabiano Rosas * destination. 2383*f0984d40SFabiano Rosas */ 2384*f0984d40SFabiano Rosas TCGv_i32 scalar, tmp; 2385*f0984d40SFabiano Rosas int pass; 2386*f0984d40SFabiano Rosas 2387*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2388*f0984d40SFabiano Rosas return false; 2389*f0984d40SFabiano Rosas } 2390*f0984d40SFabiano Rosas 2391*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. */ 2392*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 2393*f0984d40SFabiano Rosas ((a->vd | a->vn | a->vm) & 0x10)) { 2394*f0984d40SFabiano Rosas return false; 2395*f0984d40SFabiano Rosas } 2396*f0984d40SFabiano Rosas 2397*f0984d40SFabiano Rosas if (!opfn) { 2398*f0984d40SFabiano Rosas /* Bad size (including size == 3, which is a different insn group) */ 2399*f0984d40SFabiano Rosas return false; 2400*f0984d40SFabiano Rosas } 2401*f0984d40SFabiano Rosas 2402*f0984d40SFabiano Rosas if (a->q && ((a->vd | a->vn) & 1)) { 2403*f0984d40SFabiano Rosas return false; 2404*f0984d40SFabiano Rosas } 2405*f0984d40SFabiano Rosas 2406*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 2407*f0984d40SFabiano Rosas return true; 2408*f0984d40SFabiano Rosas } 2409*f0984d40SFabiano Rosas 2410*f0984d40SFabiano Rosas scalar = neon_get_scalar(a->size, a->vm); 2411*f0984d40SFabiano Rosas tmp = tcg_temp_new_i32(); 2412*f0984d40SFabiano Rosas 2413*f0984d40SFabiano Rosas for (pass = 0; pass < (a->q ? 
4 : 2); pass++) { 2414*f0984d40SFabiano Rosas read_neon_element32(tmp, a->vn, pass, MO_32); 2415*f0984d40SFabiano Rosas opfn(tmp, tmp, scalar); 2416*f0984d40SFabiano Rosas if (accfn) { 2417*f0984d40SFabiano Rosas TCGv_i32 rd = tcg_temp_new_i32(); 2418*f0984d40SFabiano Rosas read_neon_element32(rd, a->vd, pass, MO_32); 2419*f0984d40SFabiano Rosas accfn(tmp, rd, tmp); 2420*f0984d40SFabiano Rosas tcg_temp_free_i32(rd); 2421*f0984d40SFabiano Rosas } 2422*f0984d40SFabiano Rosas write_neon_element32(tmp, a->vd, pass, MO_32); 2423*f0984d40SFabiano Rosas } 2424*f0984d40SFabiano Rosas tcg_temp_free_i32(tmp); 2425*f0984d40SFabiano Rosas tcg_temp_free_i32(scalar); 2426*f0984d40SFabiano Rosas return true; 2427*f0984d40SFabiano Rosas } 2428*f0984d40SFabiano Rosas 2429*f0984d40SFabiano Rosas static bool trans_VMUL_2sc(DisasContext *s, arg_2scalar *a) 2430*f0984d40SFabiano Rosas { 2431*f0984d40SFabiano Rosas static NeonGenTwoOpFn * const opfn[] = { 2432*f0984d40SFabiano Rosas NULL, 2433*f0984d40SFabiano Rosas gen_helper_neon_mul_u16, 2434*f0984d40SFabiano Rosas tcg_gen_mul_i32, 2435*f0984d40SFabiano Rosas NULL, 2436*f0984d40SFabiano Rosas }; 2437*f0984d40SFabiano Rosas 2438*f0984d40SFabiano Rosas return do_2scalar(s, a, opfn[a->size], NULL); 2439*f0984d40SFabiano Rosas } 2440*f0984d40SFabiano Rosas 2441*f0984d40SFabiano Rosas static bool trans_VMLA_2sc(DisasContext *s, arg_2scalar *a) 2442*f0984d40SFabiano Rosas { 2443*f0984d40SFabiano Rosas static NeonGenTwoOpFn * const opfn[] = { 2444*f0984d40SFabiano Rosas NULL, 2445*f0984d40SFabiano Rosas gen_helper_neon_mul_u16, 2446*f0984d40SFabiano Rosas tcg_gen_mul_i32, 2447*f0984d40SFabiano Rosas NULL, 2448*f0984d40SFabiano Rosas }; 2449*f0984d40SFabiano Rosas static NeonGenTwoOpFn * const accfn[] = { 2450*f0984d40SFabiano Rosas NULL, 2451*f0984d40SFabiano Rosas gen_helper_neon_add_u16, 2452*f0984d40SFabiano Rosas tcg_gen_add_i32, 2453*f0984d40SFabiano Rosas NULL, 2454*f0984d40SFabiano Rosas }; 2455*f0984d40SFabiano Rosas 
2456*f0984d40SFabiano Rosas return do_2scalar(s, a, opfn[a->size], accfn[a->size]); 2457*f0984d40SFabiano Rosas } 2458*f0984d40SFabiano Rosas 2459*f0984d40SFabiano Rosas static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a) 2460*f0984d40SFabiano Rosas { 2461*f0984d40SFabiano Rosas static NeonGenTwoOpFn * const opfn[] = { 2462*f0984d40SFabiano Rosas NULL, 2463*f0984d40SFabiano Rosas gen_helper_neon_mul_u16, 2464*f0984d40SFabiano Rosas tcg_gen_mul_i32, 2465*f0984d40SFabiano Rosas NULL, 2466*f0984d40SFabiano Rosas }; 2467*f0984d40SFabiano Rosas static NeonGenTwoOpFn * const accfn[] = { 2468*f0984d40SFabiano Rosas NULL, 2469*f0984d40SFabiano Rosas gen_helper_neon_sub_u16, 2470*f0984d40SFabiano Rosas tcg_gen_sub_i32, 2471*f0984d40SFabiano Rosas NULL, 2472*f0984d40SFabiano Rosas }; 2473*f0984d40SFabiano Rosas 2474*f0984d40SFabiano Rosas return do_2scalar(s, a, opfn[a->size], accfn[a->size]); 2475*f0984d40SFabiano Rosas } 2476*f0984d40SFabiano Rosas 2477*f0984d40SFabiano Rosas static bool do_2scalar_fp_vec(DisasContext *s, arg_2scalar *a, 2478*f0984d40SFabiano Rosas gen_helper_gvec_3_ptr *fn) 2479*f0984d40SFabiano Rosas { 2480*f0984d40SFabiano Rosas /* Two registers and a scalar, using gvec */ 2481*f0984d40SFabiano Rosas int vec_size = a->q ? 16 : 8; 2482*f0984d40SFabiano Rosas int rd_ofs = neon_full_reg_offset(a->vd); 2483*f0984d40SFabiano Rosas int rn_ofs = neon_full_reg_offset(a->vn); 2484*f0984d40SFabiano Rosas int rm_ofs; 2485*f0984d40SFabiano Rosas int idx; 2486*f0984d40SFabiano Rosas TCGv_ptr fpstatus; 2487*f0984d40SFabiano Rosas 2488*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2489*f0984d40SFabiano Rosas return false; 2490*f0984d40SFabiano Rosas } 2491*f0984d40SFabiano Rosas 2492*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 2493*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 2494*f0984d40SFabiano Rosas ((a->vd | a->vn | a->vm) & 0x10)) { 2495*f0984d40SFabiano Rosas return false; 2496*f0984d40SFabiano Rosas } 2497*f0984d40SFabiano Rosas 2498*f0984d40SFabiano Rosas if (!fn) { 2499*f0984d40SFabiano Rosas /* Bad size (including size == 3, which is a different insn group) */ 2500*f0984d40SFabiano Rosas return false; 2501*f0984d40SFabiano Rosas } 2502*f0984d40SFabiano Rosas 2503*f0984d40SFabiano Rosas if (a->q && ((a->vd | a->vn) & 1)) { 2504*f0984d40SFabiano Rosas return false; 2505*f0984d40SFabiano Rosas } 2506*f0984d40SFabiano Rosas 2507*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 2508*f0984d40SFabiano Rosas return true; 2509*f0984d40SFabiano Rosas } 2510*f0984d40SFabiano Rosas 2511*f0984d40SFabiano Rosas /* a->vm is M:Vm, which encodes both register and index */ 2512*f0984d40SFabiano Rosas idx = extract32(a->vm, a->size + 2, 2); 2513*f0984d40SFabiano Rosas a->vm = extract32(a->vm, 0, a->size + 2); 2514*f0984d40SFabiano Rosas rm_ofs = neon_full_reg_offset(a->vm); 2515*f0984d40SFabiano Rosas 2516*f0984d40SFabiano Rosas fpstatus = fpstatus_ptr(a->size == 1 ? 
FPST_STD_F16 : FPST_STD); 2517*f0984d40SFabiano Rosas tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpstatus, 2518*f0984d40SFabiano Rosas vec_size, vec_size, idx, fn); 2519*f0984d40SFabiano Rosas tcg_temp_free_ptr(fpstatus); 2520*f0984d40SFabiano Rosas return true; 2521*f0984d40SFabiano Rosas } 2522*f0984d40SFabiano Rosas 2523*f0984d40SFabiano Rosas #define DO_VMUL_F_2sc(NAME, FUNC) \ 2524*f0984d40SFabiano Rosas static bool trans_##NAME##_F_2sc(DisasContext *s, arg_2scalar *a) \ 2525*f0984d40SFabiano Rosas { \ 2526*f0984d40SFabiano Rosas static gen_helper_gvec_3_ptr * const opfn[] = { \ 2527*f0984d40SFabiano Rosas NULL, \ 2528*f0984d40SFabiano Rosas gen_helper_##FUNC##_h, \ 2529*f0984d40SFabiano Rosas gen_helper_##FUNC##_s, \ 2530*f0984d40SFabiano Rosas NULL, \ 2531*f0984d40SFabiano Rosas }; \ 2532*f0984d40SFabiano Rosas if (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s)) { \ 2533*f0984d40SFabiano Rosas return false; \ 2534*f0984d40SFabiano Rosas } \ 2535*f0984d40SFabiano Rosas return do_2scalar_fp_vec(s, a, opfn[a->size]); \ 2536*f0984d40SFabiano Rosas } 2537*f0984d40SFabiano Rosas 2538*f0984d40SFabiano Rosas DO_VMUL_F_2sc(VMUL, gvec_fmul_idx) 2539*f0984d40SFabiano Rosas DO_VMUL_F_2sc(VMLA, gvec_fmla_nf_idx) 2540*f0984d40SFabiano Rosas DO_VMUL_F_2sc(VMLS, gvec_fmls_nf_idx) 2541*f0984d40SFabiano Rosas 2542*f0984d40SFabiano Rosas WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16) 2543*f0984d40SFabiano Rosas WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32) 2544*f0984d40SFabiano Rosas WRAP_ENV_FN(gen_VQRDMULH_16, gen_helper_neon_qrdmulh_s16) 2545*f0984d40SFabiano Rosas WRAP_ENV_FN(gen_VQRDMULH_32, gen_helper_neon_qrdmulh_s32) 2546*f0984d40SFabiano Rosas 2547*f0984d40SFabiano Rosas static bool trans_VQDMULH_2sc(DisasContext *s, arg_2scalar *a) 2548*f0984d40SFabiano Rosas { 2549*f0984d40SFabiano Rosas static NeonGenTwoOpFn * const opfn[] = { 2550*f0984d40SFabiano Rosas NULL, 2551*f0984d40SFabiano Rosas gen_VQDMULH_16, 2552*f0984d40SFabiano Rosas 
gen_VQDMULH_32, 2553*f0984d40SFabiano Rosas NULL, 2554*f0984d40SFabiano Rosas }; 2555*f0984d40SFabiano Rosas 2556*f0984d40SFabiano Rosas return do_2scalar(s, a, opfn[a->size], NULL); 2557*f0984d40SFabiano Rosas } 2558*f0984d40SFabiano Rosas 2559*f0984d40SFabiano Rosas static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a) 2560*f0984d40SFabiano Rosas { 2561*f0984d40SFabiano Rosas static NeonGenTwoOpFn * const opfn[] = { 2562*f0984d40SFabiano Rosas NULL, 2563*f0984d40SFabiano Rosas gen_VQRDMULH_16, 2564*f0984d40SFabiano Rosas gen_VQRDMULH_32, 2565*f0984d40SFabiano Rosas NULL, 2566*f0984d40SFabiano Rosas }; 2567*f0984d40SFabiano Rosas 2568*f0984d40SFabiano Rosas return do_2scalar(s, a, opfn[a->size], NULL); 2569*f0984d40SFabiano Rosas } 2570*f0984d40SFabiano Rosas 2571*f0984d40SFabiano Rosas static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a, 2572*f0984d40SFabiano Rosas NeonGenThreeOpEnvFn *opfn) 2573*f0984d40SFabiano Rosas { 2574*f0984d40SFabiano Rosas /* 2575*f0984d40SFabiano Rosas * VQRDMLAH/VQRDMLSH: this is like do_2scalar, but the opfn 2576*f0984d40SFabiano Rosas * performs a kind of fused op-then-accumulate using a helper 2577*f0984d40SFabiano Rosas * function that takes all of rd, rn and the scalar at once. 2578*f0984d40SFabiano Rosas */ 2579*f0984d40SFabiano Rosas TCGv_i32 scalar, rn, rd; 2580*f0984d40SFabiano Rosas int pass; 2581*f0984d40SFabiano Rosas 2582*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2583*f0984d40SFabiano Rosas return false; 2584*f0984d40SFabiano Rosas } 2585*f0984d40SFabiano Rosas 2586*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_rdm, s)) { 2587*f0984d40SFabiano Rosas return false; 2588*f0984d40SFabiano Rosas } 2589*f0984d40SFabiano Rosas 2590*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 2591*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 2592*f0984d40SFabiano Rosas ((a->vd | a->vn | a->vm) & 0x10)) { 2593*f0984d40SFabiano Rosas return false; 2594*f0984d40SFabiano Rosas } 2595*f0984d40SFabiano Rosas 2596*f0984d40SFabiano Rosas if (!opfn) { 2597*f0984d40SFabiano Rosas /* Bad size (including size == 3, which is a different insn group) */ 2598*f0984d40SFabiano Rosas return false; 2599*f0984d40SFabiano Rosas } 2600*f0984d40SFabiano Rosas 2601*f0984d40SFabiano Rosas if (a->q && ((a->vd | a->vn) & 1)) { 2602*f0984d40SFabiano Rosas return false; 2603*f0984d40SFabiano Rosas } 2604*f0984d40SFabiano Rosas 2605*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 2606*f0984d40SFabiano Rosas return true; 2607*f0984d40SFabiano Rosas } 2608*f0984d40SFabiano Rosas 2609*f0984d40SFabiano Rosas scalar = neon_get_scalar(a->size, a->vm); 2610*f0984d40SFabiano Rosas rn = tcg_temp_new_i32(); 2611*f0984d40SFabiano Rosas rd = tcg_temp_new_i32(); 2612*f0984d40SFabiano Rosas 2613*f0984d40SFabiano Rosas for (pass = 0; pass < (a->q ? 
4 : 2); pass++) { 2614*f0984d40SFabiano Rosas read_neon_element32(rn, a->vn, pass, MO_32); 2615*f0984d40SFabiano Rosas read_neon_element32(rd, a->vd, pass, MO_32); 2616*f0984d40SFabiano Rosas opfn(rd, cpu_env, rn, scalar, rd); 2617*f0984d40SFabiano Rosas write_neon_element32(rd, a->vd, pass, MO_32); 2618*f0984d40SFabiano Rosas } 2619*f0984d40SFabiano Rosas tcg_temp_free_i32(rn); 2620*f0984d40SFabiano Rosas tcg_temp_free_i32(rd); 2621*f0984d40SFabiano Rosas tcg_temp_free_i32(scalar); 2622*f0984d40SFabiano Rosas 2623*f0984d40SFabiano Rosas return true; 2624*f0984d40SFabiano Rosas } 2625*f0984d40SFabiano Rosas 2626*f0984d40SFabiano Rosas static bool trans_VQRDMLAH_2sc(DisasContext *s, arg_2scalar *a) 2627*f0984d40SFabiano Rosas { 2628*f0984d40SFabiano Rosas static NeonGenThreeOpEnvFn *opfn[] = { 2629*f0984d40SFabiano Rosas NULL, 2630*f0984d40SFabiano Rosas gen_helper_neon_qrdmlah_s16, 2631*f0984d40SFabiano Rosas gen_helper_neon_qrdmlah_s32, 2632*f0984d40SFabiano Rosas NULL, 2633*f0984d40SFabiano Rosas }; 2634*f0984d40SFabiano Rosas return do_vqrdmlah_2sc(s, a, opfn[a->size]); 2635*f0984d40SFabiano Rosas } 2636*f0984d40SFabiano Rosas 2637*f0984d40SFabiano Rosas static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a) 2638*f0984d40SFabiano Rosas { 2639*f0984d40SFabiano Rosas static NeonGenThreeOpEnvFn *opfn[] = { 2640*f0984d40SFabiano Rosas NULL, 2641*f0984d40SFabiano Rosas gen_helper_neon_qrdmlsh_s16, 2642*f0984d40SFabiano Rosas gen_helper_neon_qrdmlsh_s32, 2643*f0984d40SFabiano Rosas NULL, 2644*f0984d40SFabiano Rosas }; 2645*f0984d40SFabiano Rosas return do_vqrdmlah_2sc(s, a, opfn[a->size]); 2646*f0984d40SFabiano Rosas } 2647*f0984d40SFabiano Rosas 2648*f0984d40SFabiano Rosas static bool do_2scalar_long(DisasContext *s, arg_2scalar *a, 2649*f0984d40SFabiano Rosas NeonGenTwoOpWidenFn *opfn, 2650*f0984d40SFabiano Rosas NeonGenTwo64OpFn *accfn) 2651*f0984d40SFabiano Rosas { 2652*f0984d40SFabiano Rosas /* 2653*f0984d40SFabiano Rosas * Two registers and a scalar, 
long operations: perform an 2654*f0984d40SFabiano Rosas * operation on the input elements and the scalar which produces 2655*f0984d40SFabiano Rosas * a double-width result, and then possibly perform an accumulation 2656*f0984d40SFabiano Rosas * operation of that result into the destination. 2657*f0984d40SFabiano Rosas */ 2658*f0984d40SFabiano Rosas TCGv_i32 scalar, rn; 2659*f0984d40SFabiano Rosas TCGv_i64 rn0_64, rn1_64; 2660*f0984d40SFabiano Rosas 2661*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2662*f0984d40SFabiano Rosas return false; 2663*f0984d40SFabiano Rosas } 2664*f0984d40SFabiano Rosas 2665*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. */ 2666*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 2667*f0984d40SFabiano Rosas ((a->vd | a->vn | a->vm) & 0x10)) { 2668*f0984d40SFabiano Rosas return false; 2669*f0984d40SFabiano Rosas } 2670*f0984d40SFabiano Rosas 2671*f0984d40SFabiano Rosas if (!opfn) { 2672*f0984d40SFabiano Rosas /* Bad size (including size == 3, which is a different insn group) */ 2673*f0984d40SFabiano Rosas return false; 2674*f0984d40SFabiano Rosas } 2675*f0984d40SFabiano Rosas 2676*f0984d40SFabiano Rosas if (a->vd & 1) { 2677*f0984d40SFabiano Rosas return false; 2678*f0984d40SFabiano Rosas } 2679*f0984d40SFabiano Rosas 2680*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 2681*f0984d40SFabiano Rosas return true; 2682*f0984d40SFabiano Rosas } 2683*f0984d40SFabiano Rosas 2684*f0984d40SFabiano Rosas scalar = neon_get_scalar(a->size, a->vm); 2685*f0984d40SFabiano Rosas 2686*f0984d40SFabiano Rosas /* Load all inputs before writing any outputs, in case of overlap */ 2687*f0984d40SFabiano Rosas rn = tcg_temp_new_i32(); 2688*f0984d40SFabiano Rosas read_neon_element32(rn, a->vn, 0, MO_32); 2689*f0984d40SFabiano Rosas rn0_64 = tcg_temp_new_i64(); 2690*f0984d40SFabiano Rosas opfn(rn0_64, rn, scalar); 2691*f0984d40SFabiano Rosas 2692*f0984d40SFabiano Rosas read_neon_element32(rn, a->vn, 1, 
MO_32); 2693*f0984d40SFabiano Rosas rn1_64 = tcg_temp_new_i64(); 2694*f0984d40SFabiano Rosas opfn(rn1_64, rn, scalar); 2695*f0984d40SFabiano Rosas tcg_temp_free_i32(rn); 2696*f0984d40SFabiano Rosas tcg_temp_free_i32(scalar); 2697*f0984d40SFabiano Rosas 2698*f0984d40SFabiano Rosas if (accfn) { 2699*f0984d40SFabiano Rosas TCGv_i64 t64 = tcg_temp_new_i64(); 2700*f0984d40SFabiano Rosas read_neon_element64(t64, a->vd, 0, MO_64); 2701*f0984d40SFabiano Rosas accfn(rn0_64, t64, rn0_64); 2702*f0984d40SFabiano Rosas read_neon_element64(t64, a->vd, 1, MO_64); 2703*f0984d40SFabiano Rosas accfn(rn1_64, t64, rn1_64); 2704*f0984d40SFabiano Rosas tcg_temp_free_i64(t64); 2705*f0984d40SFabiano Rosas } 2706*f0984d40SFabiano Rosas 2707*f0984d40SFabiano Rosas write_neon_element64(rn0_64, a->vd, 0, MO_64); 2708*f0984d40SFabiano Rosas write_neon_element64(rn1_64, a->vd, 1, MO_64); 2709*f0984d40SFabiano Rosas tcg_temp_free_i64(rn0_64); 2710*f0984d40SFabiano Rosas tcg_temp_free_i64(rn1_64); 2711*f0984d40SFabiano Rosas return true; 2712*f0984d40SFabiano Rosas } 2713*f0984d40SFabiano Rosas 2714*f0984d40SFabiano Rosas static bool trans_VMULL_S_2sc(DisasContext *s, arg_2scalar *a) 2715*f0984d40SFabiano Rosas { 2716*f0984d40SFabiano Rosas static NeonGenTwoOpWidenFn * const opfn[] = { 2717*f0984d40SFabiano Rosas NULL, 2718*f0984d40SFabiano Rosas gen_helper_neon_mull_s16, 2719*f0984d40SFabiano Rosas gen_mull_s32, 2720*f0984d40SFabiano Rosas NULL, 2721*f0984d40SFabiano Rosas }; 2722*f0984d40SFabiano Rosas 2723*f0984d40SFabiano Rosas return do_2scalar_long(s, a, opfn[a->size], NULL); 2724*f0984d40SFabiano Rosas } 2725*f0984d40SFabiano Rosas 2726*f0984d40SFabiano Rosas static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a) 2727*f0984d40SFabiano Rosas { 2728*f0984d40SFabiano Rosas static NeonGenTwoOpWidenFn * const opfn[] = { 2729*f0984d40SFabiano Rosas NULL, 2730*f0984d40SFabiano Rosas gen_helper_neon_mull_u16, 2731*f0984d40SFabiano Rosas gen_mull_u32, 2732*f0984d40SFabiano Rosas NULL, 
2733*f0984d40SFabiano Rosas }; 2734*f0984d40SFabiano Rosas 2735*f0984d40SFabiano Rosas return do_2scalar_long(s, a, opfn[a->size], NULL); 2736*f0984d40SFabiano Rosas } 2737*f0984d40SFabiano Rosas 2738*f0984d40SFabiano Rosas #define DO_VMLAL_2SC(INSN, MULL, ACC) \ 2739*f0984d40SFabiano Rosas static bool trans_##INSN##_2sc(DisasContext *s, arg_2scalar *a) \ 2740*f0984d40SFabiano Rosas { \ 2741*f0984d40SFabiano Rosas static NeonGenTwoOpWidenFn * const opfn[] = { \ 2742*f0984d40SFabiano Rosas NULL, \ 2743*f0984d40SFabiano Rosas gen_helper_neon_##MULL##16, \ 2744*f0984d40SFabiano Rosas gen_##MULL##32, \ 2745*f0984d40SFabiano Rosas NULL, \ 2746*f0984d40SFabiano Rosas }; \ 2747*f0984d40SFabiano Rosas static NeonGenTwo64OpFn * const accfn[] = { \ 2748*f0984d40SFabiano Rosas NULL, \ 2749*f0984d40SFabiano Rosas gen_helper_neon_##ACC##l_u32, \ 2750*f0984d40SFabiano Rosas tcg_gen_##ACC##_i64, \ 2751*f0984d40SFabiano Rosas NULL, \ 2752*f0984d40SFabiano Rosas }; \ 2753*f0984d40SFabiano Rosas return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); \ 2754*f0984d40SFabiano Rosas } 2755*f0984d40SFabiano Rosas 2756*f0984d40SFabiano Rosas DO_VMLAL_2SC(VMLAL_S, mull_s, add) 2757*f0984d40SFabiano Rosas DO_VMLAL_2SC(VMLAL_U, mull_u, add) 2758*f0984d40SFabiano Rosas DO_VMLAL_2SC(VMLSL_S, mull_s, sub) 2759*f0984d40SFabiano Rosas DO_VMLAL_2SC(VMLSL_U, mull_u, sub) 2760*f0984d40SFabiano Rosas 2761*f0984d40SFabiano Rosas static bool trans_VQDMULL_2sc(DisasContext *s, arg_2scalar *a) 2762*f0984d40SFabiano Rosas { 2763*f0984d40SFabiano Rosas static NeonGenTwoOpWidenFn * const opfn[] = { 2764*f0984d40SFabiano Rosas NULL, 2765*f0984d40SFabiano Rosas gen_VQDMULL_16, 2766*f0984d40SFabiano Rosas gen_VQDMULL_32, 2767*f0984d40SFabiano Rosas NULL, 2768*f0984d40SFabiano Rosas }; 2769*f0984d40SFabiano Rosas 2770*f0984d40SFabiano Rosas return do_2scalar_long(s, a, opfn[a->size], NULL); 2771*f0984d40SFabiano Rosas } 2772*f0984d40SFabiano Rosas 2773*f0984d40SFabiano Rosas static bool 
trans_VQDMLAL_2sc(DisasContext *s, arg_2scalar *a) 2774*f0984d40SFabiano Rosas { 2775*f0984d40SFabiano Rosas static NeonGenTwoOpWidenFn * const opfn[] = { 2776*f0984d40SFabiano Rosas NULL, 2777*f0984d40SFabiano Rosas gen_VQDMULL_16, 2778*f0984d40SFabiano Rosas gen_VQDMULL_32, 2779*f0984d40SFabiano Rosas NULL, 2780*f0984d40SFabiano Rosas }; 2781*f0984d40SFabiano Rosas static NeonGenTwo64OpFn * const accfn[] = { 2782*f0984d40SFabiano Rosas NULL, 2783*f0984d40SFabiano Rosas gen_VQDMLAL_acc_16, 2784*f0984d40SFabiano Rosas gen_VQDMLAL_acc_32, 2785*f0984d40SFabiano Rosas NULL, 2786*f0984d40SFabiano Rosas }; 2787*f0984d40SFabiano Rosas 2788*f0984d40SFabiano Rosas return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); 2789*f0984d40SFabiano Rosas } 2790*f0984d40SFabiano Rosas 2791*f0984d40SFabiano Rosas static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a) 2792*f0984d40SFabiano Rosas { 2793*f0984d40SFabiano Rosas static NeonGenTwoOpWidenFn * const opfn[] = { 2794*f0984d40SFabiano Rosas NULL, 2795*f0984d40SFabiano Rosas gen_VQDMULL_16, 2796*f0984d40SFabiano Rosas gen_VQDMULL_32, 2797*f0984d40SFabiano Rosas NULL, 2798*f0984d40SFabiano Rosas }; 2799*f0984d40SFabiano Rosas static NeonGenTwo64OpFn * const accfn[] = { 2800*f0984d40SFabiano Rosas NULL, 2801*f0984d40SFabiano Rosas gen_VQDMLSL_acc_16, 2802*f0984d40SFabiano Rosas gen_VQDMLSL_acc_32, 2803*f0984d40SFabiano Rosas NULL, 2804*f0984d40SFabiano Rosas }; 2805*f0984d40SFabiano Rosas 2806*f0984d40SFabiano Rosas return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); 2807*f0984d40SFabiano Rosas } 2808*f0984d40SFabiano Rosas 2809*f0984d40SFabiano Rosas static bool trans_VEXT(DisasContext *s, arg_VEXT *a) 2810*f0984d40SFabiano Rosas { 2811*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2812*f0984d40SFabiano Rosas return false; 2813*f0984d40SFabiano Rosas } 2814*f0984d40SFabiano Rosas 2815*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 2816*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 2817*f0984d40SFabiano Rosas ((a->vd | a->vn | a->vm) & 0x10)) { 2818*f0984d40SFabiano Rosas return false; 2819*f0984d40SFabiano Rosas } 2820*f0984d40SFabiano Rosas 2821*f0984d40SFabiano Rosas if ((a->vn | a->vm | a->vd) & a->q) { 2822*f0984d40SFabiano Rosas return false; 2823*f0984d40SFabiano Rosas } 2824*f0984d40SFabiano Rosas 2825*f0984d40SFabiano Rosas if (a->imm > 7 && !a->q) { 2826*f0984d40SFabiano Rosas return false; 2827*f0984d40SFabiano Rosas } 2828*f0984d40SFabiano Rosas 2829*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 2830*f0984d40SFabiano Rosas return true; 2831*f0984d40SFabiano Rosas } 2832*f0984d40SFabiano Rosas 2833*f0984d40SFabiano Rosas if (!a->q) { 2834*f0984d40SFabiano Rosas /* Extract 64 bits from <Vm:Vn> */ 2835*f0984d40SFabiano Rosas TCGv_i64 left, right, dest; 2836*f0984d40SFabiano Rosas 2837*f0984d40SFabiano Rosas left = tcg_temp_new_i64(); 2838*f0984d40SFabiano Rosas right = tcg_temp_new_i64(); 2839*f0984d40SFabiano Rosas dest = tcg_temp_new_i64(); 2840*f0984d40SFabiano Rosas 2841*f0984d40SFabiano Rosas read_neon_element64(right, a->vn, 0, MO_64); 2842*f0984d40SFabiano Rosas read_neon_element64(left, a->vm, 0, MO_64); 2843*f0984d40SFabiano Rosas tcg_gen_extract2_i64(dest, right, left, a->imm * 8); 2844*f0984d40SFabiano Rosas write_neon_element64(dest, a->vd, 0, MO_64); 2845*f0984d40SFabiano Rosas 2846*f0984d40SFabiano Rosas tcg_temp_free_i64(left); 2847*f0984d40SFabiano Rosas tcg_temp_free_i64(right); 2848*f0984d40SFabiano Rosas tcg_temp_free_i64(dest); 2849*f0984d40SFabiano Rosas } else { 2850*f0984d40SFabiano Rosas /* Extract 128 bits from <Vm+1:Vm:Vn+1:Vn> */ 2851*f0984d40SFabiano Rosas TCGv_i64 left, middle, right, destleft, destright; 2852*f0984d40SFabiano Rosas 2853*f0984d40SFabiano Rosas left = tcg_temp_new_i64(); 2854*f0984d40SFabiano Rosas middle = tcg_temp_new_i64(); 2855*f0984d40SFabiano Rosas right = tcg_temp_new_i64(); 2856*f0984d40SFabiano Rosas 
destleft = tcg_temp_new_i64(); 2857*f0984d40SFabiano Rosas destright = tcg_temp_new_i64(); 2858*f0984d40SFabiano Rosas 2859*f0984d40SFabiano Rosas if (a->imm < 8) { 2860*f0984d40SFabiano Rosas read_neon_element64(right, a->vn, 0, MO_64); 2861*f0984d40SFabiano Rosas read_neon_element64(middle, a->vn, 1, MO_64); 2862*f0984d40SFabiano Rosas tcg_gen_extract2_i64(destright, right, middle, a->imm * 8); 2863*f0984d40SFabiano Rosas read_neon_element64(left, a->vm, 0, MO_64); 2864*f0984d40SFabiano Rosas tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8); 2865*f0984d40SFabiano Rosas } else { 2866*f0984d40SFabiano Rosas read_neon_element64(right, a->vn, 1, MO_64); 2867*f0984d40SFabiano Rosas read_neon_element64(middle, a->vm, 0, MO_64); 2868*f0984d40SFabiano Rosas tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8); 2869*f0984d40SFabiano Rosas read_neon_element64(left, a->vm, 1, MO_64); 2870*f0984d40SFabiano Rosas tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8); 2871*f0984d40SFabiano Rosas } 2872*f0984d40SFabiano Rosas 2873*f0984d40SFabiano Rosas write_neon_element64(destright, a->vd, 0, MO_64); 2874*f0984d40SFabiano Rosas write_neon_element64(destleft, a->vd, 1, MO_64); 2875*f0984d40SFabiano Rosas 2876*f0984d40SFabiano Rosas tcg_temp_free_i64(destright); 2877*f0984d40SFabiano Rosas tcg_temp_free_i64(destleft); 2878*f0984d40SFabiano Rosas tcg_temp_free_i64(right); 2879*f0984d40SFabiano Rosas tcg_temp_free_i64(middle); 2880*f0984d40SFabiano Rosas tcg_temp_free_i64(left); 2881*f0984d40SFabiano Rosas } 2882*f0984d40SFabiano Rosas return true; 2883*f0984d40SFabiano Rosas } 2884*f0984d40SFabiano Rosas 2885*f0984d40SFabiano Rosas static bool trans_VTBL(DisasContext *s, arg_VTBL *a) 2886*f0984d40SFabiano Rosas { 2887*f0984d40SFabiano Rosas TCGv_i64 val, def; 2888*f0984d40SFabiano Rosas TCGv_i32 desc; 2889*f0984d40SFabiano Rosas 2890*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2891*f0984d40SFabiano Rosas return false; 
2892*f0984d40SFabiano Rosas } 2893*f0984d40SFabiano Rosas 2894*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. */ 2895*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 2896*f0984d40SFabiano Rosas ((a->vd | a->vn | a->vm) & 0x10)) { 2897*f0984d40SFabiano Rosas return false; 2898*f0984d40SFabiano Rosas } 2899*f0984d40SFabiano Rosas 2900*f0984d40SFabiano Rosas if ((a->vn + a->len + 1) > 32) { 2901*f0984d40SFabiano Rosas /* 2902*f0984d40SFabiano Rosas * This is UNPREDICTABLE; we choose to UNDEF to avoid the 2903*f0984d40SFabiano Rosas * helper function running off the end of the register file. 2904*f0984d40SFabiano Rosas */ 2905*f0984d40SFabiano Rosas return false; 2906*f0984d40SFabiano Rosas } 2907*f0984d40SFabiano Rosas 2908*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 2909*f0984d40SFabiano Rosas return true; 2910*f0984d40SFabiano Rosas } 2911*f0984d40SFabiano Rosas 2912*f0984d40SFabiano Rosas desc = tcg_constant_i32((a->vn << 2) | a->len); 2913*f0984d40SFabiano Rosas def = tcg_temp_new_i64(); 2914*f0984d40SFabiano Rosas if (a->op) { 2915*f0984d40SFabiano Rosas read_neon_element64(def, a->vd, 0, MO_64); 2916*f0984d40SFabiano Rosas } else { 2917*f0984d40SFabiano Rosas tcg_gen_movi_i64(def, 0); 2918*f0984d40SFabiano Rosas } 2919*f0984d40SFabiano Rosas val = tcg_temp_new_i64(); 2920*f0984d40SFabiano Rosas read_neon_element64(val, a->vm, 0, MO_64); 2921*f0984d40SFabiano Rosas 2922*f0984d40SFabiano Rosas gen_helper_neon_tbl(val, cpu_env, desc, val, def); 2923*f0984d40SFabiano Rosas write_neon_element64(val, a->vd, 0, MO_64); 2924*f0984d40SFabiano Rosas 2925*f0984d40SFabiano Rosas tcg_temp_free_i64(def); 2926*f0984d40SFabiano Rosas tcg_temp_free_i64(val); 2927*f0984d40SFabiano Rosas return true; 2928*f0984d40SFabiano Rosas } 2929*f0984d40SFabiano Rosas 2930*f0984d40SFabiano Rosas static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a) 2931*f0984d40SFabiano Rosas { 2932*f0984d40SFabiano Rosas if (!arm_dc_feature(s, 
ARM_FEATURE_NEON)) { 2933*f0984d40SFabiano Rosas return false; 2934*f0984d40SFabiano Rosas } 2935*f0984d40SFabiano Rosas 2936*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. */ 2937*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 2938*f0984d40SFabiano Rosas ((a->vd | a->vm) & 0x10)) { 2939*f0984d40SFabiano Rosas return false; 2940*f0984d40SFabiano Rosas } 2941*f0984d40SFabiano Rosas 2942*f0984d40SFabiano Rosas if (a->vd & a->q) { 2943*f0984d40SFabiano Rosas return false; 2944*f0984d40SFabiano Rosas } 2945*f0984d40SFabiano Rosas 2946*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 2947*f0984d40SFabiano Rosas return true; 2948*f0984d40SFabiano Rosas } 2949*f0984d40SFabiano Rosas 2950*f0984d40SFabiano Rosas tcg_gen_gvec_dup_mem(a->size, neon_full_reg_offset(a->vd), 2951*f0984d40SFabiano Rosas neon_element_offset(a->vm, a->index, a->size), 2952*f0984d40SFabiano Rosas a->q ? 16 : 8, a->q ? 16 : 8); 2953*f0984d40SFabiano Rosas return true; 2954*f0984d40SFabiano Rosas } 2955*f0984d40SFabiano Rosas 2956*f0984d40SFabiano Rosas static bool trans_VREV64(DisasContext *s, arg_VREV64 *a) 2957*f0984d40SFabiano Rosas { 2958*f0984d40SFabiano Rosas int pass, half; 2959*f0984d40SFabiano Rosas TCGv_i32 tmp[2]; 2960*f0984d40SFabiano Rosas 2961*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2962*f0984d40SFabiano Rosas return false; 2963*f0984d40SFabiano Rosas } 2964*f0984d40SFabiano Rosas 2965*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 2966*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 2967*f0984d40SFabiano Rosas ((a->vd | a->vm) & 0x10)) { 2968*f0984d40SFabiano Rosas return false; 2969*f0984d40SFabiano Rosas } 2970*f0984d40SFabiano Rosas 2971*f0984d40SFabiano Rosas if ((a->vd | a->vm) & a->q) { 2972*f0984d40SFabiano Rosas return false; 2973*f0984d40SFabiano Rosas } 2974*f0984d40SFabiano Rosas 2975*f0984d40SFabiano Rosas if (a->size == 3) { 2976*f0984d40SFabiano Rosas return false; 2977*f0984d40SFabiano Rosas } 2978*f0984d40SFabiano Rosas 2979*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 2980*f0984d40SFabiano Rosas return true; 2981*f0984d40SFabiano Rosas } 2982*f0984d40SFabiano Rosas 2983*f0984d40SFabiano Rosas tmp[0] = tcg_temp_new_i32(); 2984*f0984d40SFabiano Rosas tmp[1] = tcg_temp_new_i32(); 2985*f0984d40SFabiano Rosas 2986*f0984d40SFabiano Rosas for (pass = 0; pass < (a->q ? 2 : 1); pass++) { 2987*f0984d40SFabiano Rosas for (half = 0; half < 2; half++) { 2988*f0984d40SFabiano Rosas read_neon_element32(tmp[half], a->vm, pass * 2 + half, MO_32); 2989*f0984d40SFabiano Rosas switch (a->size) { 2990*f0984d40SFabiano Rosas case 0: 2991*f0984d40SFabiano Rosas tcg_gen_bswap32_i32(tmp[half], tmp[half]); 2992*f0984d40SFabiano Rosas break; 2993*f0984d40SFabiano Rosas case 1: 2994*f0984d40SFabiano Rosas gen_swap_half(tmp[half], tmp[half]); 2995*f0984d40SFabiano Rosas break; 2996*f0984d40SFabiano Rosas case 2: 2997*f0984d40SFabiano Rosas break; 2998*f0984d40SFabiano Rosas default: 2999*f0984d40SFabiano Rosas g_assert_not_reached(); 3000*f0984d40SFabiano Rosas } 3001*f0984d40SFabiano Rosas } 3002*f0984d40SFabiano Rosas write_neon_element32(tmp[1], a->vd, pass * 2, MO_32); 3003*f0984d40SFabiano Rosas write_neon_element32(tmp[0], a->vd, pass * 2 + 1, MO_32); 3004*f0984d40SFabiano Rosas } 3005*f0984d40SFabiano Rosas 3006*f0984d40SFabiano Rosas tcg_temp_free_i32(tmp[0]); 3007*f0984d40SFabiano Rosas tcg_temp_free_i32(tmp[1]); 3008*f0984d40SFabiano Rosas return true; 
3009*f0984d40SFabiano Rosas } 3010*f0984d40SFabiano Rosas 3011*f0984d40SFabiano Rosas static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a, 3012*f0984d40SFabiano Rosas NeonGenWidenFn *widenfn, 3013*f0984d40SFabiano Rosas NeonGenTwo64OpFn *opfn, 3014*f0984d40SFabiano Rosas NeonGenTwo64OpFn *accfn) 3015*f0984d40SFabiano Rosas { 3016*f0984d40SFabiano Rosas /* 3017*f0984d40SFabiano Rosas * Pairwise long operations: widen both halves of the pair, 3018*f0984d40SFabiano Rosas * combine the pairs with the opfn, and then possibly accumulate 3019*f0984d40SFabiano Rosas * into the destination with the accfn. 3020*f0984d40SFabiano Rosas */ 3021*f0984d40SFabiano Rosas int pass; 3022*f0984d40SFabiano Rosas 3023*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 3024*f0984d40SFabiano Rosas return false; 3025*f0984d40SFabiano Rosas } 3026*f0984d40SFabiano Rosas 3027*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. */ 3028*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 3029*f0984d40SFabiano Rosas ((a->vd | a->vm) & 0x10)) { 3030*f0984d40SFabiano Rosas return false; 3031*f0984d40SFabiano Rosas } 3032*f0984d40SFabiano Rosas 3033*f0984d40SFabiano Rosas if ((a->vd | a->vm) & a->q) { 3034*f0984d40SFabiano Rosas return false; 3035*f0984d40SFabiano Rosas } 3036*f0984d40SFabiano Rosas 3037*f0984d40SFabiano Rosas if (!widenfn) { 3038*f0984d40SFabiano Rosas return false; 3039*f0984d40SFabiano Rosas } 3040*f0984d40SFabiano Rosas 3041*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 3042*f0984d40SFabiano Rosas return true; 3043*f0984d40SFabiano Rosas } 3044*f0984d40SFabiano Rosas 3045*f0984d40SFabiano Rosas for (pass = 0; pass < a->q + 1; pass++) { 3046*f0984d40SFabiano Rosas TCGv_i32 tmp; 3047*f0984d40SFabiano Rosas TCGv_i64 rm0_64, rm1_64, rd_64; 3048*f0984d40SFabiano Rosas 3049*f0984d40SFabiano Rosas rm0_64 = tcg_temp_new_i64(); 3050*f0984d40SFabiano Rosas rm1_64 = tcg_temp_new_i64(); 3051*f0984d40SFabiano Rosas rd_64 = 
tcg_temp_new_i64(); 3052*f0984d40SFabiano Rosas 3053*f0984d40SFabiano Rosas tmp = tcg_temp_new_i32(); 3054*f0984d40SFabiano Rosas read_neon_element32(tmp, a->vm, pass * 2, MO_32); 3055*f0984d40SFabiano Rosas widenfn(rm0_64, tmp); 3056*f0984d40SFabiano Rosas read_neon_element32(tmp, a->vm, pass * 2 + 1, MO_32); 3057*f0984d40SFabiano Rosas widenfn(rm1_64, tmp); 3058*f0984d40SFabiano Rosas tcg_temp_free_i32(tmp); 3059*f0984d40SFabiano Rosas 3060*f0984d40SFabiano Rosas opfn(rd_64, rm0_64, rm1_64); 3061*f0984d40SFabiano Rosas tcg_temp_free_i64(rm0_64); 3062*f0984d40SFabiano Rosas tcg_temp_free_i64(rm1_64); 3063*f0984d40SFabiano Rosas 3064*f0984d40SFabiano Rosas if (accfn) { 3065*f0984d40SFabiano Rosas TCGv_i64 tmp64 = tcg_temp_new_i64(); 3066*f0984d40SFabiano Rosas read_neon_element64(tmp64, a->vd, pass, MO_64); 3067*f0984d40SFabiano Rosas accfn(rd_64, tmp64, rd_64); 3068*f0984d40SFabiano Rosas tcg_temp_free_i64(tmp64); 3069*f0984d40SFabiano Rosas } 3070*f0984d40SFabiano Rosas write_neon_element64(rd_64, a->vd, pass, MO_64); 3071*f0984d40SFabiano Rosas tcg_temp_free_i64(rd_64); 3072*f0984d40SFabiano Rosas } 3073*f0984d40SFabiano Rosas return true; 3074*f0984d40SFabiano Rosas } 3075*f0984d40SFabiano Rosas 3076*f0984d40SFabiano Rosas static bool trans_VPADDL_S(DisasContext *s, arg_2misc *a) 3077*f0984d40SFabiano Rosas { 3078*f0984d40SFabiano Rosas static NeonGenWidenFn * const widenfn[] = { 3079*f0984d40SFabiano Rosas gen_helper_neon_widen_s8, 3080*f0984d40SFabiano Rosas gen_helper_neon_widen_s16, 3081*f0984d40SFabiano Rosas tcg_gen_ext_i32_i64, 3082*f0984d40SFabiano Rosas NULL, 3083*f0984d40SFabiano Rosas }; 3084*f0984d40SFabiano Rosas static NeonGenTwo64OpFn * const opfn[] = { 3085*f0984d40SFabiano Rosas gen_helper_neon_paddl_u16, 3086*f0984d40SFabiano Rosas gen_helper_neon_paddl_u32, 3087*f0984d40SFabiano Rosas tcg_gen_add_i64, 3088*f0984d40SFabiano Rosas NULL, 3089*f0984d40SFabiano Rosas }; 3090*f0984d40SFabiano Rosas 3091*f0984d40SFabiano Rosas return 
do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL); 3092*f0984d40SFabiano Rosas } 3093*f0984d40SFabiano Rosas 3094*f0984d40SFabiano Rosas static bool trans_VPADDL_U(DisasContext *s, arg_2misc *a) 3095*f0984d40SFabiano Rosas { 3096*f0984d40SFabiano Rosas static NeonGenWidenFn * const widenfn[] = { 3097*f0984d40SFabiano Rosas gen_helper_neon_widen_u8, 3098*f0984d40SFabiano Rosas gen_helper_neon_widen_u16, 3099*f0984d40SFabiano Rosas tcg_gen_extu_i32_i64, 3100*f0984d40SFabiano Rosas NULL, 3101*f0984d40SFabiano Rosas }; 3102*f0984d40SFabiano Rosas static NeonGenTwo64OpFn * const opfn[] = { 3103*f0984d40SFabiano Rosas gen_helper_neon_paddl_u16, 3104*f0984d40SFabiano Rosas gen_helper_neon_paddl_u32, 3105*f0984d40SFabiano Rosas tcg_gen_add_i64, 3106*f0984d40SFabiano Rosas NULL, 3107*f0984d40SFabiano Rosas }; 3108*f0984d40SFabiano Rosas 3109*f0984d40SFabiano Rosas return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL); 3110*f0984d40SFabiano Rosas } 3111*f0984d40SFabiano Rosas 3112*f0984d40SFabiano Rosas static bool trans_VPADAL_S(DisasContext *s, arg_2misc *a) 3113*f0984d40SFabiano Rosas { 3114*f0984d40SFabiano Rosas static NeonGenWidenFn * const widenfn[] = { 3115*f0984d40SFabiano Rosas gen_helper_neon_widen_s8, 3116*f0984d40SFabiano Rosas gen_helper_neon_widen_s16, 3117*f0984d40SFabiano Rosas tcg_gen_ext_i32_i64, 3118*f0984d40SFabiano Rosas NULL, 3119*f0984d40SFabiano Rosas }; 3120*f0984d40SFabiano Rosas static NeonGenTwo64OpFn * const opfn[] = { 3121*f0984d40SFabiano Rosas gen_helper_neon_paddl_u16, 3122*f0984d40SFabiano Rosas gen_helper_neon_paddl_u32, 3123*f0984d40SFabiano Rosas tcg_gen_add_i64, 3124*f0984d40SFabiano Rosas NULL, 3125*f0984d40SFabiano Rosas }; 3126*f0984d40SFabiano Rosas static NeonGenTwo64OpFn * const accfn[] = { 3127*f0984d40SFabiano Rosas gen_helper_neon_addl_u16, 3128*f0984d40SFabiano Rosas gen_helper_neon_addl_u32, 3129*f0984d40SFabiano Rosas tcg_gen_add_i64, 3130*f0984d40SFabiano Rosas NULL, 3131*f0984d40SFabiano Rosas 
}; 3132*f0984d40SFabiano Rosas 3133*f0984d40SFabiano Rosas return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], 3134*f0984d40SFabiano Rosas accfn[a->size]); 3135*f0984d40SFabiano Rosas } 3136*f0984d40SFabiano Rosas 3137*f0984d40SFabiano Rosas static bool trans_VPADAL_U(DisasContext *s, arg_2misc *a) 3138*f0984d40SFabiano Rosas { 3139*f0984d40SFabiano Rosas static NeonGenWidenFn * const widenfn[] = { 3140*f0984d40SFabiano Rosas gen_helper_neon_widen_u8, 3141*f0984d40SFabiano Rosas gen_helper_neon_widen_u16, 3142*f0984d40SFabiano Rosas tcg_gen_extu_i32_i64, 3143*f0984d40SFabiano Rosas NULL, 3144*f0984d40SFabiano Rosas }; 3145*f0984d40SFabiano Rosas static NeonGenTwo64OpFn * const opfn[] = { 3146*f0984d40SFabiano Rosas gen_helper_neon_paddl_u16, 3147*f0984d40SFabiano Rosas gen_helper_neon_paddl_u32, 3148*f0984d40SFabiano Rosas tcg_gen_add_i64, 3149*f0984d40SFabiano Rosas NULL, 3150*f0984d40SFabiano Rosas }; 3151*f0984d40SFabiano Rosas static NeonGenTwo64OpFn * const accfn[] = { 3152*f0984d40SFabiano Rosas gen_helper_neon_addl_u16, 3153*f0984d40SFabiano Rosas gen_helper_neon_addl_u32, 3154*f0984d40SFabiano Rosas tcg_gen_add_i64, 3155*f0984d40SFabiano Rosas NULL, 3156*f0984d40SFabiano Rosas }; 3157*f0984d40SFabiano Rosas 3158*f0984d40SFabiano Rosas return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], 3159*f0984d40SFabiano Rosas accfn[a->size]); 3160*f0984d40SFabiano Rosas } 3161*f0984d40SFabiano Rosas 3162*f0984d40SFabiano Rosas typedef void ZipFn(TCGv_ptr, TCGv_ptr); 3163*f0984d40SFabiano Rosas 3164*f0984d40SFabiano Rosas static bool do_zip_uzp(DisasContext *s, arg_2misc *a, 3165*f0984d40SFabiano Rosas ZipFn *fn) 3166*f0984d40SFabiano Rosas { 3167*f0984d40SFabiano Rosas TCGv_ptr pd, pm; 3168*f0984d40SFabiano Rosas 3169*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 3170*f0984d40SFabiano Rosas return false; 3171*f0984d40SFabiano Rosas } 3172*f0984d40SFabiano Rosas 3173*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they 
don't exist. */ 3174*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 3175*f0984d40SFabiano Rosas ((a->vd | a->vm) & 0x10)) { 3176*f0984d40SFabiano Rosas return false; 3177*f0984d40SFabiano Rosas } 3178*f0984d40SFabiano Rosas 3179*f0984d40SFabiano Rosas if ((a->vd | a->vm) & a->q) { 3180*f0984d40SFabiano Rosas return false; 3181*f0984d40SFabiano Rosas } 3182*f0984d40SFabiano Rosas 3183*f0984d40SFabiano Rosas if (!fn) { 3184*f0984d40SFabiano Rosas /* Bad size or size/q combination */ 3185*f0984d40SFabiano Rosas return false; 3186*f0984d40SFabiano Rosas } 3187*f0984d40SFabiano Rosas 3188*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 3189*f0984d40SFabiano Rosas return true; 3190*f0984d40SFabiano Rosas } 3191*f0984d40SFabiano Rosas 3192*f0984d40SFabiano Rosas pd = vfp_reg_ptr(true, a->vd); 3193*f0984d40SFabiano Rosas pm = vfp_reg_ptr(true, a->vm); 3194*f0984d40SFabiano Rosas fn(pd, pm); 3195*f0984d40SFabiano Rosas tcg_temp_free_ptr(pd); 3196*f0984d40SFabiano Rosas tcg_temp_free_ptr(pm); 3197*f0984d40SFabiano Rosas return true; 3198*f0984d40SFabiano Rosas } 3199*f0984d40SFabiano Rosas 3200*f0984d40SFabiano Rosas static bool trans_VUZP(DisasContext *s, arg_2misc *a) 3201*f0984d40SFabiano Rosas { 3202*f0984d40SFabiano Rosas static ZipFn * const fn[2][4] = { 3203*f0984d40SFabiano Rosas { 3204*f0984d40SFabiano Rosas gen_helper_neon_unzip8, 3205*f0984d40SFabiano Rosas gen_helper_neon_unzip16, 3206*f0984d40SFabiano Rosas NULL, 3207*f0984d40SFabiano Rosas NULL, 3208*f0984d40SFabiano Rosas }, { 3209*f0984d40SFabiano Rosas gen_helper_neon_qunzip8, 3210*f0984d40SFabiano Rosas gen_helper_neon_qunzip16, 3211*f0984d40SFabiano Rosas gen_helper_neon_qunzip32, 3212*f0984d40SFabiano Rosas NULL, 3213*f0984d40SFabiano Rosas } 3214*f0984d40SFabiano Rosas }; 3215*f0984d40SFabiano Rosas return do_zip_uzp(s, a, fn[a->q][a->size]); 3216*f0984d40SFabiano Rosas } 3217*f0984d40SFabiano Rosas 3218*f0984d40SFabiano Rosas static bool trans_VZIP(DisasContext *s, arg_2misc *a) 
3219*f0984d40SFabiano Rosas { 3220*f0984d40SFabiano Rosas static ZipFn * const fn[2][4] = { 3221*f0984d40SFabiano Rosas { 3222*f0984d40SFabiano Rosas gen_helper_neon_zip8, 3223*f0984d40SFabiano Rosas gen_helper_neon_zip16, 3224*f0984d40SFabiano Rosas NULL, 3225*f0984d40SFabiano Rosas NULL, 3226*f0984d40SFabiano Rosas }, { 3227*f0984d40SFabiano Rosas gen_helper_neon_qzip8, 3228*f0984d40SFabiano Rosas gen_helper_neon_qzip16, 3229*f0984d40SFabiano Rosas gen_helper_neon_qzip32, 3230*f0984d40SFabiano Rosas NULL, 3231*f0984d40SFabiano Rosas } 3232*f0984d40SFabiano Rosas }; 3233*f0984d40SFabiano Rosas return do_zip_uzp(s, a, fn[a->q][a->size]); 3234*f0984d40SFabiano Rosas } 3235*f0984d40SFabiano Rosas 3236*f0984d40SFabiano Rosas static bool do_vmovn(DisasContext *s, arg_2misc *a, 3237*f0984d40SFabiano Rosas NeonGenNarrowEnvFn *narrowfn) 3238*f0984d40SFabiano Rosas { 3239*f0984d40SFabiano Rosas TCGv_i64 rm; 3240*f0984d40SFabiano Rosas TCGv_i32 rd0, rd1; 3241*f0984d40SFabiano Rosas 3242*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 3243*f0984d40SFabiano Rosas return false; 3244*f0984d40SFabiano Rosas } 3245*f0984d40SFabiano Rosas 3246*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 3247*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 3248*f0984d40SFabiano Rosas ((a->vd | a->vm) & 0x10)) { 3249*f0984d40SFabiano Rosas return false; 3250*f0984d40SFabiano Rosas } 3251*f0984d40SFabiano Rosas 3252*f0984d40SFabiano Rosas if (a->vm & 1) { 3253*f0984d40SFabiano Rosas return false; 3254*f0984d40SFabiano Rosas } 3255*f0984d40SFabiano Rosas 3256*f0984d40SFabiano Rosas if (!narrowfn) { 3257*f0984d40SFabiano Rosas return false; 3258*f0984d40SFabiano Rosas } 3259*f0984d40SFabiano Rosas 3260*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 3261*f0984d40SFabiano Rosas return true; 3262*f0984d40SFabiano Rosas } 3263*f0984d40SFabiano Rosas 3264*f0984d40SFabiano Rosas rm = tcg_temp_new_i64(); 3265*f0984d40SFabiano Rosas rd0 = tcg_temp_new_i32(); 3266*f0984d40SFabiano Rosas rd1 = tcg_temp_new_i32(); 3267*f0984d40SFabiano Rosas 3268*f0984d40SFabiano Rosas read_neon_element64(rm, a->vm, 0, MO_64); 3269*f0984d40SFabiano Rosas narrowfn(rd0, cpu_env, rm); 3270*f0984d40SFabiano Rosas read_neon_element64(rm, a->vm, 1, MO_64); 3271*f0984d40SFabiano Rosas narrowfn(rd1, cpu_env, rm); 3272*f0984d40SFabiano Rosas write_neon_element32(rd0, a->vd, 0, MO_32); 3273*f0984d40SFabiano Rosas write_neon_element32(rd1, a->vd, 1, MO_32); 3274*f0984d40SFabiano Rosas tcg_temp_free_i32(rd0); 3275*f0984d40SFabiano Rosas tcg_temp_free_i32(rd1); 3276*f0984d40SFabiano Rosas tcg_temp_free_i64(rm); 3277*f0984d40SFabiano Rosas return true; 3278*f0984d40SFabiano Rosas } 3279*f0984d40SFabiano Rosas 3280*f0984d40SFabiano Rosas #define DO_VMOVN(INSN, FUNC) \ 3281*f0984d40SFabiano Rosas static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ 3282*f0984d40SFabiano Rosas { \ 3283*f0984d40SFabiano Rosas static NeonGenNarrowEnvFn * const narrowfn[] = { \ 3284*f0984d40SFabiano Rosas FUNC##8, \ 3285*f0984d40SFabiano Rosas FUNC##16, \ 3286*f0984d40SFabiano Rosas FUNC##32, \ 3287*f0984d40SFabiano Rosas NULL, \ 3288*f0984d40SFabiano Rosas }; \ 3289*f0984d40SFabiano Rosas return 
do_vmovn(s, a, narrowfn[a->size]); \ 3290*f0984d40SFabiano Rosas } 3291*f0984d40SFabiano Rosas 3292*f0984d40SFabiano Rosas DO_VMOVN(VMOVN, gen_neon_narrow_u) 3293*f0984d40SFabiano Rosas DO_VMOVN(VQMOVUN, gen_helper_neon_unarrow_sat) 3294*f0984d40SFabiano Rosas DO_VMOVN(VQMOVN_S, gen_helper_neon_narrow_sat_s) 3295*f0984d40SFabiano Rosas DO_VMOVN(VQMOVN_U, gen_helper_neon_narrow_sat_u) 3296*f0984d40SFabiano Rosas 3297*f0984d40SFabiano Rosas static bool trans_VSHLL(DisasContext *s, arg_2misc *a) 3298*f0984d40SFabiano Rosas { 3299*f0984d40SFabiano Rosas TCGv_i32 rm0, rm1; 3300*f0984d40SFabiano Rosas TCGv_i64 rd; 3301*f0984d40SFabiano Rosas static NeonGenWidenFn * const widenfns[] = { 3302*f0984d40SFabiano Rosas gen_helper_neon_widen_u8, 3303*f0984d40SFabiano Rosas gen_helper_neon_widen_u16, 3304*f0984d40SFabiano Rosas tcg_gen_extu_i32_i64, 3305*f0984d40SFabiano Rosas NULL, 3306*f0984d40SFabiano Rosas }; 3307*f0984d40SFabiano Rosas NeonGenWidenFn *widenfn = widenfns[a->size]; 3308*f0984d40SFabiano Rosas 3309*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 3310*f0984d40SFabiano Rosas return false; 3311*f0984d40SFabiano Rosas } 3312*f0984d40SFabiano Rosas 3313*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 3314*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 3315*f0984d40SFabiano Rosas ((a->vd | a->vm) & 0x10)) { 3316*f0984d40SFabiano Rosas return false; 3317*f0984d40SFabiano Rosas } 3318*f0984d40SFabiano Rosas 3319*f0984d40SFabiano Rosas if (a->vd & 1) { 3320*f0984d40SFabiano Rosas return false; 3321*f0984d40SFabiano Rosas } 3322*f0984d40SFabiano Rosas 3323*f0984d40SFabiano Rosas if (!widenfn) { 3324*f0984d40SFabiano Rosas return false; 3325*f0984d40SFabiano Rosas } 3326*f0984d40SFabiano Rosas 3327*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 3328*f0984d40SFabiano Rosas return true; 3329*f0984d40SFabiano Rosas } 3330*f0984d40SFabiano Rosas 3331*f0984d40SFabiano Rosas rd = tcg_temp_new_i64(); 3332*f0984d40SFabiano Rosas rm0 = tcg_temp_new_i32(); 3333*f0984d40SFabiano Rosas rm1 = tcg_temp_new_i32(); 3334*f0984d40SFabiano Rosas 3335*f0984d40SFabiano Rosas read_neon_element32(rm0, a->vm, 0, MO_32); 3336*f0984d40SFabiano Rosas read_neon_element32(rm1, a->vm, 1, MO_32); 3337*f0984d40SFabiano Rosas 3338*f0984d40SFabiano Rosas widenfn(rd, rm0); 3339*f0984d40SFabiano Rosas tcg_gen_shli_i64(rd, rd, 8 << a->size); 3340*f0984d40SFabiano Rosas write_neon_element64(rd, a->vd, 0, MO_64); 3341*f0984d40SFabiano Rosas widenfn(rd, rm1); 3342*f0984d40SFabiano Rosas tcg_gen_shli_i64(rd, rd, 8 << a->size); 3343*f0984d40SFabiano Rosas write_neon_element64(rd, a->vd, 1, MO_64); 3344*f0984d40SFabiano Rosas 3345*f0984d40SFabiano Rosas tcg_temp_free_i64(rd); 3346*f0984d40SFabiano Rosas tcg_temp_free_i32(rm0); 3347*f0984d40SFabiano Rosas tcg_temp_free_i32(rm1); 3348*f0984d40SFabiano Rosas return true; 3349*f0984d40SFabiano Rosas } 3350*f0984d40SFabiano Rosas 3351*f0984d40SFabiano Rosas static bool trans_VCVT_B16_F32(DisasContext *s, arg_2misc *a) 3352*f0984d40SFabiano Rosas { 3353*f0984d40SFabiano Rosas TCGv_ptr fpst; 3354*f0984d40SFabiano Rosas TCGv_i64 tmp; 3355*f0984d40SFabiano Rosas TCGv_i32 dst0, dst1; 3356*f0984d40SFabiano Rosas 3357*f0984d40SFabiano Rosas if 
(!dc_isar_feature(aa32_bf16, s)) { 3358*f0984d40SFabiano Rosas return false; 3359*f0984d40SFabiano Rosas } 3360*f0984d40SFabiano Rosas 3361*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. */ 3362*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 3363*f0984d40SFabiano Rosas ((a->vd | a->vm) & 0x10)) { 3364*f0984d40SFabiano Rosas return false; 3365*f0984d40SFabiano Rosas } 3366*f0984d40SFabiano Rosas 3367*f0984d40SFabiano Rosas if ((a->vm & 1) || (a->size != 1)) { 3368*f0984d40SFabiano Rosas return false; 3369*f0984d40SFabiano Rosas } 3370*f0984d40SFabiano Rosas 3371*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 3372*f0984d40SFabiano Rosas return true; 3373*f0984d40SFabiano Rosas } 3374*f0984d40SFabiano Rosas 3375*f0984d40SFabiano Rosas fpst = fpstatus_ptr(FPST_STD); 3376*f0984d40SFabiano Rosas tmp = tcg_temp_new_i64(); 3377*f0984d40SFabiano Rosas dst0 = tcg_temp_new_i32(); 3378*f0984d40SFabiano Rosas dst1 = tcg_temp_new_i32(); 3379*f0984d40SFabiano Rosas 3380*f0984d40SFabiano Rosas read_neon_element64(tmp, a->vm, 0, MO_64); 3381*f0984d40SFabiano Rosas gen_helper_bfcvt_pair(dst0, tmp, fpst); 3382*f0984d40SFabiano Rosas 3383*f0984d40SFabiano Rosas read_neon_element64(tmp, a->vm, 1, MO_64); 3384*f0984d40SFabiano Rosas gen_helper_bfcvt_pair(dst1, tmp, fpst); 3385*f0984d40SFabiano Rosas 3386*f0984d40SFabiano Rosas write_neon_element32(dst0, a->vd, 0, MO_32); 3387*f0984d40SFabiano Rosas write_neon_element32(dst1, a->vd, 1, MO_32); 3388*f0984d40SFabiano Rosas 3389*f0984d40SFabiano Rosas tcg_temp_free_i64(tmp); 3390*f0984d40SFabiano Rosas tcg_temp_free_i32(dst0); 3391*f0984d40SFabiano Rosas tcg_temp_free_i32(dst1); 3392*f0984d40SFabiano Rosas tcg_temp_free_ptr(fpst); 3393*f0984d40SFabiano Rosas return true; 3394*f0984d40SFabiano Rosas } 3395*f0984d40SFabiano Rosas 3396*f0984d40SFabiano Rosas static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a) 3397*f0984d40SFabiano Rosas { 3398*f0984d40SFabiano Rosas TCGv_ptr fpst; 
3399*f0984d40SFabiano Rosas TCGv_i32 ahp, tmp, tmp2, tmp3; 3400*f0984d40SFabiano Rosas 3401*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_NEON) || 3402*f0984d40SFabiano Rosas !dc_isar_feature(aa32_fp16_spconv, s)) { 3403*f0984d40SFabiano Rosas return false; 3404*f0984d40SFabiano Rosas } 3405*f0984d40SFabiano Rosas 3406*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. */ 3407*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 3408*f0984d40SFabiano Rosas ((a->vd | a->vm) & 0x10)) { 3409*f0984d40SFabiano Rosas return false; 3410*f0984d40SFabiano Rosas } 3411*f0984d40SFabiano Rosas 3412*f0984d40SFabiano Rosas if ((a->vm & 1) || (a->size != 1)) { 3413*f0984d40SFabiano Rosas return false; 3414*f0984d40SFabiano Rosas } 3415*f0984d40SFabiano Rosas 3416*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 3417*f0984d40SFabiano Rosas return true; 3418*f0984d40SFabiano Rosas } 3419*f0984d40SFabiano Rosas 3420*f0984d40SFabiano Rosas fpst = fpstatus_ptr(FPST_STD); 3421*f0984d40SFabiano Rosas ahp = get_ahp_flag(); 3422*f0984d40SFabiano Rosas tmp = tcg_temp_new_i32(); 3423*f0984d40SFabiano Rosas read_neon_element32(tmp, a->vm, 0, MO_32); 3424*f0984d40SFabiano Rosas gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp); 3425*f0984d40SFabiano Rosas tmp2 = tcg_temp_new_i32(); 3426*f0984d40SFabiano Rosas read_neon_element32(tmp2, a->vm, 1, MO_32); 3427*f0984d40SFabiano Rosas gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp); 3428*f0984d40SFabiano Rosas tcg_gen_shli_i32(tmp2, tmp2, 16); 3429*f0984d40SFabiano Rosas tcg_gen_or_i32(tmp2, tmp2, tmp); 3430*f0984d40SFabiano Rosas read_neon_element32(tmp, a->vm, 2, MO_32); 3431*f0984d40SFabiano Rosas gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp); 3432*f0984d40SFabiano Rosas tmp3 = tcg_temp_new_i32(); 3433*f0984d40SFabiano Rosas read_neon_element32(tmp3, a->vm, 3, MO_32); 3434*f0984d40SFabiano Rosas write_neon_element32(tmp2, a->vd, 0, MO_32); 3435*f0984d40SFabiano Rosas 
tcg_temp_free_i32(tmp2); 3436*f0984d40SFabiano Rosas gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp); 3437*f0984d40SFabiano Rosas tcg_gen_shli_i32(tmp3, tmp3, 16); 3438*f0984d40SFabiano Rosas tcg_gen_or_i32(tmp3, tmp3, tmp); 3439*f0984d40SFabiano Rosas write_neon_element32(tmp3, a->vd, 1, MO_32); 3440*f0984d40SFabiano Rosas tcg_temp_free_i32(tmp3); 3441*f0984d40SFabiano Rosas tcg_temp_free_i32(tmp); 3442*f0984d40SFabiano Rosas tcg_temp_free_i32(ahp); 3443*f0984d40SFabiano Rosas tcg_temp_free_ptr(fpst); 3444*f0984d40SFabiano Rosas 3445*f0984d40SFabiano Rosas return true; 3446*f0984d40SFabiano Rosas } 3447*f0984d40SFabiano Rosas 3448*f0984d40SFabiano Rosas static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a) 3449*f0984d40SFabiano Rosas { 3450*f0984d40SFabiano Rosas TCGv_ptr fpst; 3451*f0984d40SFabiano Rosas TCGv_i32 ahp, tmp, tmp2, tmp3; 3452*f0984d40SFabiano Rosas 3453*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_NEON) || 3454*f0984d40SFabiano Rosas !dc_isar_feature(aa32_fp16_spconv, s)) { 3455*f0984d40SFabiano Rosas return false; 3456*f0984d40SFabiano Rosas } 3457*f0984d40SFabiano Rosas 3458*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 3459*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 3460*f0984d40SFabiano Rosas ((a->vd | a->vm) & 0x10)) { 3461*f0984d40SFabiano Rosas return false; 3462*f0984d40SFabiano Rosas } 3463*f0984d40SFabiano Rosas 3464*f0984d40SFabiano Rosas if ((a->vd & 1) || (a->size != 1)) { 3465*f0984d40SFabiano Rosas return false; 3466*f0984d40SFabiano Rosas } 3467*f0984d40SFabiano Rosas 3468*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 3469*f0984d40SFabiano Rosas return true; 3470*f0984d40SFabiano Rosas } 3471*f0984d40SFabiano Rosas 3472*f0984d40SFabiano Rosas fpst = fpstatus_ptr(FPST_STD); 3473*f0984d40SFabiano Rosas ahp = get_ahp_flag(); 3474*f0984d40SFabiano Rosas tmp3 = tcg_temp_new_i32(); 3475*f0984d40SFabiano Rosas tmp2 = tcg_temp_new_i32(); 3476*f0984d40SFabiano Rosas tmp = tcg_temp_new_i32(); 3477*f0984d40SFabiano Rosas read_neon_element32(tmp, a->vm, 0, MO_32); 3478*f0984d40SFabiano Rosas read_neon_element32(tmp2, a->vm, 1, MO_32); 3479*f0984d40SFabiano Rosas tcg_gen_ext16u_i32(tmp3, tmp); 3480*f0984d40SFabiano Rosas gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp); 3481*f0984d40SFabiano Rosas write_neon_element32(tmp3, a->vd, 0, MO_32); 3482*f0984d40SFabiano Rosas tcg_gen_shri_i32(tmp, tmp, 16); 3483*f0984d40SFabiano Rosas gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp); 3484*f0984d40SFabiano Rosas write_neon_element32(tmp, a->vd, 1, MO_32); 3485*f0984d40SFabiano Rosas tcg_temp_free_i32(tmp); 3486*f0984d40SFabiano Rosas tcg_gen_ext16u_i32(tmp3, tmp2); 3487*f0984d40SFabiano Rosas gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp); 3488*f0984d40SFabiano Rosas write_neon_element32(tmp3, a->vd, 2, MO_32); 3489*f0984d40SFabiano Rosas tcg_temp_free_i32(tmp3); 3490*f0984d40SFabiano Rosas tcg_gen_shri_i32(tmp2, tmp2, 16); 3491*f0984d40SFabiano Rosas gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp); 3492*f0984d40SFabiano Rosas write_neon_element32(tmp2, a->vd, 3, MO_32); 3493*f0984d40SFabiano Rosas tcg_temp_free_i32(tmp2); 
3494*f0984d40SFabiano Rosas tcg_temp_free_i32(ahp); 3495*f0984d40SFabiano Rosas tcg_temp_free_ptr(fpst); 3496*f0984d40SFabiano Rosas 3497*f0984d40SFabiano Rosas return true; 3498*f0984d40SFabiano Rosas } 3499*f0984d40SFabiano Rosas 3500*f0984d40SFabiano Rosas static bool do_2misc_vec(DisasContext *s, arg_2misc *a, GVecGen2Fn *fn) 3501*f0984d40SFabiano Rosas { 3502*f0984d40SFabiano Rosas int vec_size = a->q ? 16 : 8; 3503*f0984d40SFabiano Rosas int rd_ofs = neon_full_reg_offset(a->vd); 3504*f0984d40SFabiano Rosas int rm_ofs = neon_full_reg_offset(a->vm); 3505*f0984d40SFabiano Rosas 3506*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 3507*f0984d40SFabiano Rosas return false; 3508*f0984d40SFabiano Rosas } 3509*f0984d40SFabiano Rosas 3510*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. */ 3511*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 3512*f0984d40SFabiano Rosas ((a->vd | a->vm) & 0x10)) { 3513*f0984d40SFabiano Rosas return false; 3514*f0984d40SFabiano Rosas } 3515*f0984d40SFabiano Rosas 3516*f0984d40SFabiano Rosas if (a->size == 3) { 3517*f0984d40SFabiano Rosas return false; 3518*f0984d40SFabiano Rosas } 3519*f0984d40SFabiano Rosas 3520*f0984d40SFabiano Rosas if ((a->vd | a->vm) & a->q) { 3521*f0984d40SFabiano Rosas return false; 3522*f0984d40SFabiano Rosas } 3523*f0984d40SFabiano Rosas 3524*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 3525*f0984d40SFabiano Rosas return true; 3526*f0984d40SFabiano Rosas } 3527*f0984d40SFabiano Rosas 3528*f0984d40SFabiano Rosas fn(a->size, rd_ofs, rm_ofs, vec_size, vec_size); 3529*f0984d40SFabiano Rosas 3530*f0984d40SFabiano Rosas return true; 3531*f0984d40SFabiano Rosas } 3532*f0984d40SFabiano Rosas 3533*f0984d40SFabiano Rosas #define DO_2MISC_VEC(INSN, FN) \ 3534*f0984d40SFabiano Rosas static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ 3535*f0984d40SFabiano Rosas { \ 3536*f0984d40SFabiano Rosas return do_2misc_vec(s, a, FN); \ 3537*f0984d40SFabiano Rosas 
} 3538*f0984d40SFabiano Rosas 3539*f0984d40SFabiano Rosas DO_2MISC_VEC(VNEG, tcg_gen_gvec_neg) 3540*f0984d40SFabiano Rosas DO_2MISC_VEC(VABS, tcg_gen_gvec_abs) 3541*f0984d40SFabiano Rosas DO_2MISC_VEC(VCEQ0, gen_gvec_ceq0) 3542*f0984d40SFabiano Rosas DO_2MISC_VEC(VCGT0, gen_gvec_cgt0) 3543*f0984d40SFabiano Rosas DO_2MISC_VEC(VCLE0, gen_gvec_cle0) 3544*f0984d40SFabiano Rosas DO_2MISC_VEC(VCGE0, gen_gvec_cge0) 3545*f0984d40SFabiano Rosas DO_2MISC_VEC(VCLT0, gen_gvec_clt0) 3546*f0984d40SFabiano Rosas 3547*f0984d40SFabiano Rosas static bool trans_VMVN(DisasContext *s, arg_2misc *a) 3548*f0984d40SFabiano Rosas { 3549*f0984d40SFabiano Rosas if (a->size != 0) { 3550*f0984d40SFabiano Rosas return false; 3551*f0984d40SFabiano Rosas } 3552*f0984d40SFabiano Rosas return do_2misc_vec(s, a, tcg_gen_gvec_not); 3553*f0984d40SFabiano Rosas } 3554*f0984d40SFabiano Rosas 3555*f0984d40SFabiano Rosas #define WRAP_2M_3_OOL_FN(WRAPNAME, FUNC, DATA) \ 3556*f0984d40SFabiano Rosas static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \ 3557*f0984d40SFabiano Rosas uint32_t rm_ofs, uint32_t oprsz, \ 3558*f0984d40SFabiano Rosas uint32_t maxsz) \ 3559*f0984d40SFabiano Rosas { \ 3560*f0984d40SFabiano Rosas tcg_gen_gvec_3_ool(rd_ofs, rd_ofs, rm_ofs, oprsz, maxsz, \ 3561*f0984d40SFabiano Rosas DATA, FUNC); \ 3562*f0984d40SFabiano Rosas } 3563*f0984d40SFabiano Rosas 3564*f0984d40SFabiano Rosas #define WRAP_2M_2_OOL_FN(WRAPNAME, FUNC, DATA) \ 3565*f0984d40SFabiano Rosas static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \ 3566*f0984d40SFabiano Rosas uint32_t rm_ofs, uint32_t oprsz, \ 3567*f0984d40SFabiano Rosas uint32_t maxsz) \ 3568*f0984d40SFabiano Rosas { \ 3569*f0984d40SFabiano Rosas tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, oprsz, maxsz, DATA, FUNC); \ 3570*f0984d40SFabiano Rosas } 3571*f0984d40SFabiano Rosas 3572*f0984d40SFabiano Rosas WRAP_2M_3_OOL_FN(gen_AESE, gen_helper_crypto_aese, 0) 3573*f0984d40SFabiano Rosas WRAP_2M_3_OOL_FN(gen_AESD, gen_helper_crypto_aese, 1) 3574*f0984d40SFabiano Rosas 
WRAP_2M_2_OOL_FN(gen_AESMC, gen_helper_crypto_aesmc, 0) 3575*f0984d40SFabiano Rosas WRAP_2M_2_OOL_FN(gen_AESIMC, gen_helper_crypto_aesmc, 1) 3576*f0984d40SFabiano Rosas WRAP_2M_2_OOL_FN(gen_SHA1H, gen_helper_crypto_sha1h, 0) 3577*f0984d40SFabiano Rosas WRAP_2M_2_OOL_FN(gen_SHA1SU1, gen_helper_crypto_sha1su1, 0) 3578*f0984d40SFabiano Rosas WRAP_2M_2_OOL_FN(gen_SHA256SU0, gen_helper_crypto_sha256su0, 0) 3579*f0984d40SFabiano Rosas 3580*f0984d40SFabiano Rosas #define DO_2M_CRYPTO(INSN, FEATURE, SIZE) \ 3581*f0984d40SFabiano Rosas static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ 3582*f0984d40SFabiano Rosas { \ 3583*f0984d40SFabiano Rosas if (!dc_isar_feature(FEATURE, s) || a->size != SIZE) { \ 3584*f0984d40SFabiano Rosas return false; \ 3585*f0984d40SFabiano Rosas } \ 3586*f0984d40SFabiano Rosas return do_2misc_vec(s, a, gen_##INSN); \ 3587*f0984d40SFabiano Rosas } 3588*f0984d40SFabiano Rosas 3589*f0984d40SFabiano Rosas DO_2M_CRYPTO(AESE, aa32_aes, 0) 3590*f0984d40SFabiano Rosas DO_2M_CRYPTO(AESD, aa32_aes, 0) 3591*f0984d40SFabiano Rosas DO_2M_CRYPTO(AESMC, aa32_aes, 0) 3592*f0984d40SFabiano Rosas DO_2M_CRYPTO(AESIMC, aa32_aes, 0) 3593*f0984d40SFabiano Rosas DO_2M_CRYPTO(SHA1H, aa32_sha1, 2) 3594*f0984d40SFabiano Rosas DO_2M_CRYPTO(SHA1SU1, aa32_sha1, 2) 3595*f0984d40SFabiano Rosas DO_2M_CRYPTO(SHA256SU0, aa32_sha2, 2) 3596*f0984d40SFabiano Rosas 3597*f0984d40SFabiano Rosas static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn) 3598*f0984d40SFabiano Rosas { 3599*f0984d40SFabiano Rosas TCGv_i32 tmp; 3600*f0984d40SFabiano Rosas int pass; 3601*f0984d40SFabiano Rosas 3602*f0984d40SFabiano Rosas /* Handle a 2-reg-misc operation by iterating 32 bits at a time */ 3603*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 3604*f0984d40SFabiano Rosas return false; 3605*f0984d40SFabiano Rosas } 3606*f0984d40SFabiano Rosas 3607*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 3608*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 3609*f0984d40SFabiano Rosas ((a->vd | a->vm) & 0x10)) { 3610*f0984d40SFabiano Rosas return false; 3611*f0984d40SFabiano Rosas } 3612*f0984d40SFabiano Rosas 3613*f0984d40SFabiano Rosas if (!fn) { 3614*f0984d40SFabiano Rosas return false; 3615*f0984d40SFabiano Rosas } 3616*f0984d40SFabiano Rosas 3617*f0984d40SFabiano Rosas if ((a->vd | a->vm) & a->q) { 3618*f0984d40SFabiano Rosas return false; 3619*f0984d40SFabiano Rosas } 3620*f0984d40SFabiano Rosas 3621*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 3622*f0984d40SFabiano Rosas return true; 3623*f0984d40SFabiano Rosas } 3624*f0984d40SFabiano Rosas 3625*f0984d40SFabiano Rosas tmp = tcg_temp_new_i32(); 3626*f0984d40SFabiano Rosas for (pass = 0; pass < (a->q ? 4 : 2); pass++) { 3627*f0984d40SFabiano Rosas read_neon_element32(tmp, a->vm, pass, MO_32); 3628*f0984d40SFabiano Rosas fn(tmp, tmp); 3629*f0984d40SFabiano Rosas write_neon_element32(tmp, a->vd, pass, MO_32); 3630*f0984d40SFabiano Rosas } 3631*f0984d40SFabiano Rosas tcg_temp_free_i32(tmp); 3632*f0984d40SFabiano Rosas 3633*f0984d40SFabiano Rosas return true; 3634*f0984d40SFabiano Rosas } 3635*f0984d40SFabiano Rosas 3636*f0984d40SFabiano Rosas static bool trans_VREV32(DisasContext *s, arg_2misc *a) 3637*f0984d40SFabiano Rosas { 3638*f0984d40SFabiano Rosas static NeonGenOneOpFn * const fn[] = { 3639*f0984d40SFabiano Rosas tcg_gen_bswap32_i32, 3640*f0984d40SFabiano Rosas gen_swap_half, 3641*f0984d40SFabiano Rosas NULL, 3642*f0984d40SFabiano Rosas NULL, 3643*f0984d40SFabiano Rosas }; 3644*f0984d40SFabiano Rosas return do_2misc(s, a, fn[a->size]); 3645*f0984d40SFabiano Rosas } 3646*f0984d40SFabiano Rosas 3647*f0984d40SFabiano Rosas static bool trans_VREV16(DisasContext *s, arg_2misc *a) 3648*f0984d40SFabiano Rosas { 3649*f0984d40SFabiano Rosas if (a->size != 0) { 3650*f0984d40SFabiano Rosas return false; 3651*f0984d40SFabiano Rosas } 3652*f0984d40SFabiano Rosas return do_2misc(s, a, 
gen_rev16); 3653*f0984d40SFabiano Rosas } 3654*f0984d40SFabiano Rosas 3655*f0984d40SFabiano Rosas static bool trans_VCLS(DisasContext *s, arg_2misc *a) 3656*f0984d40SFabiano Rosas { 3657*f0984d40SFabiano Rosas static NeonGenOneOpFn * const fn[] = { 3658*f0984d40SFabiano Rosas gen_helper_neon_cls_s8, 3659*f0984d40SFabiano Rosas gen_helper_neon_cls_s16, 3660*f0984d40SFabiano Rosas gen_helper_neon_cls_s32, 3661*f0984d40SFabiano Rosas NULL, 3662*f0984d40SFabiano Rosas }; 3663*f0984d40SFabiano Rosas return do_2misc(s, a, fn[a->size]); 3664*f0984d40SFabiano Rosas } 3665*f0984d40SFabiano Rosas 3666*f0984d40SFabiano Rosas static void do_VCLZ_32(TCGv_i32 rd, TCGv_i32 rm) 3667*f0984d40SFabiano Rosas { 3668*f0984d40SFabiano Rosas tcg_gen_clzi_i32(rd, rm, 32); 3669*f0984d40SFabiano Rosas } 3670*f0984d40SFabiano Rosas 3671*f0984d40SFabiano Rosas static bool trans_VCLZ(DisasContext *s, arg_2misc *a) 3672*f0984d40SFabiano Rosas { 3673*f0984d40SFabiano Rosas static NeonGenOneOpFn * const fn[] = { 3674*f0984d40SFabiano Rosas gen_helper_neon_clz_u8, 3675*f0984d40SFabiano Rosas gen_helper_neon_clz_u16, 3676*f0984d40SFabiano Rosas do_VCLZ_32, 3677*f0984d40SFabiano Rosas NULL, 3678*f0984d40SFabiano Rosas }; 3679*f0984d40SFabiano Rosas return do_2misc(s, a, fn[a->size]); 3680*f0984d40SFabiano Rosas } 3681*f0984d40SFabiano Rosas 3682*f0984d40SFabiano Rosas static bool trans_VCNT(DisasContext *s, arg_2misc *a) 3683*f0984d40SFabiano Rosas { 3684*f0984d40SFabiano Rosas if (a->size != 0) { 3685*f0984d40SFabiano Rosas return false; 3686*f0984d40SFabiano Rosas } 3687*f0984d40SFabiano Rosas return do_2misc(s, a, gen_helper_neon_cnt_u8); 3688*f0984d40SFabiano Rosas } 3689*f0984d40SFabiano Rosas 3690*f0984d40SFabiano Rosas static void gen_VABS_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 3691*f0984d40SFabiano Rosas uint32_t oprsz, uint32_t maxsz) 3692*f0984d40SFabiano Rosas { 3693*f0984d40SFabiano Rosas tcg_gen_gvec_andi(vece, rd_ofs, rm_ofs, 3694*f0984d40SFabiano Rosas vece == MO_16 ? 
0x7fff : 0x7fffffff, 3695*f0984d40SFabiano Rosas oprsz, maxsz); 3696*f0984d40SFabiano Rosas } 3697*f0984d40SFabiano Rosas 3698*f0984d40SFabiano Rosas static bool trans_VABS_F(DisasContext *s, arg_2misc *a) 3699*f0984d40SFabiano Rosas { 3700*f0984d40SFabiano Rosas if (a->size == MO_16) { 3701*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_fp16_arith, s)) { 3702*f0984d40SFabiano Rosas return false; 3703*f0984d40SFabiano Rosas } 3704*f0984d40SFabiano Rosas } else if (a->size != MO_32) { 3705*f0984d40SFabiano Rosas return false; 3706*f0984d40SFabiano Rosas } 3707*f0984d40SFabiano Rosas return do_2misc_vec(s, a, gen_VABS_F); 3708*f0984d40SFabiano Rosas } 3709*f0984d40SFabiano Rosas 3710*f0984d40SFabiano Rosas static void gen_VNEG_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 3711*f0984d40SFabiano Rosas uint32_t oprsz, uint32_t maxsz) 3712*f0984d40SFabiano Rosas { 3713*f0984d40SFabiano Rosas tcg_gen_gvec_xori(vece, rd_ofs, rm_ofs, 3714*f0984d40SFabiano Rosas vece == MO_16 ? 0x8000 : 0x80000000, 3715*f0984d40SFabiano Rosas oprsz, maxsz); 3716*f0984d40SFabiano Rosas } 3717*f0984d40SFabiano Rosas 3718*f0984d40SFabiano Rosas static bool trans_VNEG_F(DisasContext *s, arg_2misc *a) 3719*f0984d40SFabiano Rosas { 3720*f0984d40SFabiano Rosas if (a->size == MO_16) { 3721*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_fp16_arith, s)) { 3722*f0984d40SFabiano Rosas return false; 3723*f0984d40SFabiano Rosas } 3724*f0984d40SFabiano Rosas } else if (a->size != MO_32) { 3725*f0984d40SFabiano Rosas return false; 3726*f0984d40SFabiano Rosas } 3727*f0984d40SFabiano Rosas return do_2misc_vec(s, a, gen_VNEG_F); 3728*f0984d40SFabiano Rosas } 3729*f0984d40SFabiano Rosas 3730*f0984d40SFabiano Rosas static bool trans_VRECPE(DisasContext *s, arg_2misc *a) 3731*f0984d40SFabiano Rosas { 3732*f0984d40SFabiano Rosas if (a->size != 2) { 3733*f0984d40SFabiano Rosas return false; 3734*f0984d40SFabiano Rosas } 3735*f0984d40SFabiano Rosas return do_2misc(s, a, gen_helper_recpe_u32); 
3736*f0984d40SFabiano Rosas } 3737*f0984d40SFabiano Rosas 3738*f0984d40SFabiano Rosas static bool trans_VRSQRTE(DisasContext *s, arg_2misc *a) 3739*f0984d40SFabiano Rosas { 3740*f0984d40SFabiano Rosas if (a->size != 2) { 3741*f0984d40SFabiano Rosas return false; 3742*f0984d40SFabiano Rosas } 3743*f0984d40SFabiano Rosas return do_2misc(s, a, gen_helper_rsqrte_u32); 3744*f0984d40SFabiano Rosas } 3745*f0984d40SFabiano Rosas 3746*f0984d40SFabiano Rosas #define WRAP_1OP_ENV_FN(WRAPNAME, FUNC) \ 3747*f0984d40SFabiano Rosas static void WRAPNAME(TCGv_i32 d, TCGv_i32 m) \ 3748*f0984d40SFabiano Rosas { \ 3749*f0984d40SFabiano Rosas FUNC(d, cpu_env, m); \ 3750*f0984d40SFabiano Rosas } 3751*f0984d40SFabiano Rosas 3752*f0984d40SFabiano Rosas WRAP_1OP_ENV_FN(gen_VQABS_s8, gen_helper_neon_qabs_s8) 3753*f0984d40SFabiano Rosas WRAP_1OP_ENV_FN(gen_VQABS_s16, gen_helper_neon_qabs_s16) 3754*f0984d40SFabiano Rosas WRAP_1OP_ENV_FN(gen_VQABS_s32, gen_helper_neon_qabs_s32) 3755*f0984d40SFabiano Rosas WRAP_1OP_ENV_FN(gen_VQNEG_s8, gen_helper_neon_qneg_s8) 3756*f0984d40SFabiano Rosas WRAP_1OP_ENV_FN(gen_VQNEG_s16, gen_helper_neon_qneg_s16) 3757*f0984d40SFabiano Rosas WRAP_1OP_ENV_FN(gen_VQNEG_s32, gen_helper_neon_qneg_s32) 3758*f0984d40SFabiano Rosas 3759*f0984d40SFabiano Rosas static bool trans_VQABS(DisasContext *s, arg_2misc *a) 3760*f0984d40SFabiano Rosas { 3761*f0984d40SFabiano Rosas static NeonGenOneOpFn * const fn[] = { 3762*f0984d40SFabiano Rosas gen_VQABS_s8, 3763*f0984d40SFabiano Rosas gen_VQABS_s16, 3764*f0984d40SFabiano Rosas gen_VQABS_s32, 3765*f0984d40SFabiano Rosas NULL, 3766*f0984d40SFabiano Rosas }; 3767*f0984d40SFabiano Rosas return do_2misc(s, a, fn[a->size]); 3768*f0984d40SFabiano Rosas } 3769*f0984d40SFabiano Rosas 3770*f0984d40SFabiano Rosas static bool trans_VQNEG(DisasContext *s, arg_2misc *a) 3771*f0984d40SFabiano Rosas { 3772*f0984d40SFabiano Rosas static NeonGenOneOpFn * const fn[] = { 3773*f0984d40SFabiano Rosas gen_VQNEG_s8, 3774*f0984d40SFabiano Rosas 
gen_VQNEG_s16, 3775*f0984d40SFabiano Rosas gen_VQNEG_s32, 3776*f0984d40SFabiano Rosas NULL, 3777*f0984d40SFabiano Rosas }; 3778*f0984d40SFabiano Rosas return do_2misc(s, a, fn[a->size]); 3779*f0984d40SFabiano Rosas } 3780*f0984d40SFabiano Rosas 3781*f0984d40SFabiano Rosas #define DO_2MISC_FP_VEC(INSN, HFUNC, SFUNC) \ 3782*f0984d40SFabiano Rosas static void gen_##INSN(unsigned vece, uint32_t rd_ofs, \ 3783*f0984d40SFabiano Rosas uint32_t rm_ofs, \ 3784*f0984d40SFabiano Rosas uint32_t oprsz, uint32_t maxsz) \ 3785*f0984d40SFabiano Rosas { \ 3786*f0984d40SFabiano Rosas static gen_helper_gvec_2_ptr * const fns[4] = { \ 3787*f0984d40SFabiano Rosas NULL, HFUNC, SFUNC, NULL, \ 3788*f0984d40SFabiano Rosas }; \ 3789*f0984d40SFabiano Rosas TCGv_ptr fpst; \ 3790*f0984d40SFabiano Rosas fpst = fpstatus_ptr(vece == MO_16 ? FPST_STD_F16 : FPST_STD); \ 3791*f0984d40SFabiano Rosas tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, 0, \ 3792*f0984d40SFabiano Rosas fns[vece]); \ 3793*f0984d40SFabiano Rosas tcg_temp_free_ptr(fpst); \ 3794*f0984d40SFabiano Rosas } \ 3795*f0984d40SFabiano Rosas static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ 3796*f0984d40SFabiano Rosas { \ 3797*f0984d40SFabiano Rosas if (a->size == MO_16) { \ 3798*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_fp16_arith, s)) { \ 3799*f0984d40SFabiano Rosas return false; \ 3800*f0984d40SFabiano Rosas } \ 3801*f0984d40SFabiano Rosas } else if (a->size != MO_32) { \ 3802*f0984d40SFabiano Rosas return false; \ 3803*f0984d40SFabiano Rosas } \ 3804*f0984d40SFabiano Rosas return do_2misc_vec(s, a, gen_##INSN); \ 3805*f0984d40SFabiano Rosas } 3806*f0984d40SFabiano Rosas 3807*f0984d40SFabiano Rosas DO_2MISC_FP_VEC(VRECPE_F, gen_helper_gvec_frecpe_h, gen_helper_gvec_frecpe_s) 3808*f0984d40SFabiano Rosas DO_2MISC_FP_VEC(VRSQRTE_F, gen_helper_gvec_frsqrte_h, gen_helper_gvec_frsqrte_s) 3809*f0984d40SFabiano Rosas DO_2MISC_FP_VEC(VCGT0_F, gen_helper_gvec_fcgt0_h, gen_helper_gvec_fcgt0_s) 3810*f0984d40SFabiano Rosas 
DO_2MISC_FP_VEC(VCGE0_F, gen_helper_gvec_fcge0_h, gen_helper_gvec_fcge0_s) 3811*f0984d40SFabiano Rosas DO_2MISC_FP_VEC(VCEQ0_F, gen_helper_gvec_fceq0_h, gen_helper_gvec_fceq0_s) 3812*f0984d40SFabiano Rosas DO_2MISC_FP_VEC(VCLT0_F, gen_helper_gvec_fclt0_h, gen_helper_gvec_fclt0_s) 3813*f0984d40SFabiano Rosas DO_2MISC_FP_VEC(VCLE0_F, gen_helper_gvec_fcle0_h, gen_helper_gvec_fcle0_s) 3814*f0984d40SFabiano Rosas DO_2MISC_FP_VEC(VCVT_FS, gen_helper_gvec_sstoh, gen_helper_gvec_sitos) 3815*f0984d40SFabiano Rosas DO_2MISC_FP_VEC(VCVT_FU, gen_helper_gvec_ustoh, gen_helper_gvec_uitos) 3816*f0984d40SFabiano Rosas DO_2MISC_FP_VEC(VCVT_SF, gen_helper_gvec_tosszh, gen_helper_gvec_tosizs) 3817*f0984d40SFabiano Rosas DO_2MISC_FP_VEC(VCVT_UF, gen_helper_gvec_touszh, gen_helper_gvec_touizs) 3818*f0984d40SFabiano Rosas 3819*f0984d40SFabiano Rosas DO_2MISC_FP_VEC(VRINTX_impl, gen_helper_gvec_vrintx_h, gen_helper_gvec_vrintx_s) 3820*f0984d40SFabiano Rosas 3821*f0984d40SFabiano Rosas static bool trans_VRINTX(DisasContext *s, arg_2misc *a) 3822*f0984d40SFabiano Rosas { 3823*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_V8)) { 3824*f0984d40SFabiano Rosas return false; 3825*f0984d40SFabiano Rosas } 3826*f0984d40SFabiano Rosas return trans_VRINTX_impl(s, a); 3827*f0984d40SFabiano Rosas } 3828*f0984d40SFabiano Rosas 3829*f0984d40SFabiano Rosas #define DO_VEC_RMODE(INSN, RMODE, OP) \ 3830*f0984d40SFabiano Rosas static void gen_##INSN(unsigned vece, uint32_t rd_ofs, \ 3831*f0984d40SFabiano Rosas uint32_t rm_ofs, \ 3832*f0984d40SFabiano Rosas uint32_t oprsz, uint32_t maxsz) \ 3833*f0984d40SFabiano Rosas { \ 3834*f0984d40SFabiano Rosas static gen_helper_gvec_2_ptr * const fns[4] = { \ 3835*f0984d40SFabiano Rosas NULL, \ 3836*f0984d40SFabiano Rosas gen_helper_gvec_##OP##h, \ 3837*f0984d40SFabiano Rosas gen_helper_gvec_##OP##s, \ 3838*f0984d40SFabiano Rosas NULL, \ 3839*f0984d40SFabiano Rosas }; \ 3840*f0984d40SFabiano Rosas TCGv_ptr fpst; \ 3841*f0984d40SFabiano Rosas fpst = 
fpstatus_ptr(vece == 1 ? FPST_STD_F16 : FPST_STD); \ 3842*f0984d40SFabiano Rosas tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, \ 3843*f0984d40SFabiano Rosas arm_rmode_to_sf(RMODE), fns[vece]); \ 3844*f0984d40SFabiano Rosas tcg_temp_free_ptr(fpst); \ 3845*f0984d40SFabiano Rosas } \ 3846*f0984d40SFabiano Rosas static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ 3847*f0984d40SFabiano Rosas { \ 3848*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_V8)) { \ 3849*f0984d40SFabiano Rosas return false; \ 3850*f0984d40SFabiano Rosas } \ 3851*f0984d40SFabiano Rosas if (a->size == MO_16) { \ 3852*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_fp16_arith, s)) { \ 3853*f0984d40SFabiano Rosas return false; \ 3854*f0984d40SFabiano Rosas } \ 3855*f0984d40SFabiano Rosas } else if (a->size != MO_32) { \ 3856*f0984d40SFabiano Rosas return false; \ 3857*f0984d40SFabiano Rosas } \ 3858*f0984d40SFabiano Rosas return do_2misc_vec(s, a, gen_##INSN); \ 3859*f0984d40SFabiano Rosas } 3860*f0984d40SFabiano Rosas 3861*f0984d40SFabiano Rosas DO_VEC_RMODE(VCVTAU, FPROUNDING_TIEAWAY, vcvt_rm_u) 3862*f0984d40SFabiano Rosas DO_VEC_RMODE(VCVTAS, FPROUNDING_TIEAWAY, vcvt_rm_s) 3863*f0984d40SFabiano Rosas DO_VEC_RMODE(VCVTNU, FPROUNDING_TIEEVEN, vcvt_rm_u) 3864*f0984d40SFabiano Rosas DO_VEC_RMODE(VCVTNS, FPROUNDING_TIEEVEN, vcvt_rm_s) 3865*f0984d40SFabiano Rosas DO_VEC_RMODE(VCVTPU, FPROUNDING_POSINF, vcvt_rm_u) 3866*f0984d40SFabiano Rosas DO_VEC_RMODE(VCVTPS, FPROUNDING_POSINF, vcvt_rm_s) 3867*f0984d40SFabiano Rosas DO_VEC_RMODE(VCVTMU, FPROUNDING_NEGINF, vcvt_rm_u) 3868*f0984d40SFabiano Rosas DO_VEC_RMODE(VCVTMS, FPROUNDING_NEGINF, vcvt_rm_s) 3869*f0984d40SFabiano Rosas 3870*f0984d40SFabiano Rosas DO_VEC_RMODE(VRINTN, FPROUNDING_TIEEVEN, vrint_rm_) 3871*f0984d40SFabiano Rosas DO_VEC_RMODE(VRINTA, FPROUNDING_TIEAWAY, vrint_rm_) 3872*f0984d40SFabiano Rosas DO_VEC_RMODE(VRINTZ, FPROUNDING_ZERO, vrint_rm_) 3873*f0984d40SFabiano Rosas DO_VEC_RMODE(VRINTM, FPROUNDING_NEGINF, 
vrint_rm_) 3874*f0984d40SFabiano Rosas DO_VEC_RMODE(VRINTP, FPROUNDING_POSINF, vrint_rm_) 3875*f0984d40SFabiano Rosas 3876*f0984d40SFabiano Rosas static bool trans_VSWP(DisasContext *s, arg_2misc *a) 3877*f0984d40SFabiano Rosas { 3878*f0984d40SFabiano Rosas TCGv_i64 rm, rd; 3879*f0984d40SFabiano Rosas int pass; 3880*f0984d40SFabiano Rosas 3881*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 3882*f0984d40SFabiano Rosas return false; 3883*f0984d40SFabiano Rosas } 3884*f0984d40SFabiano Rosas 3885*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. */ 3886*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 3887*f0984d40SFabiano Rosas ((a->vd | a->vm) & 0x10)) { 3888*f0984d40SFabiano Rosas return false; 3889*f0984d40SFabiano Rosas } 3890*f0984d40SFabiano Rosas 3891*f0984d40SFabiano Rosas if (a->size != 0) { 3892*f0984d40SFabiano Rosas return false; 3893*f0984d40SFabiano Rosas } 3894*f0984d40SFabiano Rosas 3895*f0984d40SFabiano Rosas if ((a->vd | a->vm) & a->q) { 3896*f0984d40SFabiano Rosas return false; 3897*f0984d40SFabiano Rosas } 3898*f0984d40SFabiano Rosas 3899*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 3900*f0984d40SFabiano Rosas return true; 3901*f0984d40SFabiano Rosas } 3902*f0984d40SFabiano Rosas 3903*f0984d40SFabiano Rosas rm = tcg_temp_new_i64(); 3904*f0984d40SFabiano Rosas rd = tcg_temp_new_i64(); 3905*f0984d40SFabiano Rosas for (pass = 0; pass < (a->q ? 
2 : 1); pass++) { 3906*f0984d40SFabiano Rosas read_neon_element64(rm, a->vm, pass, MO_64); 3907*f0984d40SFabiano Rosas read_neon_element64(rd, a->vd, pass, MO_64); 3908*f0984d40SFabiano Rosas write_neon_element64(rm, a->vd, pass, MO_64); 3909*f0984d40SFabiano Rosas write_neon_element64(rd, a->vm, pass, MO_64); 3910*f0984d40SFabiano Rosas } 3911*f0984d40SFabiano Rosas tcg_temp_free_i64(rm); 3912*f0984d40SFabiano Rosas tcg_temp_free_i64(rd); 3913*f0984d40SFabiano Rosas 3914*f0984d40SFabiano Rosas return true; 3915*f0984d40SFabiano Rosas } 3916*f0984d40SFabiano Rosas static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1) 3917*f0984d40SFabiano Rosas { 3918*f0984d40SFabiano Rosas TCGv_i32 rd, tmp; 3919*f0984d40SFabiano Rosas 3920*f0984d40SFabiano Rosas rd = tcg_temp_new_i32(); 3921*f0984d40SFabiano Rosas tmp = tcg_temp_new_i32(); 3922*f0984d40SFabiano Rosas 3923*f0984d40SFabiano Rosas tcg_gen_shli_i32(rd, t0, 8); 3924*f0984d40SFabiano Rosas tcg_gen_andi_i32(rd, rd, 0xff00ff00); 3925*f0984d40SFabiano Rosas tcg_gen_andi_i32(tmp, t1, 0x00ff00ff); 3926*f0984d40SFabiano Rosas tcg_gen_or_i32(rd, rd, tmp); 3927*f0984d40SFabiano Rosas 3928*f0984d40SFabiano Rosas tcg_gen_shri_i32(t1, t1, 8); 3929*f0984d40SFabiano Rosas tcg_gen_andi_i32(t1, t1, 0x00ff00ff); 3930*f0984d40SFabiano Rosas tcg_gen_andi_i32(tmp, t0, 0xff00ff00); 3931*f0984d40SFabiano Rosas tcg_gen_or_i32(t1, t1, tmp); 3932*f0984d40SFabiano Rosas tcg_gen_mov_i32(t0, rd); 3933*f0984d40SFabiano Rosas 3934*f0984d40SFabiano Rosas tcg_temp_free_i32(tmp); 3935*f0984d40SFabiano Rosas tcg_temp_free_i32(rd); 3936*f0984d40SFabiano Rosas } 3937*f0984d40SFabiano Rosas 3938*f0984d40SFabiano Rosas static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1) 3939*f0984d40SFabiano Rosas { 3940*f0984d40SFabiano Rosas TCGv_i32 rd, tmp; 3941*f0984d40SFabiano Rosas 3942*f0984d40SFabiano Rosas rd = tcg_temp_new_i32(); 3943*f0984d40SFabiano Rosas tmp = tcg_temp_new_i32(); 3944*f0984d40SFabiano Rosas 3945*f0984d40SFabiano Rosas tcg_gen_shli_i32(rd, 
t0, 16); 3946*f0984d40SFabiano Rosas tcg_gen_andi_i32(tmp, t1, 0xffff); 3947*f0984d40SFabiano Rosas tcg_gen_or_i32(rd, rd, tmp); 3948*f0984d40SFabiano Rosas tcg_gen_shri_i32(t1, t1, 16); 3949*f0984d40SFabiano Rosas tcg_gen_andi_i32(tmp, t0, 0xffff0000); 3950*f0984d40SFabiano Rosas tcg_gen_or_i32(t1, t1, tmp); 3951*f0984d40SFabiano Rosas tcg_gen_mov_i32(t0, rd); 3952*f0984d40SFabiano Rosas 3953*f0984d40SFabiano Rosas tcg_temp_free_i32(tmp); 3954*f0984d40SFabiano Rosas tcg_temp_free_i32(rd); 3955*f0984d40SFabiano Rosas } 3956*f0984d40SFabiano Rosas 3957*f0984d40SFabiano Rosas static bool trans_VTRN(DisasContext *s, arg_2misc *a) 3958*f0984d40SFabiano Rosas { 3959*f0984d40SFabiano Rosas TCGv_i32 tmp, tmp2; 3960*f0984d40SFabiano Rosas int pass; 3961*f0984d40SFabiano Rosas 3962*f0984d40SFabiano Rosas if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 3963*f0984d40SFabiano Rosas return false; 3964*f0984d40SFabiano Rosas } 3965*f0984d40SFabiano Rosas 3966*f0984d40SFabiano Rosas /* UNDEF accesses to D16-D31 if they don't exist. */ 3967*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_simd_r32, s) && 3968*f0984d40SFabiano Rosas ((a->vd | a->vm) & 0x10)) { 3969*f0984d40SFabiano Rosas return false; 3970*f0984d40SFabiano Rosas } 3971*f0984d40SFabiano Rosas 3972*f0984d40SFabiano Rosas if ((a->vd | a->vm) & a->q) { 3973*f0984d40SFabiano Rosas return false; 3974*f0984d40SFabiano Rosas } 3975*f0984d40SFabiano Rosas 3976*f0984d40SFabiano Rosas if (a->size == 3) { 3977*f0984d40SFabiano Rosas return false; 3978*f0984d40SFabiano Rosas } 3979*f0984d40SFabiano Rosas 3980*f0984d40SFabiano Rosas if (!vfp_access_check(s)) { 3981*f0984d40SFabiano Rosas return true; 3982*f0984d40SFabiano Rosas } 3983*f0984d40SFabiano Rosas 3984*f0984d40SFabiano Rosas tmp = tcg_temp_new_i32(); 3985*f0984d40SFabiano Rosas tmp2 = tcg_temp_new_i32(); 3986*f0984d40SFabiano Rosas if (a->size == MO_32) { 3987*f0984d40SFabiano Rosas for (pass = 0; pass < (a->q ? 
4 : 2); pass += 2) { 3988*f0984d40SFabiano Rosas read_neon_element32(tmp, a->vm, pass, MO_32); 3989*f0984d40SFabiano Rosas read_neon_element32(tmp2, a->vd, pass + 1, MO_32); 3990*f0984d40SFabiano Rosas write_neon_element32(tmp2, a->vm, pass, MO_32); 3991*f0984d40SFabiano Rosas write_neon_element32(tmp, a->vd, pass + 1, MO_32); 3992*f0984d40SFabiano Rosas } 3993*f0984d40SFabiano Rosas } else { 3994*f0984d40SFabiano Rosas for (pass = 0; pass < (a->q ? 4 : 2); pass++) { 3995*f0984d40SFabiano Rosas read_neon_element32(tmp, a->vm, pass, MO_32); 3996*f0984d40SFabiano Rosas read_neon_element32(tmp2, a->vd, pass, MO_32); 3997*f0984d40SFabiano Rosas if (a->size == MO_8) { 3998*f0984d40SFabiano Rosas gen_neon_trn_u8(tmp, tmp2); 3999*f0984d40SFabiano Rosas } else { 4000*f0984d40SFabiano Rosas gen_neon_trn_u16(tmp, tmp2); 4001*f0984d40SFabiano Rosas } 4002*f0984d40SFabiano Rosas write_neon_element32(tmp2, a->vm, pass, MO_32); 4003*f0984d40SFabiano Rosas write_neon_element32(tmp, a->vd, pass, MO_32); 4004*f0984d40SFabiano Rosas } 4005*f0984d40SFabiano Rosas } 4006*f0984d40SFabiano Rosas tcg_temp_free_i32(tmp); 4007*f0984d40SFabiano Rosas tcg_temp_free_i32(tmp2); 4008*f0984d40SFabiano Rosas return true; 4009*f0984d40SFabiano Rosas } 4010*f0984d40SFabiano Rosas 4011*f0984d40SFabiano Rosas static bool trans_VSMMLA(DisasContext *s, arg_VSMMLA *a) 4012*f0984d40SFabiano Rosas { 4013*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_i8mm, s)) { 4014*f0984d40SFabiano Rosas return false; 4015*f0984d40SFabiano Rosas } 4016*f0984d40SFabiano Rosas return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0, 4017*f0984d40SFabiano Rosas gen_helper_gvec_smmla_b); 4018*f0984d40SFabiano Rosas } 4019*f0984d40SFabiano Rosas 4020*f0984d40SFabiano Rosas static bool trans_VUMMLA(DisasContext *s, arg_VUMMLA *a) 4021*f0984d40SFabiano Rosas { 4022*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_i8mm, s)) { 4023*f0984d40SFabiano Rosas return false; 4024*f0984d40SFabiano Rosas } 4025*f0984d40SFabiano Rosas return 
do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0, 4026*f0984d40SFabiano Rosas gen_helper_gvec_ummla_b); 4027*f0984d40SFabiano Rosas } 4028*f0984d40SFabiano Rosas 4029*f0984d40SFabiano Rosas static bool trans_VUSMMLA(DisasContext *s, arg_VUSMMLA *a) 4030*f0984d40SFabiano Rosas { 4031*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_i8mm, s)) { 4032*f0984d40SFabiano Rosas return false; 4033*f0984d40SFabiano Rosas } 4034*f0984d40SFabiano Rosas return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0, 4035*f0984d40SFabiano Rosas gen_helper_gvec_usmmla_b); 4036*f0984d40SFabiano Rosas } 4037*f0984d40SFabiano Rosas 4038*f0984d40SFabiano Rosas static bool trans_VMMLA_b16(DisasContext *s, arg_VMMLA_b16 *a) 4039*f0984d40SFabiano Rosas { 4040*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_bf16, s)) { 4041*f0984d40SFabiano Rosas return false; 4042*f0984d40SFabiano Rosas } 4043*f0984d40SFabiano Rosas return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0, 4044*f0984d40SFabiano Rosas gen_helper_gvec_bfmmla); 4045*f0984d40SFabiano Rosas } 4046*f0984d40SFabiano Rosas 4047*f0984d40SFabiano Rosas static bool trans_VFMA_b16(DisasContext *s, arg_VFMA_b16 *a) 4048*f0984d40SFabiano Rosas { 4049*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_bf16, s)) { 4050*f0984d40SFabiano Rosas return false; 4051*f0984d40SFabiano Rosas } 4052*f0984d40SFabiano Rosas return do_neon_ddda_fpst(s, 7, a->vd, a->vn, a->vm, a->q, FPST_STD, 4053*f0984d40SFabiano Rosas gen_helper_gvec_bfmlal); 4054*f0984d40SFabiano Rosas } 4055*f0984d40SFabiano Rosas 4056*f0984d40SFabiano Rosas static bool trans_VFMA_b16_scal(DisasContext *s, arg_VFMA_b16_scal *a) 4057*f0984d40SFabiano Rosas { 4058*f0984d40SFabiano Rosas if (!dc_isar_feature(aa32_bf16, s)) { 4059*f0984d40SFabiano Rosas return false; 4060*f0984d40SFabiano Rosas } 4061*f0984d40SFabiano Rosas return do_neon_ddda_fpst(s, 6, a->vd, a->vn, a->vm, 4062*f0984d40SFabiano Rosas (a->index << 1) | a->q, FPST_STD, 4063*f0984d40SFabiano Rosas gen_helper_gvec_bfmlal_idx); 
4064*f0984d40SFabiano Rosas } 4065