1*f0984d40SFabiano Rosas /* 2*f0984d40SFabiano Rosas * AArch64 SME translation 3*f0984d40SFabiano Rosas * 4*f0984d40SFabiano Rosas * Copyright (c) 2022 Linaro, Ltd 5*f0984d40SFabiano Rosas * 6*f0984d40SFabiano Rosas * This library is free software; you can redistribute it and/or 7*f0984d40SFabiano Rosas * modify it under the terms of the GNU Lesser General Public 8*f0984d40SFabiano Rosas * License as published by the Free Software Foundation; either 9*f0984d40SFabiano Rosas * version 2.1 of the License, or (at your option) any later version. 10*f0984d40SFabiano Rosas * 11*f0984d40SFabiano Rosas * This library is distributed in the hope that it will be useful, 12*f0984d40SFabiano Rosas * but WITHOUT ANY WARRANTY; without even the implied warranty of 13*f0984d40SFabiano Rosas * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14*f0984d40SFabiano Rosas * Lesser General Public License for more details. 15*f0984d40SFabiano Rosas * 16*f0984d40SFabiano Rosas * You should have received a copy of the GNU Lesser General Public 17*f0984d40SFabiano Rosas * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18*f0984d40SFabiano Rosas */ 19*f0984d40SFabiano Rosas 20*f0984d40SFabiano Rosas #include "qemu/osdep.h" 21*f0984d40SFabiano Rosas #include "cpu.h" 22*f0984d40SFabiano Rosas #include "tcg/tcg-op.h" 23*f0984d40SFabiano Rosas #include "tcg/tcg-op-gvec.h" 24*f0984d40SFabiano Rosas #include "tcg/tcg-gvec-desc.h" 25*f0984d40SFabiano Rosas #include "translate.h" 26*f0984d40SFabiano Rosas #include "exec/helper-gen.h" 27*f0984d40SFabiano Rosas #include "translate-a64.h" 28*f0984d40SFabiano Rosas #include "fpu/softfloat.h" 29*f0984d40SFabiano Rosas 30*f0984d40SFabiano Rosas 31*f0984d40SFabiano Rosas /* 32*f0984d40SFabiano Rosas * Include the generated decoder. 33*f0984d40SFabiano Rosas */ 34*f0984d40SFabiano Rosas 35*f0984d40SFabiano Rosas #include "decode-sme.c.inc" 36*f0984d40SFabiano Rosas 37*f0984d40SFabiano Rosas 38*f0984d40SFabiano Rosas /* 39*f0984d40SFabiano Rosas * Resolve tile.size[index] to a host pointer, where tile and index 40*f0984d40SFabiano Rosas * are always decoded together, dependent on the element size. 41*f0984d40SFabiano Rosas */ 42*f0984d40SFabiano Rosas static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs, 43*f0984d40SFabiano Rosas int tile_index, bool vertical) 44*f0984d40SFabiano Rosas { 45*f0984d40SFabiano Rosas int tile = tile_index >> (4 - esz); 46*f0984d40SFabiano Rosas int index = esz == MO_128 ? 0 : extract32(tile_index, 0, 4 - esz); 47*f0984d40SFabiano Rosas int pos, len, offset; 48*f0984d40SFabiano Rosas TCGv_i32 tmp; 49*f0984d40SFabiano Rosas TCGv_ptr addr; 50*f0984d40SFabiano Rosas 51*f0984d40SFabiano Rosas /* Compute the final index, which is Rs+imm. */ 52*f0984d40SFabiano Rosas tmp = tcg_temp_new_i32(); 53*f0984d40SFabiano Rosas tcg_gen_trunc_tl_i32(tmp, cpu_reg(s, rs)); 54*f0984d40SFabiano Rosas tcg_gen_addi_i32(tmp, tmp, index); 55*f0984d40SFabiano Rosas 56*f0984d40SFabiano Rosas /* Prepare a power-of-two modulo via extraction of @len bits. */ 57*f0984d40SFabiano Rosas len = ctz32(streaming_vec_reg_size(s)) - esz; 58*f0984d40SFabiano Rosas 59*f0984d40SFabiano Rosas if (vertical) { 60*f0984d40SFabiano Rosas /* 61*f0984d40SFabiano Rosas * Compute the byte offset of the index within the tile: 62*f0984d40SFabiano Rosas * (index % (svl / size)) * size 63*f0984d40SFabiano Rosas * = (index % (svl >> esz)) << esz 64*f0984d40SFabiano Rosas * Perform the power-of-two modulo via extraction of the low @len bits. 65*f0984d40SFabiano Rosas * Perform the multiply by shifting left by @pos bits. 66*f0984d40SFabiano Rosas * Perform these operations simultaneously via deposit into zero. 67*f0984d40SFabiano Rosas */ 68*f0984d40SFabiano Rosas pos = esz; 69*f0984d40SFabiano Rosas tcg_gen_deposit_z_i32(tmp, tmp, pos, len); 70*f0984d40SFabiano Rosas 71*f0984d40SFabiano Rosas /* 72*f0984d40SFabiano Rosas * For big-endian, adjust the indexed column byte offset within 73*f0984d40SFabiano Rosas * the uint64_t host words that make up env->zarray[]. 74*f0984d40SFabiano Rosas */ 75*f0984d40SFabiano Rosas if (HOST_BIG_ENDIAN && esz < MO_64) { 76*f0984d40SFabiano Rosas tcg_gen_xori_i32(tmp, tmp, 8 - (1 << esz)); 77*f0984d40SFabiano Rosas } 78*f0984d40SFabiano Rosas } else { 79*f0984d40SFabiano Rosas /* 80*f0984d40SFabiano Rosas * Compute the byte offset of the index within the tile: 81*f0984d40SFabiano Rosas * (index % (svl / size)) * (size * sizeof(row)) 82*f0984d40SFabiano Rosas * = (index % (svl >> esz)) << (esz + log2(sizeof(row))) 83*f0984d40SFabiano Rosas */ 84*f0984d40SFabiano Rosas pos = esz + ctz32(sizeof(ARMVectorReg)); 85*f0984d40SFabiano Rosas tcg_gen_deposit_z_i32(tmp, tmp, pos, len); 86*f0984d40SFabiano Rosas 87*f0984d40SFabiano Rosas /* Row slices are always aligned and need no endian adjustment. */ 88*f0984d40SFabiano Rosas } 89*f0984d40SFabiano Rosas 90*f0984d40SFabiano Rosas /* The tile byte offset within env->zarray is the row. */ 91*f0984d40SFabiano Rosas offset = tile * sizeof(ARMVectorReg); 92*f0984d40SFabiano Rosas 93*f0984d40SFabiano Rosas /* Include the byte offset of zarray to make this relative to env. */ 94*f0984d40SFabiano Rosas offset += offsetof(CPUARMState, zarray); 95*f0984d40SFabiano Rosas tcg_gen_addi_i32(tmp, tmp, offset); 96*f0984d40SFabiano Rosas 97*f0984d40SFabiano Rosas /* Add the byte offset to env to produce the final pointer. */ 98*f0984d40SFabiano Rosas addr = tcg_temp_new_ptr(); 99*f0984d40SFabiano Rosas tcg_gen_ext_i32_ptr(addr, tmp); 100*f0984d40SFabiano Rosas tcg_temp_free_i32(tmp); 101*f0984d40SFabiano Rosas tcg_gen_add_ptr(addr, addr, cpu_env); 102*f0984d40SFabiano Rosas 103*f0984d40SFabiano Rosas return addr; 104*f0984d40SFabiano Rosas } 105*f0984d40SFabiano Rosas 106*f0984d40SFabiano Rosas static bool trans_ZERO(DisasContext *s, arg_ZERO *a) 107*f0984d40SFabiano Rosas { 108*f0984d40SFabiano Rosas if (!dc_isar_feature(aa64_sme, s)) { 109*f0984d40SFabiano Rosas return false; 110*f0984d40SFabiano Rosas } 111*f0984d40SFabiano Rosas if (sme_za_enabled_check(s)) { 112*f0984d40SFabiano Rosas gen_helper_sme_zero(cpu_env, tcg_constant_i32(a->imm), 113*f0984d40SFabiano Rosas tcg_constant_i32(streaming_vec_reg_size(s))); 114*f0984d40SFabiano Rosas } 115*f0984d40SFabiano Rosas return true; 116*f0984d40SFabiano Rosas } 117*f0984d40SFabiano Rosas 118*f0984d40SFabiano Rosas static bool trans_MOVA(DisasContext *s, arg_MOVA *a) 119*f0984d40SFabiano Rosas { 120*f0984d40SFabiano Rosas static gen_helper_gvec_4 * const h_fns[5] = { 121*f0984d40SFabiano Rosas gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h, 122*f0984d40SFabiano Rosas gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d, 123*f0984d40SFabiano Rosas gen_helper_sve_sel_zpzz_q 124*f0984d40SFabiano Rosas }; 125*f0984d40SFabiano Rosas static gen_helper_gvec_3 * const cz_fns[5] = { 126*f0984d40SFabiano Rosas gen_helper_sme_mova_cz_b, gen_helper_sme_mova_cz_h, 127*f0984d40SFabiano Rosas gen_helper_sme_mova_cz_s, gen_helper_sme_mova_cz_d, 128*f0984d40SFabiano Rosas gen_helper_sme_mova_cz_q, 129*f0984d40SFabiano Rosas }; 130*f0984d40SFabiano Rosas static gen_helper_gvec_3 * const zc_fns[5] = { 131*f0984d40SFabiano Rosas gen_helper_sme_mova_zc_b, gen_helper_sme_mova_zc_h, 132*f0984d40SFabiano Rosas gen_helper_sme_mova_zc_s, gen_helper_sme_mova_zc_d, 133*f0984d40SFabiano Rosas gen_helper_sme_mova_zc_q, 134*f0984d40SFabiano Rosas }; 135*f0984d40SFabiano Rosas 136*f0984d40SFabiano Rosas TCGv_ptr t_za, t_zr, t_pg; 137*f0984d40SFabiano Rosas TCGv_i32 t_desc; 138*f0984d40SFabiano Rosas int svl; 139*f0984d40SFabiano Rosas 140*f0984d40SFabiano Rosas if (!dc_isar_feature(aa64_sme, s)) { 141*f0984d40SFabiano Rosas return false; 142*f0984d40SFabiano Rosas } 143*f0984d40SFabiano Rosas if (!sme_smza_enabled_check(s)) { 144*f0984d40SFabiano Rosas return true; 145*f0984d40SFabiano Rosas } 146*f0984d40SFabiano Rosas 147*f0984d40SFabiano Rosas t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v); 148*f0984d40SFabiano Rosas t_zr = vec_full_reg_ptr(s, a->zr); 149*f0984d40SFabiano Rosas t_pg = pred_full_reg_ptr(s, a->pg); 150*f0984d40SFabiano Rosas 151*f0984d40SFabiano Rosas svl = streaming_vec_reg_size(s); 152*f0984d40SFabiano Rosas t_desc = tcg_constant_i32(simd_desc(svl, svl, 0)); 153*f0984d40SFabiano Rosas 154*f0984d40SFabiano Rosas if (a->v) { 155*f0984d40SFabiano Rosas /* Vertical slice -- use sme mova helpers. */ 156*f0984d40SFabiano Rosas if (a->to_vec) { 157*f0984d40SFabiano Rosas zc_fns[a->esz](t_zr, t_za, t_pg, t_desc); 158*f0984d40SFabiano Rosas } else { 159*f0984d40SFabiano Rosas cz_fns[a->esz](t_za, t_zr, t_pg, t_desc); 160*f0984d40SFabiano Rosas } 161*f0984d40SFabiano Rosas } else { 162*f0984d40SFabiano Rosas /* Horizontal slice -- reuse sve sel helpers. */ 163*f0984d40SFabiano Rosas if (a->to_vec) { 164*f0984d40SFabiano Rosas h_fns[a->esz](t_zr, t_za, t_zr, t_pg, t_desc); 165*f0984d40SFabiano Rosas } else { 166*f0984d40SFabiano Rosas h_fns[a->esz](t_za, t_zr, t_za, t_pg, t_desc); 167*f0984d40SFabiano Rosas } 168*f0984d40SFabiano Rosas } 169*f0984d40SFabiano Rosas 170*f0984d40SFabiano Rosas tcg_temp_free_ptr(t_za); 171*f0984d40SFabiano Rosas tcg_temp_free_ptr(t_zr); 172*f0984d40SFabiano Rosas tcg_temp_free_ptr(t_pg); 173*f0984d40SFabiano Rosas 174*f0984d40SFabiano Rosas return true; 175*f0984d40SFabiano Rosas } 176*f0984d40SFabiano Rosas 177*f0984d40SFabiano Rosas static bool trans_LDST1(DisasContext *s, arg_LDST1 *a) 178*f0984d40SFabiano Rosas { 179*f0984d40SFabiano Rosas typedef void GenLdSt1(TCGv_env, TCGv_ptr, TCGv_ptr, TCGv, TCGv_i32); 180*f0984d40SFabiano Rosas 181*f0984d40SFabiano Rosas /* 182*f0984d40SFabiano Rosas * Indexed by [esz][be][v][mte][st], which is (except for load/store) 183*f0984d40SFabiano Rosas * also the order in which the elements appear in the function names, 184*f0984d40SFabiano Rosas * and so how we must concatenate the pieces. 185*f0984d40SFabiano Rosas */ 186*f0984d40SFabiano Rosas 187*f0984d40SFabiano Rosas #define FN_LS(F) { gen_helper_sme_ld1##F, gen_helper_sme_st1##F } 188*f0984d40SFabiano Rosas #define FN_MTE(F) { FN_LS(F), FN_LS(F##_mte) } 189*f0984d40SFabiano Rosas #define FN_HV(F) { FN_MTE(F##_h), FN_MTE(F##_v) } 190*f0984d40SFabiano Rosas #define FN_END(L, B) { FN_HV(L), FN_HV(B) } 191*f0984d40SFabiano Rosas 192*f0984d40SFabiano Rosas static GenLdSt1 * const fns[5][2][2][2][2] = { 193*f0984d40SFabiano Rosas FN_END(b, b), 194*f0984d40SFabiano Rosas FN_END(h_le, h_be), 195*f0984d40SFabiano Rosas FN_END(s_le, s_be), 196*f0984d40SFabiano Rosas FN_END(d_le, d_be), 197*f0984d40SFabiano Rosas FN_END(q_le, q_be), 198*f0984d40SFabiano Rosas }; 199*f0984d40SFabiano Rosas 200*f0984d40SFabiano Rosas #undef FN_LS 201*f0984d40SFabiano Rosas #undef FN_MTE 202*f0984d40SFabiano Rosas #undef FN_HV 203*f0984d40SFabiano Rosas #undef FN_END 204*f0984d40SFabiano Rosas 205*f0984d40SFabiano Rosas TCGv_ptr t_za, t_pg; 206*f0984d40SFabiano Rosas TCGv_i64 addr; 207*f0984d40SFabiano Rosas int svl, desc = 0; 208*f0984d40SFabiano Rosas bool be = s->be_data == MO_BE; 209*f0984d40SFabiano Rosas bool mte = s->mte_active[0]; 210*f0984d40SFabiano Rosas 211*f0984d40SFabiano Rosas if (!dc_isar_feature(aa64_sme, s)) { 212*f0984d40SFabiano Rosas return false; 213*f0984d40SFabiano Rosas } 214*f0984d40SFabiano Rosas if (!sme_smza_enabled_check(s)) { 215*f0984d40SFabiano Rosas return true; 216*f0984d40SFabiano Rosas } 217*f0984d40SFabiano Rosas 218*f0984d40SFabiano Rosas t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v); 219*f0984d40SFabiano Rosas t_pg = pred_full_reg_ptr(s, a->pg); 220*f0984d40SFabiano Rosas addr = tcg_temp_new_i64(); 221*f0984d40SFabiano Rosas 222*f0984d40SFabiano Rosas tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->esz); 223*f0984d40SFabiano Rosas tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 224*f0984d40SFabiano Rosas 225*f0984d40SFabiano Rosas if (mte) { 226*f0984d40SFabiano Rosas desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 227*f0984d40SFabiano Rosas desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 228*f0984d40SFabiano Rosas desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 229*f0984d40SFabiano Rosas desc = FIELD_DP32(desc, MTEDESC, WRITE, a->st); 230*f0984d40SFabiano Rosas desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << a->esz) - 1); 231*f0984d40SFabiano Rosas desc <<= SVE_MTEDESC_SHIFT; 232*f0984d40SFabiano Rosas } else { 233*f0984d40SFabiano Rosas addr = clean_data_tbi(s, addr); 234*f0984d40SFabiano Rosas } 235*f0984d40SFabiano Rosas svl = streaming_vec_reg_size(s); 236*f0984d40SFabiano Rosas desc = simd_desc(svl, svl, desc); 237*f0984d40SFabiano Rosas 238*f0984d40SFabiano Rosas fns[a->esz][be][a->v][mte][a->st](cpu_env, t_za, t_pg, addr, 239*f0984d40SFabiano Rosas tcg_constant_i32(desc)); 240*f0984d40SFabiano Rosas 241*f0984d40SFabiano Rosas tcg_temp_free_ptr(t_za); 242*f0984d40SFabiano Rosas tcg_temp_free_ptr(t_pg); 243*f0984d40SFabiano Rosas tcg_temp_free_i64(addr); 244*f0984d40SFabiano Rosas return true; 245*f0984d40SFabiano Rosas } 246*f0984d40SFabiano Rosas 247*f0984d40SFabiano Rosas typedef void GenLdStR(DisasContext *, TCGv_ptr, int, int, int, int); 248*f0984d40SFabiano Rosas 249*f0984d40SFabiano Rosas static bool do_ldst_r(DisasContext *s, arg_ldstr *a, GenLdStR *fn) 250*f0984d40SFabiano Rosas { 251*f0984d40SFabiano Rosas int svl = streaming_vec_reg_size(s); 252*f0984d40SFabiano Rosas int imm = a->imm; 253*f0984d40SFabiano Rosas TCGv_ptr base; 254*f0984d40SFabiano Rosas 255*f0984d40SFabiano Rosas if (!sme_za_enabled_check(s)) { 256*f0984d40SFabiano Rosas return true; 257*f0984d40SFabiano Rosas } 258*f0984d40SFabiano Rosas 259*f0984d40SFabiano Rosas /* ZA[n] equates to ZA0H.B[n]. */ 260*f0984d40SFabiano Rosas base = get_tile_rowcol(s, MO_8, a->rv, imm, false); 261*f0984d40SFabiano Rosas 262*f0984d40SFabiano Rosas fn(s, base, 0, svl, a->rn, imm * svl); 263*f0984d40SFabiano Rosas 264*f0984d40SFabiano Rosas tcg_temp_free_ptr(base); 265*f0984d40SFabiano Rosas return true; 266*f0984d40SFabiano Rosas } 267*f0984d40SFabiano Rosas 268*f0984d40SFabiano Rosas TRANS_FEAT(LDR, aa64_sme, do_ldst_r, a, gen_sve_ldr) 269*f0984d40SFabiano Rosas TRANS_FEAT(STR, aa64_sme, do_ldst_r, a, gen_sve_str) 270*f0984d40SFabiano Rosas 271*f0984d40SFabiano Rosas static bool do_adda(DisasContext *s, arg_adda *a, MemOp esz, 272*f0984d40SFabiano Rosas gen_helper_gvec_4 *fn) 273*f0984d40SFabiano Rosas { 274*f0984d40SFabiano Rosas int svl = streaming_vec_reg_size(s); 275*f0984d40SFabiano Rosas uint32_t desc = simd_desc(svl, svl, 0); 276*f0984d40SFabiano Rosas TCGv_ptr za, zn, pn, pm; 277*f0984d40SFabiano Rosas 278*f0984d40SFabiano Rosas if (!sme_smza_enabled_check(s)) { 279*f0984d40SFabiano Rosas return true; 280*f0984d40SFabiano Rosas } 281*f0984d40SFabiano Rosas 282*f0984d40SFabiano Rosas /* Sum XZR+zad to find ZAd. */ 283*f0984d40SFabiano Rosas za = get_tile_rowcol(s, esz, 31, a->zad, false); 284*f0984d40SFabiano Rosas zn = vec_full_reg_ptr(s, a->zn); 285*f0984d40SFabiano Rosas pn = pred_full_reg_ptr(s, a->pn); 286*f0984d40SFabiano Rosas pm = pred_full_reg_ptr(s, a->pm); 287*f0984d40SFabiano Rosas 288*f0984d40SFabiano Rosas fn(za, zn, pn, pm, tcg_constant_i32(desc)); 289*f0984d40SFabiano Rosas 290*f0984d40SFabiano Rosas tcg_temp_free_ptr(za); 291*f0984d40SFabiano Rosas tcg_temp_free_ptr(zn); 292*f0984d40SFabiano Rosas tcg_temp_free_ptr(pn); 293*f0984d40SFabiano Rosas tcg_temp_free_ptr(pm); 294*f0984d40SFabiano Rosas return true; 295*f0984d40SFabiano Rosas } 296*f0984d40SFabiano Rosas 297*f0984d40SFabiano Rosas TRANS_FEAT(ADDHA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addha_s) 298*f0984d40SFabiano Rosas TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s) 299*f0984d40SFabiano Rosas TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d) 300*f0984d40SFabiano Rosas TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d) 301*f0984d40SFabiano Rosas 302*f0984d40SFabiano Rosas static bool do_outprod(DisasContext *s, arg_op *a, MemOp esz, 303*f0984d40SFabiano Rosas gen_helper_gvec_5 *fn) 304*f0984d40SFabiano Rosas { 305*f0984d40SFabiano Rosas int svl = streaming_vec_reg_size(s); 306*f0984d40SFabiano Rosas uint32_t desc = simd_desc(svl, svl, a->sub); 307*f0984d40SFabiano Rosas TCGv_ptr za, zn, zm, pn, pm; 308*f0984d40SFabiano Rosas 309*f0984d40SFabiano Rosas if (!sme_smza_enabled_check(s)) { 310*f0984d40SFabiano Rosas return true; 311*f0984d40SFabiano Rosas } 312*f0984d40SFabiano Rosas 313*f0984d40SFabiano Rosas /* Sum XZR+zad to find ZAd. */ 314*f0984d40SFabiano Rosas za = get_tile_rowcol(s, esz, 31, a->zad, false); 315*f0984d40SFabiano Rosas zn = vec_full_reg_ptr(s, a->zn); 316*f0984d40SFabiano Rosas zm = vec_full_reg_ptr(s, a->zm); 317*f0984d40SFabiano Rosas pn = pred_full_reg_ptr(s, a->pn); 318*f0984d40SFabiano Rosas pm = pred_full_reg_ptr(s, a->pm); 319*f0984d40SFabiano Rosas 320*f0984d40SFabiano Rosas fn(za, zn, zm, pn, pm, tcg_constant_i32(desc)); 321*f0984d40SFabiano Rosas 322*f0984d40SFabiano Rosas tcg_temp_free_ptr(za); 323*f0984d40SFabiano Rosas tcg_temp_free_ptr(zn); 324*f0984d40SFabiano Rosas tcg_temp_free_ptr(pn); 325*f0984d40SFabiano Rosas tcg_temp_free_ptr(pm); 326*f0984d40SFabiano Rosas return true; 327*f0984d40SFabiano Rosas } 328*f0984d40SFabiano Rosas 329*f0984d40SFabiano Rosas static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz, 330*f0984d40SFabiano Rosas gen_helper_gvec_5_ptr *fn) 331*f0984d40SFabiano Rosas { 332*f0984d40SFabiano Rosas int svl = streaming_vec_reg_size(s); 333*f0984d40SFabiano Rosas uint32_t desc = simd_desc(svl, svl, a->sub); 334*f0984d40SFabiano Rosas TCGv_ptr za, zn, zm, pn, pm, fpst; 335*f0984d40SFabiano Rosas 336*f0984d40SFabiano Rosas if (!sme_smza_enabled_check(s)) { 337*f0984d40SFabiano Rosas return true; 338*f0984d40SFabiano Rosas } 339*f0984d40SFabiano Rosas 340*f0984d40SFabiano Rosas /* Sum XZR+zad to find ZAd. */ 341*f0984d40SFabiano Rosas za = get_tile_rowcol(s, esz, 31, a->zad, false); 342*f0984d40SFabiano Rosas zn = vec_full_reg_ptr(s, a->zn); 343*f0984d40SFabiano Rosas zm = vec_full_reg_ptr(s, a->zm); 344*f0984d40SFabiano Rosas pn = pred_full_reg_ptr(s, a->pn); 345*f0984d40SFabiano Rosas pm = pred_full_reg_ptr(s, a->pm); 346*f0984d40SFabiano Rosas fpst = fpstatus_ptr(FPST_FPCR); 347*f0984d40SFabiano Rosas 348*f0984d40SFabiano Rosas fn(za, zn, zm, pn, pm, fpst, tcg_constant_i32(desc)); 349*f0984d40SFabiano Rosas 350*f0984d40SFabiano Rosas tcg_temp_free_ptr(za); 351*f0984d40SFabiano Rosas tcg_temp_free_ptr(zn); 352*f0984d40SFabiano Rosas tcg_temp_free_ptr(pn); 353*f0984d40SFabiano Rosas tcg_temp_free_ptr(pm); 354*f0984d40SFabiano Rosas tcg_temp_free_ptr(fpst); 355*f0984d40SFabiano Rosas return true; 356*f0984d40SFabiano Rosas } 357*f0984d40SFabiano Rosas 358*f0984d40SFabiano Rosas TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_h) 359*f0984d40SFabiano Rosas TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s) 360*f0984d40SFabiano Rosas TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d) 361*f0984d40SFabiano Rosas 362*f0984d40SFabiano Rosas /* TODO: FEAT_EBF16 */ 363*f0984d40SFabiano Rosas TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa) 364*f0984d40SFabiano Rosas 365*f0984d40SFabiano Rosas TRANS_FEAT(SMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_smopa_s) 366*f0984d40SFabiano Rosas TRANS_FEAT(UMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_umopa_s) 367*f0984d40SFabiano Rosas TRANS_FEAT(SUMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_sumopa_s) 368*f0984d40SFabiano Rosas TRANS_FEAT(USMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_usmopa_s) 369*f0984d40SFabiano Rosas 370*f0984d40SFabiano Rosas TRANS_FEAT(SMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_smopa_d) 371*f0984d40SFabiano Rosas TRANS_FEAT(UMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_umopa_d) 372*f0984d40SFabiano Rosas TRANS_FEAT(SUMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_sumopa_d) 373*f0984d40SFabiano Rosas TRANS_FEAT(USMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_usmopa_d) 374