xref: /openbmc/qemu/target/arm/tcg/translate-sme.c (revision f0984d4040c328d1c021ae6680479cbbe13c485b)
1*f0984d40SFabiano Rosas /*
2*f0984d40SFabiano Rosas  * AArch64 SME translation
3*f0984d40SFabiano Rosas  *
4*f0984d40SFabiano Rosas  * Copyright (c) 2022 Linaro, Ltd
5*f0984d40SFabiano Rosas  *
6*f0984d40SFabiano Rosas  * This library is free software; you can redistribute it and/or
7*f0984d40SFabiano Rosas  * modify it under the terms of the GNU Lesser General Public
8*f0984d40SFabiano Rosas  * License as published by the Free Software Foundation; either
9*f0984d40SFabiano Rosas  * version 2.1 of the License, or (at your option) any later version.
10*f0984d40SFabiano Rosas  *
11*f0984d40SFabiano Rosas  * This library is distributed in the hope that it will be useful,
12*f0984d40SFabiano Rosas  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13*f0984d40SFabiano Rosas  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14*f0984d40SFabiano Rosas  * Lesser General Public License for more details.
15*f0984d40SFabiano Rosas  *
16*f0984d40SFabiano Rosas  * You should have received a copy of the GNU Lesser General Public
17*f0984d40SFabiano Rosas  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18*f0984d40SFabiano Rosas  */
19*f0984d40SFabiano Rosas 
20*f0984d40SFabiano Rosas #include "qemu/osdep.h"
21*f0984d40SFabiano Rosas #include "cpu.h"
22*f0984d40SFabiano Rosas #include "tcg/tcg-op.h"
23*f0984d40SFabiano Rosas #include "tcg/tcg-op-gvec.h"
24*f0984d40SFabiano Rosas #include "tcg/tcg-gvec-desc.h"
25*f0984d40SFabiano Rosas #include "translate.h"
26*f0984d40SFabiano Rosas #include "exec/helper-gen.h"
27*f0984d40SFabiano Rosas #include "translate-a64.h"
28*f0984d40SFabiano Rosas #include "fpu/softfloat.h"
29*f0984d40SFabiano Rosas 
30*f0984d40SFabiano Rosas 
31*f0984d40SFabiano Rosas /*
32*f0984d40SFabiano Rosas  * Include the generated decoder.
33*f0984d40SFabiano Rosas  */
34*f0984d40SFabiano Rosas 
35*f0984d40SFabiano Rosas #include "decode-sme.c.inc"
36*f0984d40SFabiano Rosas 
37*f0984d40SFabiano Rosas 
38*f0984d40SFabiano Rosas /*
39*f0984d40SFabiano Rosas  * Resolve tile.size[index] to a host pointer, where tile and index
40*f0984d40SFabiano Rosas  * are always decoded together, dependent on the element size.
41*f0984d40SFabiano Rosas  */
42*f0984d40SFabiano Rosas static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs,
43*f0984d40SFabiano Rosas                                 int tile_index, bool vertical)
44*f0984d40SFabiano Rosas {
45*f0984d40SFabiano Rosas     int tile = tile_index >> (4 - esz);
46*f0984d40SFabiano Rosas     int index = esz == MO_128 ? 0 : extract32(tile_index, 0, 4 - esz);
47*f0984d40SFabiano Rosas     int pos, len, offset;
48*f0984d40SFabiano Rosas     TCGv_i32 tmp;
49*f0984d40SFabiano Rosas     TCGv_ptr addr;
50*f0984d40SFabiano Rosas 
51*f0984d40SFabiano Rosas     /* Compute the final index, which is Rs+imm. */
52*f0984d40SFabiano Rosas     tmp = tcg_temp_new_i32();
53*f0984d40SFabiano Rosas     tcg_gen_trunc_tl_i32(tmp, cpu_reg(s, rs));
54*f0984d40SFabiano Rosas     tcg_gen_addi_i32(tmp, tmp, index);
55*f0984d40SFabiano Rosas 
56*f0984d40SFabiano Rosas     /* Prepare a power-of-two modulo via extraction of @len bits. */
57*f0984d40SFabiano Rosas     len = ctz32(streaming_vec_reg_size(s)) - esz;
58*f0984d40SFabiano Rosas 
59*f0984d40SFabiano Rosas     if (vertical) {
60*f0984d40SFabiano Rosas         /*
61*f0984d40SFabiano Rosas          * Compute the byte offset of the index within the tile:
62*f0984d40SFabiano Rosas          *     (index % (svl / size)) * size
63*f0984d40SFabiano Rosas          *   = (index % (svl >> esz)) << esz
64*f0984d40SFabiano Rosas          * Perform the power-of-two modulo via extraction of the low @len bits.
65*f0984d40SFabiano Rosas          * Perform the multiply by shifting left by @pos bits.
66*f0984d40SFabiano Rosas          * Perform these operations simultaneously via deposit into zero.
67*f0984d40SFabiano Rosas          */
68*f0984d40SFabiano Rosas         pos = esz;
69*f0984d40SFabiano Rosas         tcg_gen_deposit_z_i32(tmp, tmp, pos, len);
70*f0984d40SFabiano Rosas 
71*f0984d40SFabiano Rosas         /*
72*f0984d40SFabiano Rosas          * For big-endian, adjust the indexed column byte offset within
73*f0984d40SFabiano Rosas          * the uint64_t host words that make up env->zarray[].
74*f0984d40SFabiano Rosas          */
75*f0984d40SFabiano Rosas         if (HOST_BIG_ENDIAN && esz < MO_64) {
76*f0984d40SFabiano Rosas             tcg_gen_xori_i32(tmp, tmp, 8 - (1 << esz));
77*f0984d40SFabiano Rosas         }
78*f0984d40SFabiano Rosas     } else {
79*f0984d40SFabiano Rosas         /*
80*f0984d40SFabiano Rosas          * Compute the byte offset of the index within the tile:
81*f0984d40SFabiano Rosas          *     (index % (svl / size)) * (size * sizeof(row))
82*f0984d40SFabiano Rosas          *   = (index % (svl >> esz)) << (esz + log2(sizeof(row)))
83*f0984d40SFabiano Rosas          */
84*f0984d40SFabiano Rosas         pos = esz + ctz32(sizeof(ARMVectorReg));
85*f0984d40SFabiano Rosas         tcg_gen_deposit_z_i32(tmp, tmp, pos, len);
86*f0984d40SFabiano Rosas 
87*f0984d40SFabiano Rosas         /* Row slices are always aligned and need no endian adjustment. */
88*f0984d40SFabiano Rosas     }
89*f0984d40SFabiano Rosas 
90*f0984d40SFabiano Rosas     /* The tile byte offset within env->zarray is the row. */
91*f0984d40SFabiano Rosas     offset = tile * sizeof(ARMVectorReg);
92*f0984d40SFabiano Rosas 
93*f0984d40SFabiano Rosas     /* Include the byte offset of zarray to make this relative to env. */
94*f0984d40SFabiano Rosas     offset += offsetof(CPUARMState, zarray);
95*f0984d40SFabiano Rosas     tcg_gen_addi_i32(tmp, tmp, offset);
96*f0984d40SFabiano Rosas 
97*f0984d40SFabiano Rosas     /* Add the byte offset to env to produce the final pointer. */
98*f0984d40SFabiano Rosas     addr = tcg_temp_new_ptr();
99*f0984d40SFabiano Rosas     tcg_gen_ext_i32_ptr(addr, tmp);
100*f0984d40SFabiano Rosas     tcg_temp_free_i32(tmp);
101*f0984d40SFabiano Rosas     tcg_gen_add_ptr(addr, addr, cpu_env);
102*f0984d40SFabiano Rosas 
103*f0984d40SFabiano Rosas     return addr;
104*f0984d40SFabiano Rosas }
105*f0984d40SFabiano Rosas 
106*f0984d40SFabiano Rosas static bool trans_ZERO(DisasContext *s, arg_ZERO *a)
107*f0984d40SFabiano Rosas {
108*f0984d40SFabiano Rosas     if (!dc_isar_feature(aa64_sme, s)) {
109*f0984d40SFabiano Rosas         return false;
110*f0984d40SFabiano Rosas     }
111*f0984d40SFabiano Rosas     if (sme_za_enabled_check(s)) {
112*f0984d40SFabiano Rosas         gen_helper_sme_zero(cpu_env, tcg_constant_i32(a->imm),
113*f0984d40SFabiano Rosas                             tcg_constant_i32(streaming_vec_reg_size(s)));
114*f0984d40SFabiano Rosas     }
115*f0984d40SFabiano Rosas     return true;
116*f0984d40SFabiano Rosas }
117*f0984d40SFabiano Rosas 
118*f0984d40SFabiano Rosas static bool trans_MOVA(DisasContext *s, arg_MOVA *a)
119*f0984d40SFabiano Rosas {
120*f0984d40SFabiano Rosas     static gen_helper_gvec_4 * const h_fns[5] = {
121*f0984d40SFabiano Rosas         gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
122*f0984d40SFabiano Rosas         gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d,
123*f0984d40SFabiano Rosas         gen_helper_sve_sel_zpzz_q
124*f0984d40SFabiano Rosas     };
125*f0984d40SFabiano Rosas     static gen_helper_gvec_3 * const cz_fns[5] = {
126*f0984d40SFabiano Rosas         gen_helper_sme_mova_cz_b, gen_helper_sme_mova_cz_h,
127*f0984d40SFabiano Rosas         gen_helper_sme_mova_cz_s, gen_helper_sme_mova_cz_d,
128*f0984d40SFabiano Rosas         gen_helper_sme_mova_cz_q,
129*f0984d40SFabiano Rosas     };
130*f0984d40SFabiano Rosas     static gen_helper_gvec_3 * const zc_fns[5] = {
131*f0984d40SFabiano Rosas         gen_helper_sme_mova_zc_b, gen_helper_sme_mova_zc_h,
132*f0984d40SFabiano Rosas         gen_helper_sme_mova_zc_s, gen_helper_sme_mova_zc_d,
133*f0984d40SFabiano Rosas         gen_helper_sme_mova_zc_q,
134*f0984d40SFabiano Rosas     };
135*f0984d40SFabiano Rosas 
136*f0984d40SFabiano Rosas     TCGv_ptr t_za, t_zr, t_pg;
137*f0984d40SFabiano Rosas     TCGv_i32 t_desc;
138*f0984d40SFabiano Rosas     int svl;
139*f0984d40SFabiano Rosas 
140*f0984d40SFabiano Rosas     if (!dc_isar_feature(aa64_sme, s)) {
141*f0984d40SFabiano Rosas         return false;
142*f0984d40SFabiano Rosas     }
143*f0984d40SFabiano Rosas     if (!sme_smza_enabled_check(s)) {
144*f0984d40SFabiano Rosas         return true;
145*f0984d40SFabiano Rosas     }
146*f0984d40SFabiano Rosas 
147*f0984d40SFabiano Rosas     t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v);
148*f0984d40SFabiano Rosas     t_zr = vec_full_reg_ptr(s, a->zr);
149*f0984d40SFabiano Rosas     t_pg = pred_full_reg_ptr(s, a->pg);
150*f0984d40SFabiano Rosas 
151*f0984d40SFabiano Rosas     svl = streaming_vec_reg_size(s);
152*f0984d40SFabiano Rosas     t_desc = tcg_constant_i32(simd_desc(svl, svl, 0));
153*f0984d40SFabiano Rosas 
154*f0984d40SFabiano Rosas     if (a->v) {
155*f0984d40SFabiano Rosas         /* Vertical slice -- use sme mova helpers. */
156*f0984d40SFabiano Rosas         if (a->to_vec) {
157*f0984d40SFabiano Rosas             zc_fns[a->esz](t_zr, t_za, t_pg, t_desc);
158*f0984d40SFabiano Rosas         } else {
159*f0984d40SFabiano Rosas             cz_fns[a->esz](t_za, t_zr, t_pg, t_desc);
160*f0984d40SFabiano Rosas         }
161*f0984d40SFabiano Rosas     } else {
162*f0984d40SFabiano Rosas         /* Horizontal slice -- reuse sve sel helpers. */
163*f0984d40SFabiano Rosas         if (a->to_vec) {
164*f0984d40SFabiano Rosas             h_fns[a->esz](t_zr, t_za, t_zr, t_pg, t_desc);
165*f0984d40SFabiano Rosas         } else {
166*f0984d40SFabiano Rosas             h_fns[a->esz](t_za, t_zr, t_za, t_pg, t_desc);
167*f0984d40SFabiano Rosas         }
168*f0984d40SFabiano Rosas     }
169*f0984d40SFabiano Rosas 
170*f0984d40SFabiano Rosas     tcg_temp_free_ptr(t_za);
171*f0984d40SFabiano Rosas     tcg_temp_free_ptr(t_zr);
172*f0984d40SFabiano Rosas     tcg_temp_free_ptr(t_pg);
173*f0984d40SFabiano Rosas 
174*f0984d40SFabiano Rosas     return true;
175*f0984d40SFabiano Rosas }
176*f0984d40SFabiano Rosas 
177*f0984d40SFabiano Rosas static bool trans_LDST1(DisasContext *s, arg_LDST1 *a)
178*f0984d40SFabiano Rosas {
179*f0984d40SFabiano Rosas     typedef void GenLdSt1(TCGv_env, TCGv_ptr, TCGv_ptr, TCGv, TCGv_i32);
180*f0984d40SFabiano Rosas 
181*f0984d40SFabiano Rosas     /*
182*f0984d40SFabiano Rosas      * Indexed by [esz][be][v][mte][st], which is (except for load/store)
183*f0984d40SFabiano Rosas      * also the order in which the elements appear in the function names,
184*f0984d40SFabiano Rosas      * and so how we must concatenate the pieces.
185*f0984d40SFabiano Rosas      */
186*f0984d40SFabiano Rosas 
187*f0984d40SFabiano Rosas #define FN_LS(F)     { gen_helper_sme_ld1##F, gen_helper_sme_st1##F }
188*f0984d40SFabiano Rosas #define FN_MTE(F)    { FN_LS(F), FN_LS(F##_mte) }
189*f0984d40SFabiano Rosas #define FN_HV(F)     { FN_MTE(F##_h), FN_MTE(F##_v) }
190*f0984d40SFabiano Rosas #define FN_END(L, B) { FN_HV(L), FN_HV(B) }
191*f0984d40SFabiano Rosas 
192*f0984d40SFabiano Rosas     static GenLdSt1 * const fns[5][2][2][2][2] = {
193*f0984d40SFabiano Rosas         FN_END(b, b),
194*f0984d40SFabiano Rosas         FN_END(h_le, h_be),
195*f0984d40SFabiano Rosas         FN_END(s_le, s_be),
196*f0984d40SFabiano Rosas         FN_END(d_le, d_be),
197*f0984d40SFabiano Rosas         FN_END(q_le, q_be),
198*f0984d40SFabiano Rosas     };
199*f0984d40SFabiano Rosas 
200*f0984d40SFabiano Rosas #undef FN_LS
201*f0984d40SFabiano Rosas #undef FN_MTE
202*f0984d40SFabiano Rosas #undef FN_HV
203*f0984d40SFabiano Rosas #undef FN_END
204*f0984d40SFabiano Rosas 
205*f0984d40SFabiano Rosas     TCGv_ptr t_za, t_pg;
206*f0984d40SFabiano Rosas     TCGv_i64 addr;
207*f0984d40SFabiano Rosas     int svl, desc = 0;
208*f0984d40SFabiano Rosas     bool be = s->be_data == MO_BE;
209*f0984d40SFabiano Rosas     bool mte = s->mte_active[0];
210*f0984d40SFabiano Rosas 
211*f0984d40SFabiano Rosas     if (!dc_isar_feature(aa64_sme, s)) {
212*f0984d40SFabiano Rosas         return false;
213*f0984d40SFabiano Rosas     }
214*f0984d40SFabiano Rosas     if (!sme_smza_enabled_check(s)) {
215*f0984d40SFabiano Rosas         return true;
216*f0984d40SFabiano Rosas     }
217*f0984d40SFabiano Rosas 
218*f0984d40SFabiano Rosas     t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v);
219*f0984d40SFabiano Rosas     t_pg = pred_full_reg_ptr(s, a->pg);
220*f0984d40SFabiano Rosas     addr = tcg_temp_new_i64();
221*f0984d40SFabiano Rosas 
222*f0984d40SFabiano Rosas     tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->esz);
223*f0984d40SFabiano Rosas     tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
224*f0984d40SFabiano Rosas 
225*f0984d40SFabiano Rosas     if (mte) {
226*f0984d40SFabiano Rosas         desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
227*f0984d40SFabiano Rosas         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
228*f0984d40SFabiano Rosas         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
229*f0984d40SFabiano Rosas         desc = FIELD_DP32(desc, MTEDESC, WRITE, a->st);
230*f0984d40SFabiano Rosas         desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << a->esz) - 1);
231*f0984d40SFabiano Rosas         desc <<= SVE_MTEDESC_SHIFT;
232*f0984d40SFabiano Rosas     } else {
233*f0984d40SFabiano Rosas         addr = clean_data_tbi(s, addr);
234*f0984d40SFabiano Rosas     }
235*f0984d40SFabiano Rosas     svl = streaming_vec_reg_size(s);
236*f0984d40SFabiano Rosas     desc = simd_desc(svl, svl, desc);
237*f0984d40SFabiano Rosas 
238*f0984d40SFabiano Rosas     fns[a->esz][be][a->v][mte][a->st](cpu_env, t_za, t_pg, addr,
239*f0984d40SFabiano Rosas                                       tcg_constant_i32(desc));
240*f0984d40SFabiano Rosas 
241*f0984d40SFabiano Rosas     tcg_temp_free_ptr(t_za);
242*f0984d40SFabiano Rosas     tcg_temp_free_ptr(t_pg);
243*f0984d40SFabiano Rosas     tcg_temp_free_i64(addr);
244*f0984d40SFabiano Rosas     return true;
245*f0984d40SFabiano Rosas }
246*f0984d40SFabiano Rosas 
247*f0984d40SFabiano Rosas typedef void GenLdStR(DisasContext *, TCGv_ptr, int, int, int, int);
248*f0984d40SFabiano Rosas 
249*f0984d40SFabiano Rosas static bool do_ldst_r(DisasContext *s, arg_ldstr *a, GenLdStR *fn)
250*f0984d40SFabiano Rosas {
251*f0984d40SFabiano Rosas     int svl = streaming_vec_reg_size(s);
252*f0984d40SFabiano Rosas     int imm = a->imm;
253*f0984d40SFabiano Rosas     TCGv_ptr base;
254*f0984d40SFabiano Rosas 
255*f0984d40SFabiano Rosas     if (!sme_za_enabled_check(s)) {
256*f0984d40SFabiano Rosas         return true;
257*f0984d40SFabiano Rosas     }
258*f0984d40SFabiano Rosas 
259*f0984d40SFabiano Rosas     /* ZA[n] equates to ZA0H.B[n]. */
260*f0984d40SFabiano Rosas     base = get_tile_rowcol(s, MO_8, a->rv, imm, false);
261*f0984d40SFabiano Rosas 
262*f0984d40SFabiano Rosas     fn(s, base, 0, svl, a->rn, imm * svl);
263*f0984d40SFabiano Rosas 
264*f0984d40SFabiano Rosas     tcg_temp_free_ptr(base);
265*f0984d40SFabiano Rosas     return true;
266*f0984d40SFabiano Rosas }
267*f0984d40SFabiano Rosas 
268*f0984d40SFabiano Rosas TRANS_FEAT(LDR, aa64_sme, do_ldst_r, a, gen_sve_ldr)
269*f0984d40SFabiano Rosas TRANS_FEAT(STR, aa64_sme, do_ldst_r, a, gen_sve_str)
270*f0984d40SFabiano Rosas 
271*f0984d40SFabiano Rosas static bool do_adda(DisasContext *s, arg_adda *a, MemOp esz,
272*f0984d40SFabiano Rosas                     gen_helper_gvec_4 *fn)
273*f0984d40SFabiano Rosas {
274*f0984d40SFabiano Rosas     int svl = streaming_vec_reg_size(s);
275*f0984d40SFabiano Rosas     uint32_t desc = simd_desc(svl, svl, 0);
276*f0984d40SFabiano Rosas     TCGv_ptr za, zn, pn, pm;
277*f0984d40SFabiano Rosas 
278*f0984d40SFabiano Rosas     if (!sme_smza_enabled_check(s)) {
279*f0984d40SFabiano Rosas         return true;
280*f0984d40SFabiano Rosas     }
281*f0984d40SFabiano Rosas 
282*f0984d40SFabiano Rosas     /* Sum XZR+zad to find ZAd. */
283*f0984d40SFabiano Rosas     za = get_tile_rowcol(s, esz, 31, a->zad, false);
284*f0984d40SFabiano Rosas     zn = vec_full_reg_ptr(s, a->zn);
285*f0984d40SFabiano Rosas     pn = pred_full_reg_ptr(s, a->pn);
286*f0984d40SFabiano Rosas     pm = pred_full_reg_ptr(s, a->pm);
287*f0984d40SFabiano Rosas 
288*f0984d40SFabiano Rosas     fn(za, zn, pn, pm, tcg_constant_i32(desc));
289*f0984d40SFabiano Rosas 
290*f0984d40SFabiano Rosas     tcg_temp_free_ptr(za);
291*f0984d40SFabiano Rosas     tcg_temp_free_ptr(zn);
292*f0984d40SFabiano Rosas     tcg_temp_free_ptr(pn);
293*f0984d40SFabiano Rosas     tcg_temp_free_ptr(pm);
294*f0984d40SFabiano Rosas     return true;
295*f0984d40SFabiano Rosas }
296*f0984d40SFabiano Rosas 
297*f0984d40SFabiano Rosas TRANS_FEAT(ADDHA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addha_s)
298*f0984d40SFabiano Rosas TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s)
299*f0984d40SFabiano Rosas TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d)
300*f0984d40SFabiano Rosas TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d)
301*f0984d40SFabiano Rosas 
302*f0984d40SFabiano Rosas static bool do_outprod(DisasContext *s, arg_op *a, MemOp esz,
303*f0984d40SFabiano Rosas                        gen_helper_gvec_5 *fn)
304*f0984d40SFabiano Rosas {
305*f0984d40SFabiano Rosas     int svl = streaming_vec_reg_size(s);
306*f0984d40SFabiano Rosas     uint32_t desc = simd_desc(svl, svl, a->sub);
307*f0984d40SFabiano Rosas     TCGv_ptr za, zn, zm, pn, pm;
308*f0984d40SFabiano Rosas 
309*f0984d40SFabiano Rosas     if (!sme_smza_enabled_check(s)) {
310*f0984d40SFabiano Rosas         return true;
311*f0984d40SFabiano Rosas     }
312*f0984d40SFabiano Rosas 
313*f0984d40SFabiano Rosas     /* Sum XZR+zad to find ZAd. */
314*f0984d40SFabiano Rosas     za = get_tile_rowcol(s, esz, 31, a->zad, false);
315*f0984d40SFabiano Rosas     zn = vec_full_reg_ptr(s, a->zn);
316*f0984d40SFabiano Rosas     zm = vec_full_reg_ptr(s, a->zm);
317*f0984d40SFabiano Rosas     pn = pred_full_reg_ptr(s, a->pn);
318*f0984d40SFabiano Rosas     pm = pred_full_reg_ptr(s, a->pm);
319*f0984d40SFabiano Rosas 
320*f0984d40SFabiano Rosas     fn(za, zn, zm, pn, pm, tcg_constant_i32(desc));
321*f0984d40SFabiano Rosas 
322*f0984d40SFabiano Rosas     tcg_temp_free_ptr(za);
323*f0984d40SFabiano Rosas     tcg_temp_free_ptr(zn);
324*f0984d40SFabiano Rosas     tcg_temp_free_ptr(pn);
325*f0984d40SFabiano Rosas     tcg_temp_free_ptr(pm);
326*f0984d40SFabiano Rosas     return true;
327*f0984d40SFabiano Rosas }
328*f0984d40SFabiano Rosas 
329*f0984d40SFabiano Rosas static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
330*f0984d40SFabiano Rosas                             gen_helper_gvec_5_ptr *fn)
331*f0984d40SFabiano Rosas {
332*f0984d40SFabiano Rosas     int svl = streaming_vec_reg_size(s);
333*f0984d40SFabiano Rosas     uint32_t desc = simd_desc(svl, svl, a->sub);
334*f0984d40SFabiano Rosas     TCGv_ptr za, zn, zm, pn, pm, fpst;
335*f0984d40SFabiano Rosas 
336*f0984d40SFabiano Rosas     if (!sme_smza_enabled_check(s)) {
337*f0984d40SFabiano Rosas         return true;
338*f0984d40SFabiano Rosas     }
339*f0984d40SFabiano Rosas 
340*f0984d40SFabiano Rosas     /* Sum XZR+zad to find ZAd. */
341*f0984d40SFabiano Rosas     za = get_tile_rowcol(s, esz, 31, a->zad, false);
342*f0984d40SFabiano Rosas     zn = vec_full_reg_ptr(s, a->zn);
343*f0984d40SFabiano Rosas     zm = vec_full_reg_ptr(s, a->zm);
344*f0984d40SFabiano Rosas     pn = pred_full_reg_ptr(s, a->pn);
345*f0984d40SFabiano Rosas     pm = pred_full_reg_ptr(s, a->pm);
346*f0984d40SFabiano Rosas     fpst = fpstatus_ptr(FPST_FPCR);
347*f0984d40SFabiano Rosas 
348*f0984d40SFabiano Rosas     fn(za, zn, zm, pn, pm, fpst, tcg_constant_i32(desc));
349*f0984d40SFabiano Rosas 
350*f0984d40SFabiano Rosas     tcg_temp_free_ptr(za);
351*f0984d40SFabiano Rosas     tcg_temp_free_ptr(zn);
352*f0984d40SFabiano Rosas     tcg_temp_free_ptr(pn);
353*f0984d40SFabiano Rosas     tcg_temp_free_ptr(pm);
354*f0984d40SFabiano Rosas     tcg_temp_free_ptr(fpst);
355*f0984d40SFabiano Rosas     return true;
356*f0984d40SFabiano Rosas }
357*f0984d40SFabiano Rosas 
358*f0984d40SFabiano Rosas TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_h)
359*f0984d40SFabiano Rosas TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
360*f0984d40SFabiano Rosas TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
361*f0984d40SFabiano Rosas 
362*f0984d40SFabiano Rosas /* TODO: FEAT_EBF16 */
363*f0984d40SFabiano Rosas TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa)
364*f0984d40SFabiano Rosas 
365*f0984d40SFabiano Rosas TRANS_FEAT(SMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_smopa_s)
366*f0984d40SFabiano Rosas TRANS_FEAT(UMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_umopa_s)
367*f0984d40SFabiano Rosas TRANS_FEAT(SUMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_sumopa_s)
368*f0984d40SFabiano Rosas TRANS_FEAT(USMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_usmopa_s)
369*f0984d40SFabiano Rosas 
370*f0984d40SFabiano Rosas TRANS_FEAT(SMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_smopa_d)
371*f0984d40SFabiano Rosas TRANS_FEAT(UMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_umopa_d)
372*f0984d40SFabiano Rosas TRANS_FEAT(SUMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_sumopa_d)
373*f0984d40SFabiano Rosas TRANS_FEAT(USMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_usmopa_d)
374